Files
smartrss/fetch_exper_data.py
T

63 lines
1.7 KiB
Python

import os
import requests
import xmltodict
import pprint
def download_xml(url: str, path: str, timeout: float = 10.0) -> bool:
    """
    Download xml from given url and store into file

    url: request url
    path: where to store xml payload to
    timeout: seconds to wait for the server before giving up; forwarded
        to ``requests.get``

    Returns True when the server answered with HTTP 200 and the body was
    written to ``path``. Returns False, without touching ``path``, for any
    other status code.

    Network failures (including the timeout expiring) are not caught here;
    they propagate as the exception raised by ``requests.get``.
    """
    # Without an explicit timeout, requests waits indefinitely on a
    # stalled server, which would hang the caller.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return False
    # NOTE(review): the payload is written in text mode with the platform
    # default encoding. read_xml_from_file opens the file the same way, so
    # if an explicit encoding is introduced it must be added to both.
    with open(path, "w") as f:
        f.write(response.text)
    return True
def read_xml_from_file(path: str) -> dict:
    """
    Load a local xml file and convert its content into a dict struct

    path: local xml file path

    Returns whatever ``xmltodict.parse`` produces for the file's text.
    """
    with open(path) as xml_file:
        raw_xml = xml_file.read()
    return xmltodict.parse(raw_xml)
def xml_dict_to_article_list(
    xml_dict: dict, tagpath: list, reserved_fields: list
) -> list:
    """
    Translate an xml dict struct into an article list

    xml_dict: the payload of xml dict
    tagpath: a list containing the path of tags from the root element down
        to the item list, e.g. ["rss", "channel", "item"]
    reserved_fields: for each item in the item list, which fields will be
        preserved, e.g. ["title", "description", "pubDate"]. A single
        field name given as a plain string is accepted as well.

    Returns a list with one dict per item, each holding only the reserved
    fields that the item actually has, in the item's own key order.

    Raises KeyError when a tag in ``tagpath`` is missing from the payload.
    """
    root = xml_dict
    for tag in tagpath:
        root = root[tag]

    # A tag that occurs only once is parsed by xmltodict into a single dict
    # instead of a one-element list; normalise so the loop below always
    # walks over items and never over the keys of a lone item.
    if isinstance(root, dict):
        root = [root]

    # A bare string would otherwise be matched by substring ("t" in
    # "title"), so treat it as exactly one field name.
    if isinstance(reserved_fields, str):
        wanted = {reserved_fields}
    else:
        wanted = set(reserved_fields)

    return [
        {key: value for key, value in item.items() if key in wanted}
        for item in root
    ]
if __name__ == "__main__":
    # Base url is taken from the RSSHUB_ROOT environment variable.
    url_root = os.getenv("RSSHUB_ROOT")
    # Explicit check rather than assert: asserts are stripped under
    # `python -O`, which would let a None slip into the concatenation below.
    if not url_root:
        raise SystemExit("environment variable RSSHUB_ROOT is not set")
    # Tolerate a trailing slash in the configured root.
    url = url_root.rstrip("/") + "/yicai/brief"
    filename = "exper/raw/yicai.brief.xml"

    # open(..., "w") does not create missing parent directories.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Stop here on a failed download instead of parsing a stale or missing file.
    if not download_xml(url=url, path=filename):
        raise SystemExit(f"failed to download {url}")

    xml_dict = read_xml_from_file(filename)
    # reserved_fields is a list of field names, not a bare string.
    article_list = xml_dict_to_article_list(
        xml_dict, ["rss", "channel", "item"], ["title"]
    )
    pprint.pprint(article_list)