Added classes for aimodel and article/article_list; implemented a wrapper
that uses aimodel to extract keywords from an article's content
This commit is contained in:
+15
-8
@@ -2,6 +2,10 @@ import os
|
||||
import requests
|
||||
import xmltodict
|
||||
import pprint
|
||||
from article_list import article, article_list
|
||||
import re
|
||||
|
||||
CLEANR = re.compile("<.*?>")
|
||||
|
||||
|
||||
def download_xml(url: str, path: str) -> bool:
|
||||
@@ -30,8 +34,8 @@ def read_xml_from_file(path: str) -> dict:
|
||||
|
||||
|
||||
def xml_dict_to_article_list(
|
||||
xml_dict: dict, tagpath: list, reserved_fields: list
|
||||
) -> list:
|
||||
xml_dict: dict, tagpath: list, reserved_fields: list = ["title", "description"]
|
||||
) -> article_list:
|
||||
"""
|
||||
Translate a xml dict struct into a article_list
|
||||
xml_dict: the payload of xml dict
|
||||
@@ -41,11 +45,15 @@ def xml_dict_to_article_list(
|
||||
root = xml_dict
|
||||
for tag in tagpath:
|
||||
root = root[tag]
|
||||
article_list = []
|
||||
alist = article_list()
|
||||
for item in root:
|
||||
tmp_dict = {i: item[i] for i in item if i in reserved_fields}
|
||||
article_list.append(tmp_dict)
|
||||
return article_list
|
||||
title = "".join(tmp_dict["title"]).strip()
|
||||
title = re.sub(CLEANR, "", title)
|
||||
description = "".join(tmp_dict["description"]).strip()
|
||||
description = re.sub(CLEANR, "", description)
|
||||
alist.append(article(title, description))
|
||||
return alist
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -56,7 +64,6 @@ if __name__ == "__main__":
|
||||
download_xml(url=url, path=filename)
|
||||
xml_dict = read_xml_from_file(filename)
|
||||
article_list = xml_dict_to_article_list(
|
||||
xml_dict, ["rss", "channel", "item"], "title"
|
||||
xml_dict, ["rss", "channel", "item"], ["title", "description"]
|
||||
)
|
||||
pp = pprint.PrettyPrinter()
|
||||
pp.pprint(article_list)
|
||||
print(str(article_list))
|
||||
|
||||
Reference in New Issue
Block a user