Added classes for aimodel and article/article_list, implemented wrapper

to use aimodel to extract keywords from an article's content
This commit is contained in:
2024-01-08 04:14:38 +00:00
parent 9d51bd91a8
commit 3dd7d65ee7
8 changed files with 442 additions and 386 deletions
+15 -8
View File
@@ -2,6 +2,10 @@ import os
import requests
import xmltodict
import pprint
from article_list import article, article_list
import re
CLEANR = re.compile("<.*?>")
def download_xml(url: str, path: str) -> bool:
@@ -30,8 +34,8 @@ def read_xml_from_file(path: str) -> dict:
def xml_dict_to_article_list(
xml_dict: dict, tagpath: list, reserved_fields: list
) -> list:
xml_dict: dict, tagpath: list, reserved_fields: list = ["title", "description"]
) -> article_list:
"""
Translate a xml dict struct into a article_list
xml_dict: the payload of xml dict
@@ -41,11 +45,15 @@ def xml_dict_to_article_list(
root = xml_dict
for tag in tagpath:
root = root[tag]
article_list = []
alist = article_list()
for item in root:
tmp_dict = {i: item[i] for i in item if i in reserved_fields}
article_list.append(tmp_dict)
return article_list
title = "".join(tmp_dict["title"]).strip()
title = re.sub(CLEANR, "", title)
description = "".join(tmp_dict["description"]).strip()
description = re.sub(CLEANR, "", description)
alist.append(article(title, description))
return alist
if __name__ == "__main__":
@@ -56,7 +64,6 @@ if __name__ == "__main__":
download_xml(url=url, path=filename)
xml_dict = read_xml_from_file(filename)
article_list = xml_dict_to_article_list(
xml_dict, ["rss", "channel", "item"], "title"
xml_dict, ["rss", "channel", "item"], ["title", "description"]
)
pp = pprint.PrettyPrinter()
pp.pprint(article_list)
print(str(article_list))