from datetime import timezone

from dateutil import parser as dateutil_parser
import feedparser

# RSS feeds crawled by default by ``rss_spider``.
ls_rss_urls = [
    "https://vneconomy.vn/chung-khoan.rss",
    "https://cafebiz.vn/rss/chung-khoan.rss",
    "https://vietstock.vn/830/chung-khoan/co-phieu.rss",
    "https://vietstock.vn/739/chung-khoan/giao-dich-noi-bo.rss",
    "https://thitruongtaichinh.kinhtedothi.vn/rss/chung-khoan-182.rss",
    "https://nhandan.vn/rss/chungkhoan-1191.rss",
    "https://thanhnien.vn/rss/kinh-te/chung-khoan.rss",
    "https://www.sggp.org.vn/rss/kinhte-taichinhchuungkhoan-44.rss",
    "https://antt.vn/rss/chung-khoan.rss",
    "https://nganhangvietnam.vn/rss/chung-khoan.rss",
    "https://nld.com.vn/rss/kinh-te/tai-chinh-chung-khoan.rss",
    "https://soha.vn/rss/kinh-doanh/chung-khoan.rss",
    "https://tienphong.vn/rss/tai-chinh-chung-khoan-105.rss",
    "https://www.nguoiduatin.vn/rss/kinh-te/tai-chinh-ngan-hang.rss",
    "https://bnews.vn/rss/chung-khoan-33.rss",
]


def _clean_entry(entry):
    """Reduce a feed entry to title/link/summary/time, or None if undated."""
    published_text = entry.get("published")
    if not published_text:
        # Without a publication date the entry cannot be ranked by recency.
        return None
    try:
        published_at = dateutil_parser.parse(published_text)
    except (ValueError, OverflowError):
        # dateutil signals an unparseable date with ValueError (ParserError
        # is a subclass) or OverflowError; drop the entry instead of
        # aborting the whole crawl.
        return None
    return {
        "title": entry.get("title", ""),
        "link": entry.get("link", ""),
        "summary": entry.get("summary", ""),
        "time": published_at,
    }


def _sort_key(clean_entry):
    """Return a timezone-aware datetime usable for ordering mixed entries."""
    published_at = clean_entry["time"]
    if published_at.tzinfo is None or published_at.utcoffset() is None:
        # Naive and aware datetimes cannot be compared with each other.
        # NOTE(review): naive timestamps are assumed to be UTC for ordering
        # purposes only -- confirm against the feeds that omit an offset.
        return published_at.replace(tzinfo=timezone.utc)
    return published_at


def rss_spider(rss_urls=None, limit=100):
    """Fetch the configured RSS feeds and return the newest entries.

    Each feed is parsed with ``feedparser`` and a one-line status
    (check mark or cross, URL, entry count) is printed for it. Entries
    without a usable ``published`` date are skipped; a missing title, link
    or summary is replaced by an empty string.

    Args:
        rss_urls: Iterable of feed URLs to crawl. Defaults to the
            module-level ``ls_rss_urls``.
        limit: Maximum number of entries to return; ``None`` returns all.

    Returns:
        A list of dicts with the keys ``"title"``, ``"link"``,
        ``"summary"`` and ``"time"`` (the ``datetime`` parsed from the
        entry's ``published`` text, returned as parsed), newest first.
    """
    if rss_urls is None:
        rss_urls = ls_rss_urls

    all_entries = []
    for rss_url in rss_urls:
        rss_res = feedparser.parse(rss_url)
        entries = rss_res.entries
        all_entries.extend(entries)
        print(f"{'✅' if entries else '❌'} {rss_url} > {len(entries)}")

    all_entries_clean = []
    for raw_entry in all_entries:
        cleaned = _clean_entry(raw_entry)
        if cleaned is not None:
            all_entries_clean.append(cleaned)

    # The key normalises naive timestamps so that feeds with and without a
    # UTC offset can be ordered together; the stored "time" is left as-is.
    all_entries_sorted = sorted(all_entries_clean, key=_sort_key, reverse=True)
    return all_entries_sorted[:limit]