Spaces:
Sleeping
Sleeping
| from dateutil import parser as dateutil_parser | |
| import feedparser | |
# RSS feed URLs fetched by rss_spider(), in the order they are polled.
ls_rss_urls = [
    "https://vneconomy.vn/chung-khoan.rss",
    "https://cafebiz.vn/rss/chung-khoan.rss",
    "https://vietstock.vn/830/chung-khoan/co-phieu.rss",
    "https://vietstock.vn/739/chung-khoan/giao-dich-noi-bo.rss",
    "https://thitruongtaichinh.kinhtedothi.vn/rss/chung-khoan-182.rss",
    "https://nhandan.vn/rss/chungkhoan-1191.rss",
    "https://thanhnien.vn/rss/kinh-te/chung-khoan.rss",
    "https://www.sggp.org.vn/rss/kinhte-taichinhchuungkhoan-44.rss",
    "https://antt.vn/rss/chung-khoan.rss",
    "https://nganhangvietnam.vn/rss/chung-khoan.rss",
    "https://nld.com.vn/rss/kinh-te/tai-chinh-chung-khoan.rss",
    "https://soha.vn/rss/kinh-doanh/chung-khoan.rss",
    "https://tienphong.vn/rss/tai-chinh-chung-khoan-105.rss",
    "https://www.nguoiduatin.vn/rss/kinh-te/tai-chinh-ngan-hang.rss",
    "https://bnews.vn/rss/chung-khoan-33.rss",
]
def rss_spider(rss_urls=None, limit=100):
    """Fetch RSS feeds and return their newest entries, most recent first.

    Each feed is parsed with ``feedparser``; a one-line status (entry count
    per URL) is printed for every feed. Entries are reduced to a small dict
    and ordered by their parsed ``published`` timestamp.

    Args:
        rss_urls: Iterable of feed URLs to poll. Defaults to the module-level
            ``ls_rss_urls`` when omitted or ``None``.
        limit: Maximum number of entries to return (default 100). ``None``
            returns every collected entry.

    Returns:
        A list of dicts with keys ``"title"``, ``"link"``, ``"summary"``
        (strings, empty when the feed omits the field) and ``"time"``
        (the ``datetime`` produced by ``dateutil`` from ``published``),
        sorted newest first.
    """
    # Local import so the file's top-level import block does not need to change.
    from datetime import timezone

    if rss_urls is None:
        rss_urls = ls_rss_urls

    # ---------- collect raw entries from every feed
    all_entries = []
    for rss_url in rss_urls:
        rss_res = feedparser.parse(rss_url)
        all_entries += rss_res.entries
        print(f"{'β ' if len(rss_res.entries)>0 else 'β'} {rss_url} > {len(rss_res.entries)}")

    # ---------- keep only the fields we need
    all_entries_clean = []
    for e in all_entries:
        published_text = e.get("published")
        if not published_text:
            # Without a publication date the entry cannot be ranked; skip it
            # instead of letting a KeyError abort the whole run.
            continue
        try:
            published_at = dateutil_parser.parse(published_text)
        except (ValueError, OverflowError):
            # dateutil raises these for unparseable / out-of-range dates.
            continue
        all_entries_clean.append({
            "title": e.get("title", ""),
            "link": e.get("link", ""),
            "summary": e.get("summary", ""),
            "time": published_at,
        })

    # ---------- newest first
    def _sort_key(entry):
        # Feeds may mix offset-aware and naive timestamps, which Python
        # refuses to compare. For ordering only, naive values are treated
        # as UTC (an assumption — the feeds do not state their zone); the
        # stored "time" value itself is left untouched.
        moment = entry["time"]
        if moment.utcoffset() is None:
            moment = moment.replace(tzinfo=timezone.utc)
        return moment

    all_entries_sorted = sorted(all_entries_clean, key=_sort_key, reverse=True)

    return all_entries_sorted[:limit]