File size: 2,451 Bytes
81867b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from dateutil import parser as dateutil_parser
import feedparser

# RSS feed URLs for Vietnamese stock-market ("chung khoan") news, polled by
# rss_spider().  Feeds vary in entry schema and date format, so consumers
# should treat individual entries defensively.
# NOTE(review): effectively a module-level constant — lowercase name kept
# as-is for caller compatibility.
ls_rss_urls = [
    "https://vneconomy.vn/chung-khoan.rss",
    "https://cafebiz.vn/rss/chung-khoan.rss",
    "https://vietstock.vn/830/chung-khoan/co-phieu.rss",
    "https://vietstock.vn/739/chung-khoan/giao-dich-noi-bo.rss",
    "https://thitruongtaichinh.kinhtedothi.vn/rss/chung-khoan-182.rss",
    "https://nhandan.vn/rss/chungkhoan-1191.rss",
    "https://thanhnien.vn/rss/kinh-te/chung-khoan.rss",
    "https://www.sggp.org.vn/rss/kinhte-taichinhchuungkhoan-44.rss",
    "https://antt.vn/rss/chung-khoan.rss",
    "https://nganhangvietnam.vn/rss/chung-khoan.rss",
    "https://nld.com.vn/rss/kinh-te/tai-chinh-chung-khoan.rss",
    "https://soha.vn/rss/kinh-doanh/chung-khoan.rss",
    "https://tienphong.vn/rss/tai-chinh-chung-khoan-105.rss",
    "https://www.nguoiduatin.vn/rss/kinh-te/tai-chinh-ngan-hang.rss",
    "https://bnews.vn/rss/chung-khoan-33.rss",
]

def rss_spider(limit=100):
    """Fetch entries from every feed in ``ls_rss_urls``, newest first.

    Prints a per-feed status line (✅/❌ plus entry count), then returns
    cleaned entries sorted by publication time, most recent first.

    Parameters
    ----------
    limit : int, optional
        Maximum number of entries to return (default 100, matching the
        previous hardcoded behavior).

    Returns
    -------
    list[dict]
        Dicts with keys ``"title"``, ``"link"``, ``"summary"`` (strings,
        empty string when the feed omits the field) and ``"time"`` (a
        timezone-aware ``datetime``; naive feed timestamps are assumed UTC).
        Entries with a missing or unparseable publication date are skipped.
    """
    from datetime import timezone

    # ---------- fetch ----------
    all_entries = []
    for rss_url in ls_rss_urls:
        rss_res = feedparser.parse(rss_url)
        all_entries += rss_res.entries
        status = "✅" if rss_res.entries else "❌"
        print(f"{status} {rss_url} > {len(rss_res.entries)}")

    # ---------- clean ----------
    all_entries_clean = []
    for e in all_entries:
        published = e.get("published")
        if not published:
            # No publication date: entry cannot be sorted reliably — drop it.
            continue
        try:
            entry_time = dateutil_parser.parse(published)
        except (ValueError, OverflowError):
            # Malformed date string from a misbehaving feed — drop the entry.
            continue
        # sorted() raises TypeError when aware and naive datetimes are mixed;
        # feeds are inconsistent, so normalize naive timestamps to UTC.
        if entry_time.tzinfo is None:
            entry_time = entry_time.replace(tzinfo=timezone.utc)
        # .get with "" default: entry schemas differ across feeds, and a
        # missing optional field should not abort the whole crawl.
        all_entries_clean.append({
            "title": e.get("title", ""),
            "link": e.get("link", ""),
            "summary": e.get("summary", ""),
            "time": entry_time,
        })

    # ---------- sort & truncate ----------
    all_entries_sorted = sorted(all_entries_clean, key=lambda x: x["time"], reverse=True)
    return all_entries_sorted[:limit]