Spaces:

onelevelstudio
/

NEWS

Running

App Files Files

baobuiquang committed on Feb 11

Commit

b5a1c35

verified ·

1 Parent(s): 073c296

initial commit

Browse files

Files changed (3) hide show

_rss_parser.py +197 -0
app.py +183 -0
requirements.txt +0 -0

_rss_parser.py ADDED Viewed

	@@ -0,0 +1,197 @@

+MAX_ENTRIES_EACH_RSS = 5  # Per-feed cap: rss_parser() keeps only the first N entries of each feed.
+MAX_TOTAL_ENTRIES = 200  # Overall cap: rss_parser() returns at most N entries after sorting by time.
+from dateutil import parser as dateutil_parser
+from datetime import datetime, timezone
+from bs4 import BeautifulSoup
+import feedparser
+import humanize
+import html
+import re
+# Feed registry. Each dict carries the RSS endpoint ("meta_url"), the source
+# name ("meta_domain") and a topic label ("meta_label"); rss_parser() copies
+# these three fields onto every entry it returns.
+ls_rss_urls = [
+    {"meta_url": feed_url, "meta_domain": feed_domain, "meta_label": feed_label}
+    for feed_url, feed_domain, feed_label in (
+        ("https://vneconomy.vn/chung-khoan.rss",
+         "vneconomy.vn (Hội Khoa học Kinh tế Việt Nam)", "chứng khoán"),
+        ("https://cafebiz.vn/rss/chung-khoan.rss",
+         "cafebiz.vn", "chứng khoán"),
+        ("https://vietstock.vn/830/chung-khoan/co-phieu.rss",
+         "vietstock.vn", "cổ phiếu"),
+        ("https://vietstock.vn/739/chung-khoan/giao-dich-noi-bo.rss",
+         "vietstock.vn", "giao dịch nội bộ"),
+        ("https://nhandan.vn/rss/chungkhoan-1191.rss",
+         "nhandan.vn (Đảng Cộng sản Việt Nam)", "chứng khoán"),
+        ("https://thanhnien.vn/rss/kinh-te/chung-khoan.rss",
+         "thanhnien.vn (Hội Liên hiệp Thanh niên Việt Nam)", "chứng khoán"),
+        ("https://www.sggp.org.vn/rss/kinhte-taichinhchuungkhoan-44.rss",
+         "sggp.org.vn (Đảng bộ Đảng Cộng sản Việt Nam TP.HCM)", "kinh tế, tài chính, chứng khoán"),
+        ("https://antt.vn/rss/chung-khoan.rss",
+         "antt.vn", "chứng khoán"),
+        ("https://nld.com.vn/rss/kinh-te/tai-chinh-chung-khoan.rss",
+         "nld.com.vn (Thành ủy TP.HCM)", "tài chính, chứng khoán"),
+        ("https://soha.vn/rss/kinh-doanh/chung-khoan.rss",
+         "soha.vn", "chứng khoán"),
+        ("https://tienphong.vn/rss/tai-chinh-chung-khoan-105.rss",
+         "tienphong.vn (Đoàn TNCS Hồ Chí Minh)", "tài chính, chứng khoán"),
+        ("https://www.nguoiduatin.vn/rss/kinh-te/tai-chinh-ngan-hang.rss",
+         "nguoiduatin.vn (Hội Luật gia Việt Nam)", "kinh tế, tài chính, ngân hàng"),
+        ("https://bnews.vn/rss/chung-khoan-33.rss",
+         "bnews.vn (Thông tấn xã Việt Nam)", "chứng khoán"),
+        ("https://vnexpress.net/rss/kinh-doanh.rss",
+         "vnexpress.net (Bộ Khoa học và Công nghệ)", "kinh doanh"),
+        ("https://vtv.vn/rss/kinh-te.rss",
+         "vtv.vn (Đài Truyền hình Việt Nam)", "kinh tế"),
+        ("https://rss.nytimes.com/services/xml/rss/nyt/Economy.xml",
+         "nytimes.com (The New York Times - US)", "economy"),
+        ("https://feeds.content.dowjones.io/public/rss/socialeconomyfeed",
+         "wsj.com (The Wall Street Journal - US)", "economy"),
+        ("https://abcnews.go.com/abcnews/moneyheadlines",
+         "abcnews.com (ABC News - US)", "money"),
+        ("https://www.wired.com/feed/category/business/latest/rss",
+         "wired.com (WIRED - US)", "business"),
+        ("https://www.theguardian.com/uk/business/rss",
+         "theguardian.com (The Guardian - UK)", "business"),
+        ("https://feeds.npr.org/1017/rss.xml",
+         "npr.org (National Public Radio - US)", "economy"),
+        ("https://www.economywatch.com/feed",
+         "economywatch.com (Economy Watch - UK)", "economy"),
+    )
+]
+def normalize_time(dt):
+    """Return ``dt`` as a timezone-aware datetime expressed in UTC.
+
+    A naive value (no ``tzinfo``) is taken to already be in UTC and is only
+    tagged as such; an aware value is converted to UTC.
+    """
+    is_aware = dt.tzinfo is not None
+    return dt.astimezone(timezone.utc) if is_aware else dt.replace(tzinfo=timezone.utc)
+def humanize_ago(dt):
+    """Describe how far ``dt`` lies from the current UTC time, in words.
+
+    The difference between now and ``dt`` (both expressed in UTC) is handed
+    to ``humanize.naturaltime`` and the resulting phrase is capitalized.
+    """
+    elapsed = datetime.now(timezone.utc) - dt.astimezone(timezone.utc)
+    phrase = humanize.naturaltime(elapsed)
+    return phrase.capitalize()
+def clean_text(str_html):
+    """Convert an HTML fragment taken from a feed into plain text.
+
+    Numeric character references that lost their leading ``&`` (for example
+    ``#8220;``) are repaired, entities are decoded, ``<img>`` tags are
+    removed and all remaining markup is stripped.
+
+    Args:
+        str_html: HTML/text fragment. ``None`` or an empty string is accepted.
+
+    Returns:
+        The text content with each text node stripped and the nodes joined
+        by single spaces; ``""`` for empty input.
+    """
+    if not str_html:
+        return ""
+    # Only add "&" where it is really missing: the lookbehind keeps an intact
+    # reference such as "&#38;" from becoming "&&#38;" (which decodes to "&&").
+    str_html = html.unescape(re.sub(r'(?<!&)#(\d+);', r'&#\1;', str_html))
+    soup = BeautifulSoup(str_html, "html.parser")
+    # Drop image tags explicitly before extracting the text.
+    for img in soup.find_all("img"):
+        img.decompose()
+    # get_text() discards every remaining tag and keeps only the text nodes.
+    return soup.get_text(separator=' ', strip=True)
+def rss_parser():
+    """Fetch every feed in ``ls_rss_urls`` and return the newest entries.
+
+    Only the first ``MAX_ENTRIES_EACH_RSS`` entries of each feed are kept.
+    Every kept entry is reduced to a plain dict with a cleaned ``title`` and
+    ``summary``, its ``link``, the publication ``time`` (timezone-aware, UTC),
+    a human-readable ``time_ago`` and the ``meta_*`` fields of its feed.
+
+    A feed that fails to load, or an entry without a link or a parseable
+    publication date, is reported on stdout and skipped, so that one bad
+    source cannot abort the whole listing.
+
+    Returns:
+        A list of entry dicts sorted newest first, truncated to
+        ``MAX_TOTAL_ENTRIES`` items.
+    """
+    all_entries_clean = []
+    for myrss in ls_rss_urls:
+        try:
+            new_entries = feedparser.parse(myrss["meta_url"]).entries
+        except Exception as err:  # best effort: keep going with the other feeds
+            print(f"❌ {myrss['meta_url']} > {err!r}")
+            continue
+        print(f"{'✅' if len(new_entries)>0 else '❌'} {myrss['meta_url']} > {len(new_entries)}")
+        for ent in new_entries[:MAX_ENTRIES_EACH_RSS]:
+            try:
+                # Parse the date once and reuse it for both time fields.
+                published = normalize_time(dateutil_parser.parse(ent["published"]))
+                link = ent["link"]
+            except (KeyError, TypeError, ValueError, OverflowError) as err:
+                print(f"⚠️ skipped entry from {myrss['meta_url']} > {err!r}")
+                continue
+            all_entries_clean.append({
+                # Title and summary are optional in feeds; fall back to "".
+                "title": clean_text(ent.get("title") or ""),
+                "link": link,
+                "summary": clean_text(ent.get("summary") or ""),
+                "time": published,
+                "time_ago": humanize_ago(published),
+                "meta_url": myrss["meta_url"],
+                "meta_domain": myrss["meta_domain"],
+                "meta_label": myrss["meta_label"],
+            })
+    # Newest first; every "time" is UTC-aware, so the values compare safely.
+    all_entries_sorted = sorted(all_entries_clean, key=lambda x: x["time"], reverse=True)
+    return all_entries_sorted[:MAX_TOTAL_ENTRIES]

app.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import gradio as gr
+from _rss_parser import rss_parser
+# ====================================================================================================
+# ====================================================================================================
+# ====================================================================================================
+def fn_display_all_news():
+    """Render the entries returned by ``rss_parser()`` as one HTML fragment.
+
+    Feed content is untrusted, so every interpolated value is HTML-escaped
+    and the link is emitted as a quoted attribute; otherwise a title, summary
+    or URL containing markup characters could break the page or inject
+    markup into it.
+
+    Returns:
+        A string holding a ``<div id='all-news-items'>`` element with one
+        ``news-item`` block per entry; each summary is limited to its first
+        500 characters.
+    """
+    from html import escape  # local import keeps this block self-contained
+
+    html_parts = ["<div id='all-news-items'>"]
+    for e in rss_parser():
+        time_ago = escape(str(e['time_ago']))
+        domain = escape(str(e['meta_domain']))
+        link = escape(str(e['link']), quote=True)
+        title = escape(str(e['title']))
+        # Truncate before escaping so an entity is never cut in half.
+        summary = escape(str(e['summary'])[:500])
+        html_parts.append(f"""
+        <div class='news-item'>
+            <div class='news-info'>
+                {time_ago} • {domain}
+            </div>
+            <a target='_blank' href='{link}'>
+                <p class='news-title'>
+                    {title}
+                </p>
+            </a>
+            <div class='news-summary'>
+                {summary}
+            </div>
+        </div>
+        """)
+    html_parts.append("</div>")
+    return "".join(html_parts)
+# ====================================================================================================
+# ====================================================================================================
+# ====================================================================================================
+# Visual theme: Gradio's Base theme with every hue set to "neutral", large
+# text, and fonts declared through gr.themes.GoogleFont. Passed to
+# demo.launch() at the bottom of the file.
+theme = gr.themes.Base(
+    primary_hue="neutral",
+    secondary_hue="neutral",
+    neutral_hue="neutral",
+    text_size="lg",
+    font=[gr.themes.GoogleFont('Inter')],
+    font_mono=[gr.themes.GoogleFont('Manufacturing Consent')],
+)
+# Extra markup for the page <head>: a <link rel="icon"> pointing at the logo.
+head = """
+<link rel="icon" href="https://cdn.jsdelivr.net/gh/OneLevelStudio/CORE/STATIC/1LV_LOGO_DARK.png">
+"""
+# * { -ms-overflow-style: none; scrollbar-width: none; }
+# *::-webkit-scrollbar { display: none; }
+# Custom stylesheet passed to demo.launch(). The .desktop-only/.mobile-only
+# helpers share one breakpoint (max-width: 1000px) so that exactly one of
+# them is visible at every viewport width; the previous min-width/max-width
+# pair overlapped at 1000px and hid both.
+css = """
+#huggingface-space-header { display: none !important; }
+footer { display: none !important; }
+body {
+    overflow: hidden !important;
+}
+main {
+    padding: 0 !important;
+    max-width: 100% !important;
+}
+textarea {
+    padding-top: 5px !important;
+    padding-bottom: 6px !important;
+}
+.row, .column {
+    gap: 0 !important;
+}
+/* ---------- Scrollbar ---------- */
+::-webkit-scrollbar {
+    background: transparent;
+    width: 8px;
+    border-radius: 999px;
+}
+::-webkit-scrollbar-track {
+    background: transparent;
+    border-radius: 999px;
+}
+::-webkit-scrollbar-thumb {
+    background: hsla(0, 0%, 50%, 0.5);
+    border-radius: 999px;
+}
+::-webkit-scrollbar-thumb:hover {
+    background: hsla(0, 0%, 50%, 0.9);
+}
+/* ---------- Desktop/Mobile Only ---------- */
+.desktop-only {
+    display: block;
+}
+@media only screen and (max-width: 1000px) {
+    .desktop-only {
+        display: none;
+    }
+}
+.mobile-only {
+    display: none;
+}
+@media only screen and (max-width: 1000px) {
+    .mobile-only {
+        display: block;
+    }
+}
+/* ---------- ---------- */
+#all-news-items {
+    display: flex;
+    flex-direction: column;
+    height: 100svh;
+    overflow-y: scroll;
+    border-left: solid 1px hsla(0, 0%, 50%, 0.0);
+    border-right: solid 1px hsla(0, 0%, 50%, 0.0);
+    padding: 64px 33svw;
+    gap: 64px;
+}
+@media only screen and (max-width: 1000px) {
+    #all-news-items {
+        padding: 64px 16px;
+    }
+}
+.news-item {
+    border-radius: 8px;
+    background: hsla(0, 0%, 100%, 0.00);
+    border: solid 1px hsla(0, 0%, 100%, 0.00);
+    padding: 0px;
+}
+.news-item a {
+    padding: 0 !important;
+    text-align: left !important;
+}
+.news-item a .news-title {
+    font-size: 20px !important;
+    font-weight: 600 !important;
+    line-height: 1.3 !important;
+    margin: 0 !important;
+    color: white !important;
+}
+.news-info, .news-info * {
+    font-size: 14px !important;
+    color: grey !important;
+}
+.news-summary, .news-summary * {
+    font-size: 14px !important;
+    color: grey !important;
+    margin: 0 !important;
+    line-height: 1.5 !important;
+    text-decoration: none !important;
+    text-align: justify;
+}
+.news-info {
+    margin-bottom: 4px !important;
+}
+.news-summary {
+    margin-top: 8px !important;
+}
+/* ---------- ---------- */
+#title-the-news {
+    border: none !important;
+    background: transparent !important;
+}
+#title-the-news .top-panel,
+#title-the-news .cm-gutters {
+    display: none !important;
+}
+#title-the-news .cm-line {
+    font-size: 32px;
+    text-align: center;
+}
+"""
+# ====================================================================================================
+# ====================================================================================================
+# ====================================================================================================
+# Page layout: a single column holding the "The News" heading and an HTML
+# component that starts out with a placeholder message.
+with gr.Blocks(title="The News") as demo:
+    with gr.Row():
+        # with gr.Column(scale=1):
+        #     gr.Markdown()
+        with gr.Column(scale=1):
+            # The heading is a gr.Code component; the "#title-the-news" rules in `css` restyle it.
+            gr.Code("The News", container=False, show_label=False, show_line_numbers=False, elem_id="title-the-news")
+            # Placeholder content; demo.load() below writes the rendered news into this component.
+            display_all_news = gr.HTML("<div style='text-align: center; margin-top: 37svh;'>🕷 Spiders are crawling the web 🕷</div>")
+        # with gr.Column(scale=1):
+        #     gr.Markdown()
+    # Register the load event: fn_display_all_news() builds the HTML and its
+    # return value becomes the content of display_all_news.
+    demo.load(
+        fn=lambda: fn_display_all_news(),
+        inputs=[],
+        outputs=[display_all_news],
+        show_progress="full",
+    )
+# Start the app with the theme, <head> markup and stylesheet defined earlier in this file.
+demo.launch(theme=theme, head=head, css=css)

requirements.txt ADDED Viewed

Binary file (120 Bytes). View file