Spaces:
Sleeping
Sleeping
| from datetime import datetime, timezone, timedelta | |
| from dateutil.tz import tzoffset, tzutc | |
| from urllib.parse import urlparse | |
| from bs4 import BeautifulSoup | |
| import gradio as gr | |
| import humanize | |
| import html | |
| import json | |
| import time | |
| import re | |
| import os | |
| from _vendor import vendor_llm_endpoint, vendor_llm_model | |
| from _spider import rss_spider | |
| # ==================================================================================================== | |
def get_main_domain(url):
    """Return the registrable-looking domain of *url*, or None if there is none.

    Examples: ``https://www.vnexpress.net/x`` -> ``vnexpress.net``,
    ``https://www.example.com.vn/x`` -> ``example.com.vn``.

    Args:
        url: URL string taken from a feed entry. May be empty or None.

    Returns:
        The domain as a string, or None when *url* is empty, cannot be
        parsed, or carries no hostname (e.g. it has no scheme).
    """
    if not url:
        return None
    # Legacy shortcuts kept for backward compatibility: these two outlets are
    # recognised by substring, which also works for scheme-less links.
    for pinned in ("nld.com.vn", "sggp.org.vn"):
        if pinned in url:
            return pinned
    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. an unbalanced "[").
        return None
    if hostname is None:
        return None
    labels = hostname.split('.')
    if len(labels) <= 2:
        return hostname
    # A two-letter country code preceded by a generic second-level label
    # ("com.vn", "org.vn", "co.uk", ...) needs three labels; keeping only two
    # would return the bare suffix instead of the site's domain.
    generic_second_level = ("com", "org", "net", "edu", "gov", "co", "ac")
    if len(labels[-1]) == 2 and labels[-2] in generic_second_level:
        return '.'.join(labels[-3:])
    return '.'.join(labels[-2:])
def humanize_ago(dt):
    """Describe how long ago *dt* was, as a capitalised human-readable phrase.

    Args:
        dt: A datetime. It is converted to UTC with ``astimezone`` before the
            difference from the current UTC time is taken.

    Returns:
        The string produced by ``humanize.naturaltime`` for the elapsed
        time, with ``str.capitalize`` applied.
    """
    elapsed = datetime.now(timezone.utc) - dt.astimezone(timezone.utc)
    phrase = humanize.naturaltime(elapsed)
    return phrase.capitalize()
def clean_text(str_html):
    """Repair numeric entities, unescape the markup and drop all <img> tags.

    Args:
        str_html: HTML fragment (e.g. a feed title or summary). May be empty
            or None.

    Returns:
        The cleaned markup as a string; an empty string for empty/None input.
    """
    if not str_html:
        return ""
    # Some feeds deliver numeric entities without the leading "&" ("#8217;").
    # The lookbehind restores the "&" only where it is really missing, so a
    # well-formed "&#8217;" is not turned into "&&#8217;" (a stray "&").
    repaired = re.sub(r'(?<!&)#(\d+);', r'&#\1;', str_html)
    # NOTE(review): unescaping turns escaped markup in the feed into live
    # markup; callers embed the result as HTML — confirm feeds are trusted.
    soup = BeautifulSoup(html.unescape(repaired), "html.parser")
    # Remove all img tags
    for img in soup.find_all("img"):
        img.decompose()
    return str(soup)
def clean_text_2(text):
    """Reduce an HTML fragment to plain text with links removed.

    All <a> elements (including their text) are dropped, the remaining markup
    is converted to text with single-space separators, and newlines, tabs and
    literal "<br>" sequences are replaced by spaces.

    Args:
        text: HTML fragment.

    Returns:
        The plain-text string.
    """
    without_links = BeautifulSoup(text, "html.parser")
    for anchor in without_links.find_all("a"):
        anchor.decompose()
    # The markup is serialised and parsed a second time before extracting
    # text, exactly as before.
    # NOTE(review): merging this into a single parse may change the spacing
    # around removed links — verify before simplifying.
    reparsed = BeautifulSoup(str(without_links), "html.parser")
    plain = reparsed.get_text(separator=" ", strip=True)
    for unwanted in ("\n", "\t", "<br>"):
        plain = plain.replace(unwanted, " ")
    return plain
| # ==================================================================================================== | |
# Gradio theme for the app: every hue is set to "neutral", text size is large,
# and the body / monospace fonts are loaded from Google Fonts.
theme = gr.themes.Base(
    primary_hue="neutral",
    secondary_hue="neutral",
    neutral_hue="neutral",
    text_size="lg",
    font=[gr.themes.GoogleFont('Inter')],
    font_mono=[gr.themes.GoogleFont('Ubuntu Mono')],
)
# Extra markup for the page <head>: declares the favicon.
head = """
<link rel="icon" href="https://cdn.jsdelivr.net/gh/OneLevelStudio/CORE/STATIC/1LV_LOGO_DARK.png">
"""
# Disabled alternative that would hide scrollbars entirely (kept for reference):
# * { -ms-overflow-style: none; scrollbar-width: none; }
# *::-webkit-scrollbar { display: none; }
# Custom stylesheet: hides the Hugging Face header and the footer, removes
# page padding, styles the scrollbar, defines the .desktop-only / .mobile-only
# helpers (1000px breakpoint) and the news list / news card classes that
# display_all_entries() emits.
css = """
#huggingface-space-header { display: none !important; }
footer { display: none !important; }
main {
padding: 0 !important;
max-width: 100% !important;
}
textarea {
padding-top: 5px !important;
padding-bottom: 6px !important;
}
.row {
gap: 0 !important;
}
.gr_Markdown {
background: transparent !important;
border: none !important;
padding: 14px 16px 16px 16px !important;
text-align: justify;
}
.gr_Button {
margin: 32px !important;
width: initial !important;
font-size: 16px !important;
padding: 10px 12px 12px 12px !important;
}
/* ---------- Scrollbar ---------- */
::-webkit-scrollbar {
background: transparent;
width: 8px;
border-radius: 999px;
}
::-webkit-scrollbar-track {
background: transparent;
border-radius: 999px;
}
::-webkit-scrollbar-thumb {
background: hsla(0, 0%, 50%, 0.5);
border-radius: 999px;
}
::-webkit-scrollbar-thumb:hover {
background: hsla(0, 0%, 50%, 0.9);
}
/* ---------- Desktop/Mobile Only ---------- */
.desktop-only {
display: block;
}
@media only screen and (max-width: 1000px) {
.desktop-only {
display: none;
}
}
.mobile-only {
display: block;
}
@media only screen and (min-width: 1000px) {
.mobile-only {
display: none;
}
}
/* ---------- ---------- */
#all-news-items {
display: flex;
flex-direction: column;
gap: 16px;
height: 100svh;
overflow-y: scroll;
border-left: solid 1px hsla(0, 0%, 50%, .2);
border-right: solid 1px hsla(0, 0%, 50%, .2);
padding: 32px 24px 32px 32px;
}
.news-item {
border-radius: 8px;
background: hsla(0, 0%, 100%, 0.05);
border: solid 1px hsla(0, 0%, 100%, 0.05);
padding: 16px 18px;
}
.news-item a {
padding: 0 !important;
text-align: left !important;
}
.news-item a .news-title {
font-size: 20px !important;
font-weight: 600 !important;
line-height: 1.3 !important;
margin: 0 !important;
color: white !important;
}
.news-info, .news-info * {
font-size: 14px !important;
color: grey !important;
}
.news-summary, .news-summary * {
font-size: 14px !important;
color: grey !important;
margin: 0 !important;
line-height: 1.5 !important;
text-align: justify;
}
.news-info {
margin-bottom: 4px !important;
}
.news-summary {
margin-top: 8px !important;
}
"""
# Input HTML attributes that switch off autocorrect and spell-checking.
# NOTE(review): not referenced anywhere in the visible part of this file.
offspellcheck = gr.InputHTMLAttributes(autocorrect="off", spellcheck=False)
| # ==================================================================================================== | |
def fetch_all_rss():
    """Log that a fetch is starting and return the result of ``rss_spider()``."""
    print("> Fetching new RSS...")
    entries = rss_spider()
    return entries
| # ==================================================================================================== | |
# Module-level cache read and written by the handlers below.
# Entries returned by the most recent fetch_all_rss() call.
NEWS_ALL_ENTRIES = []
# Naive timestamp of the last news fetch; starts far in the past so the first
# staleness check always triggers a fetch.
NEWS_LAST_UPDATE = datetime(2001, 1, 1)
# Text of the most recent LLM summary (grows while a response is streaming).
SUMMARY_LLM = ""
# Naive timestamp of the last summary request; same far-past initial value.
SUMMARY_LAST_UPDATE = datetime(2001, 1, 1)
def fn_btn_manual_fetch():
    """Handle the manual-refresh button.

    Re-fetches the feeds when the cached entries are more than 10 minutes
    old; otherwise just pauses for 2 seconds. Either way the current entries
    are rendered.

    Returns:
        The HTML string produced by ``display_all_entries()``.
    """
    global NEWS_ALL_ENTRIES, NEWS_LAST_UPDATE
    cooldown = timedelta(minutes=10)
    age = datetime.now() - NEWS_LAST_UPDATE
    if age <= cooldown:
        # Still inside the cooldown window: no fetch, only a short pause.
        time.sleep(2)
    else:
        NEWS_LAST_UPDATE = datetime.now()
        NEWS_ALL_ENTRIES = fetch_all_rss()
    return display_all_entries()
def display_all_entries():
    """Render the cached news entries as one HTML string.

    The cache is refreshed first when it is more than 60 minutes old. Each
    entry is read through its ``'time'``, ``'link'``, ``'title'`` and
    ``'summary'`` keys.

    Returns:
        An HTML string: a ``#all-news-items`` container holding one
        ``.news-item`` card per entry.
    """
    global NEWS_ALL_ENTRIES
    global NEWS_LAST_UPDATE
    if datetime.now() - NEWS_LAST_UPDATE > timedelta(minutes=60):
        NEWS_LAST_UPDATE = datetime.now()
        NEWS_ALL_ENTRIES = fetch_all_rss()
    cards = []
    for e in NEWS_ALL_ENTRIES:
        # The link comes from the feed: escape it and quote the attribute so
        # spaces, quotes or ">" in a URL cannot break or inject markup.
        safe_link = html.escape(str(e['link']), quote=True)
        cards.append(f"""
<div class='news-item'>
<div class='news-info'>
{humanize_ago(e['time'])} • {get_main_domain(e['link'])}
</div>
<a target='_blank' rel='noopener' href='{safe_link}'>
<p class='news-title'>
{clean_text(e['title'])} ↗
</p>
</a>
<div class='news-summary'>
{clean_text(e['summary'])}
</div>
</div>
""")
    # Joining once avoids repeated string concatenation inside the loop.
    return "<div id='all-news-items'>" + "".join(cards) + "</div>"
def fn_llm_summarize():
    """Stream an LLM-written market summary of the latest headlines.

    This is a generator. When the cached summary is more than 5 minutes old,
    it builds a prompt from the first 50 cached entries, requests a streamed
    completion and yields the accumulated text after each received piece.
    Otherwise it pauses for 2 seconds. In both cases the final value yielded
    is the current content of ``SUMMARY_LLM``.

    Yields:
        The summary text accumulated so far.
    """
    global NEWS_ALL_ENTRIES
    global SUMMARY_LLM
    global SUMMARY_LAST_UPDATE
    if datetime.now() - SUMMARY_LAST_UPDATE > timedelta(minutes=5):
        SUMMARY_LAST_UPDATE = datetime.now()
        # One line per headline: age - source - "title" (plain-text summary).
        inputtext_news = "".join(
            f"""{humanize_ago(e['time'])} - {get_main_domain(e['link'])} - "{clean_text(e['title'])}" ({clean_text_2(clean_text(e['summary']))})\n"""
            for e in NEWS_ALL_ENTRIES[:50]
        )
        my_prompt = f"""\
Dưới đây là những tiêu đề báo mới nhất. Tóm tắt và phân tích tình hình thị trường một cách chuyên nghiệp.
Không chào hỏi, không giới thiệu, không tương tác với người dùng; chỉ tập trung vào việc tóm tắt và phân tích.
-----
{inputtext_news}\
-----\
"""
        SUMMARY_LLM = ""
        llm_res_stream = vendor_llm_endpoint.chat.completions.create(
            model=vendor_llm_model,
            messages=[{"role": "user", "content": my_prompt}],
            stream=True,
        )
        for event in llm_res_stream:
            # NOTE(review): the call shape looks like an OpenAI-compatible
            # client, whose stream chunks may carry no choices or a delta
            # with content=None; appending None to a str raises TypeError.
            if not event.choices:
                continue
            piece = event.choices[0].delta.content
            if not piece:
                continue
            SUMMARY_LLM += piece
            yield SUMMARY_LLM
    else:
        time.sleep(2)
    yield SUMMARY_LLM
| # ==================================================================================================== | |
# Page layout: one row with three columns (scale 2 / 4 / 2) —
# refresh button + TradingView widget, the news list, and the LLM summary.
with gr.Blocks(title="Tracking Spider") as demo:
    with gr.Row():
        with gr.Column(scale=2):
            btn_manual_fetch = gr.Button("🕷 Tracking Spider", elem_classes="gr_Button desktop-only")
            # The widget is embedded through an iframe's srcdoc. The string is an
            # f-string, so the doubled braces below produce literal { } in the
            # widget's JSON settings.
            gr.HTML(f"""
<iframe scrolling='no' class='desktop-only' style='width: 100%; height: 600px; margin-top: 10svh;' srcdoc='
<!-- TradingView Widget BEGIN -->
<div class="tradingview-widget-container">
<div class="tradingview-widget-container__widget"></div>
<div class="tradingview-widget-copyright"><a href="https://www.tradingview.com/symbols/HOSE-VNINDEX/technicals/" rel="noopener nofollow" target="_blank"><span class="blue-text">VNINDEX analysis</span></a><span class="trademark"> by TradingView</span></div>
<script type="text/javascript" src="https://s3.tradingview.com/external-embedding/embed-widget-technical-analysis.js" async>
{{
"colorTheme": "dark",
"displayMode": "single",
"isTransparent": true,
"locale": "en",
"interval": "1h",
"disableInterval": false,
"largeChartUrl": "https://www.tradingview.com/support/solutions/43000614331/",
"width": "100%",
"height": 1000,
"symbol": "HOSE:VNINDEX",
"showIntervalTabs": true
}}
</script>
</div>
<!-- TradingView Widget END -->
'></iframe>
""")
        with gr.Column(scale=4):
            display_all_news = gr.HTML(container=False)
        with gr.Column(scale=2):
            display_llm_summary = gr.Markdown(container=True, height="100svh", elem_classes="gr_Markdown")
    # -----
    # On page load: render the news list, then stream the LLM summary.
    demo.load(
        fn=lambda: display_all_entries(),
        inputs=[],
        outputs=[display_all_news],
        show_progress="full",
    ).then(
        fn=fn_llm_summarize,
        inputs=[],
        outputs=[display_llm_summary],
        show_progress="full",
    )
    # -----
    # On button click: same two-step chain, but through the manual-fetch
    # handler (10-minute cooldown instead of the 60-minute staleness check).
    gr.on(
        triggers=btn_manual_fetch.click,
        fn=fn_btn_manual_fetch,
        inputs=[],
        outputs=[display_all_news],
        show_progress="full",
    ).then(
        fn=fn_llm_summarize,
        inputs=[],
        outputs=[display_llm_summary],
        show_progress="full",
    )
# Start the app with the favicon markup, stylesheet and theme defined above.
demo.launch(head=head, css=css, theme=theme)