baobuiquang committed on
Commit
8009463
·
verified ·
1 Parent(s): 81867b7
Files changed (1) hide show
  1. app.py +338 -0
app.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timezone, timedelta
2
+ from dateutil.tz import tzoffset, tzutc
3
+ from urllib.parse import urlparse
4
+ from bs4 import BeautifulSoup
5
+ import gradio as gr
6
+ import humanize
7
+ import html
8
+ import json
9
+ import time
10
+ import re
11
+ import os
12
+
13
+ from _vendor import vendor_llm_endpoint, vendor_llm_model
14
+ from _spider import rss_spider
15
+
16
+ # ====================================================================================================
17
+
18
def get_main_domain(url):
    """Return the main (registrable) domain of *url* for display.

    The result is derived from the parsed hostname only, so text that merely
    appears in the path or query string never influences it.

    Args:
        url: Absolute URL, or a scheme-less "host/path" string.

    Returns:
        The hostname itself when it has at most two labels; the last three
        labels when the host sits under a compound country-code suffix such
        as ``com.vn`` or ``org.vn``; otherwise the last two labels. ``None``
        when no hostname can be extracted.
    """
    # Second-level labels that, under a two-letter country-code TLD, are part
    # of the public suffix ("nld.com.vn", "sggp.org.vn", "bbc.co.uk", ...).
    second_level_labels = ("com", "org", "net", "gov", "edu", "co", "ac")

    parsed = urlparse(url)
    hostname = parsed.hostname
    if hostname is None and not parsed.scheme:
        # Scheme-less input ("nld.com.vn/path") is parsed entirely as a path;
        # retry it as a network-location reference so it still resolves.
        try:
            hostname = urlparse(f"//{url}").hostname
        except ValueError:
            hostname = None
    if not hostname:
        return None

    labels = hostname.split('.')
    if len(labels) <= 2:
        return hostname
    has_compound_suffix = len(labels[-1]) == 2 and labels[-2] in second_level_labels
    return '.'.join(labels[-3:] if has_compound_suffix else labels[-2:])
30
+
31
def humanize_ago(dt):
    """Render the time elapsed since *dt* as a capitalized phrase.

    The current UTC time minus *dt* (converted to UTC) is handed to
    ``humanize.naturaltime``; ``str.capitalize`` is applied to its result,
    which upper-cases the first character and lower-cases the rest.
    """
    elapsed = datetime.now(timezone.utc) - dt.astimezone(timezone.utc)
    phrase = humanize.naturaltime(elapsed)
    return phrase.capitalize()
35
+
36
def clean_text(str_html):
    """Repair numeric entities in *str_html* and strip every ``<img>`` tag.

    Args:
        str_html: HTML fragment (e.g. a feed title or summary).

    Returns:
        The fragment re-serialized by BeautifulSoup after entity decoding and
        removal of all ``<img>`` elements. Other markup is kept.
    """
    # Some inputs carry numeric entities without the leading "&" ("#8217;").
    # The negative lookbehind leaves well-formed "&#8217;" alone; without it
    # they became "&&#8217;" and a stray "&" ended up in the decoded text.
    repaired = re.sub(r'(?<!&)#(\d+);', r'&#\1;', str_html)
    decoded = html.unescape(repaired)
    # Remove all img tags
    soup = BeautifulSoup(decoded, "html.parser")
    for img in soup.find_all("img"):
        img.decompose()
    return str(soup)
46
+
47
def clean_text_2(text):
    """Reduce an HTML fragment to a single line of plain text.

    ``<a>`` elements are dropped together with their contents, the remaining
    markup is serialized and parsed a second time, its text is extracted with
    single-space separators, and newlines, tabs and literal ``<br>`` strings
    are replaced by spaces.
    """
    linkless = BeautifulSoup(text, "html.parser")
    for anchor in linkless.find_all("a"):
        anchor.decompose()
    # Serialize and parse again before extracting text, mirroring the
    # two-pass flow this function has always used.
    reparsed = BeautifulSoup(str(linkless), "html.parser")
    plain = reparsed.get_text(separator=" ", strip=True)
    for unwanted in ("\n", "\t", "<br>"):
        plain = plain.replace(unwanted, " ")
    return plain
58
+
59
+ # ====================================================================================================
60
+
61
# Gradio theme handed to demo.launch(): every hue slot uses the "neutral"
# palette, text size is "lg", and both fonts are loaded as Google Fonts.
theme = gr.themes.Base(
    primary_hue="neutral",
    secondary_hue="neutral",
    neutral_hue="neutral",
    text_size="lg",
    font=[gr.themes.GoogleFont('Inter')],
    font_mono=[gr.themes.GoogleFont('Ubuntu Mono')],
)
# Extra markup handed to demo.launch() as `head`: sets the page icon.
head = """
<link rel="icon" href="https://cdn.jsdelivr.net/gh/OneLevelStudio/CORE/STATIC/1LV_LOGO_DARK.png">
"""
72
# Disabled alternative rules that would hide scrollbars altogether; the active
# stylesheet below styles the WebKit scrollbar instead.
# * { -ms-overflow-style: none; scrollbar-width: none; }
# *::-webkit-scrollbar { display: none; }

# Stylesheet handed to demo.launch() as `css`. It hides the Hugging Face
# header and the footer, removes page padding, styles the `gr_Markdown` /
# `gr_Button` element classes used by the layout, and defines the
# `#all-news-items` / `.news-*` rules for the HTML that
# display_all_entries() generates.
# NOTE(review): `.desktop-only` is hidden at max-width 1000px and
# `.mobile-only` at min-width 1000px, so at exactly 1000px both are hidden.
css = """
#huggingface-space-header { display: none !important; }
footer { display: none !important; }
main {
padding: 0 !important;
max-width: 100% !important;
}
textarea {
padding-top: 5px !important;
padding-bottom: 6px !important;
}
.row {
gap: 0 !important;
}
.gr_Markdown {
background: transparent !important;
border: none !important;
padding: 14px 16px 16px 16px !important;
text-align: justify;
}
.gr_Button {
margin: 32px !important;
width: initial !important;
font-size: 16px !important;
padding: 10px 12px 12px 12px !important;
}
/* ---------- Scrollbar ---------- */
::-webkit-scrollbar {
background: transparent;
width: 8px;
border-radius: 999px;
}
::-webkit-scrollbar-track {
background: transparent;
border-radius: 999px;
}
::-webkit-scrollbar-thumb {
background: hsla(0, 0%, 50%, 0.5);
border-radius: 999px;
}
::-webkit-scrollbar-thumb:hover {
background: hsla(0, 0%, 50%, 0.9);
}
/* ---------- Desktop/Mobile Only ---------- */
.desktop-only {
display: block;
}
@media only screen and (max-width: 1000px) {
.desktop-only {
display: none;
}
}
.mobile-only {
display: block;
}
@media only screen and (min-width: 1000px) {
.mobile-only {
display: none;
}
}
/* ---------- ---------- */
#all-news-items {
display: flex;
flex-direction: column;
gap: 16px;
height: 100svh;
overflow-y: scroll;
border-left: solid 1px hsla(0, 0%, 50%, .2);
border-right: solid 1px hsla(0, 0%, 50%, .2);
padding: 32px 24px 32px 32px;
}
.news-item {
border-radius: 8px;
background: hsla(0, 0%, 100%, 0.05);
border: solid 1px hsla(0, 0%, 100%, 0.05);
padding: 16px 18px;
}
.news-item a {
padding: 0 !important;
text-align: left !important;
}
.news-item a .news-title {
font-size: 20px !important;
font-weight: 600 !important;
line-height: 1.3 !important;
margin: 0 !important;
color: white !important;
}
.news-info, .news-info * {
font-size: 14px !important;
color: grey !important;
}
.news-summary, .news-summary * {
font-size: 14px !important;
color: grey !important;
margin: 0 !important;
line-height: 1.5 !important;
text-align: justify;
}
.news-info {
margin-bottom: 4px !important;
}
.news-summary {
margin-top: 8px !important;
}
"""
# Input attributes that switch off autocorrect and spellcheck. Not referenced
# anywhere else in the visible part of this file.
offspellcheck = gr.InputHTMLAttributes(autocorrect="off", spellcheck=False)
181
+
182
+ # ====================================================================================================
183
+
184
def fetch_all_rss():
    """Announce a refresh on stdout and return the result of ``rss_spider()``."""
    print("> Fetching new RSS...")
    entries = rss_spider()
    return entries
188
+
189
+ # ====================================================================================================
190
+
191
# Module-level cache, rebound through `global` by the handlers below.
# Entries most recently returned by fetch_all_rss().
NEWS_ALL_ENTRIES = []
# Naive local timestamp of the last fetch; the far-past initial value makes
# the first staleness comparison succeed, so the first request fetches.
NEWS_LAST_UPDATE = datetime(2001, 1, 1)
# Text accumulated from the most recent LLM summary stream.
SUMMARY_LLM = ""
# Naive local timestamp of the last summary run; same far-past initial value.
SUMMARY_LAST_UPDATE = datetime(2001, 1, 1)
195
+
196
def fn_btn_manual_fetch():
    """Handle the manual-refresh button and return the rendered news HTML.

    When more than ten minutes have passed since the last fetch, the
    timestamp is reset and the feeds are fetched again; otherwise the call
    just pauses for two seconds. Either way the result of
    ``display_all_entries()`` is returned.
    """
    global NEWS_ALL_ENTRIES
    global NEWS_LAST_UPDATE
    cooldown = timedelta(minutes=10)
    since_last_fetch = datetime.now() - NEWS_LAST_UPDATE
    if since_last_fetch <= cooldown:
        # Still inside the cooldown window: no fetch, only a short pause.
        time.sleep(2)
    else:
        NEWS_LAST_UPDATE = datetime.now()
        NEWS_ALL_ENTRIES = fetch_all_rss()
    return display_all_entries()
207
+
208
def display_all_entries():
    """Render the cached news entries as one HTML string.

    If the cache is older than 120 minutes it is refreshed first through
    ``fetch_all_rss()``. Each entry is read through its ``'time'``,
    ``'link'``, ``'title'`` and ``'summary'`` keys.

    Returns:
        ``<div id='all-news-items'>`` wrapping one ``.news-item`` card per
        entry in ``NEWS_ALL_ENTRIES``.
    """
    global NEWS_ALL_ENTRIES
    global NEWS_LAST_UPDATE
    if datetime.now() - NEWS_LAST_UPDATE > timedelta(minutes=120):
        NEWS_LAST_UPDATE = datetime.now()
        NEWS_ALL_ENTRIES = fetch_all_rss()

    cards = []
    for e in NEWS_ALL_ENTRIES:
        # The link comes from a feed: quote the attribute and escape its
        # value so spaces, quotes or ">" cannot break out of href.
        safe_link = html.escape(e['link'], quote=True)
        cards.append(f"""
<div class='news-item'>
    <div class='news-info'>
        {humanize_ago(e['time'])} • {get_main_domain(e['link'])}
    </div>
    <a target='_blank' href='{safe_link}'>
        <p class='news-title'>
            {clean_text(e['title'])} ↗
        </p>
    </a>
    <div class='news-summary'>
        {clean_text(e['summary'])}
    </div>
</div>
""")
    return "<div id='all-news-items'>" + "".join(cards) + "</div>"
238
+
239
def fn_llm_summarize():
    """Stream an LLM-written market summary of the newest cached headlines.

    Generator. When more than ten minutes have passed since the last run, a
    prompt is built from the first 50 entries of ``NEWS_ALL_ENTRIES`` and
    sent to ``vendor_llm_endpoint`` with ``stream=True``; the growing text is
    stored in ``SUMMARY_LLM`` and yielded after each chunk that carries
    content. Otherwise the call pauses for two seconds. In both cases the
    current ``SUMMARY_LLM`` is yielded last.

    Yields:
        The summary text accumulated so far.
    """
    global NEWS_ALL_ENTRIES
    global SUMMARY_LLM
    global SUMMARY_LAST_UPDATE
    if datetime.now() - SUMMARY_LAST_UPDATE > timedelta(minutes=10):
        SUMMARY_LAST_UPDATE = datetime.now()
        # ----------
        news_lines = [
            f"""{humanize_ago(e['time'])} - {get_main_domain(e['link'])} - "{clean_text(e['title'])}" ({clean_text_2(clean_text(e['summary']))})\n"""
            for e in NEWS_ALL_ENTRIES[:50]
        ]
        inputtext_news = "".join(news_lines)
        # ----------
        my_prompt = f"""\
Dưới đây là những tiêu đề báo mới nhất. Tóm tắt và phân tích tình hình thị trường một cách chuyên nghiệp.
Không chào hỏi, không giới thiệu, không tương tác với người dùng; chỉ tập trung vào việc tóm tắt và phân tích.
-----
{inputtext_news}\
-----\
"""
        # print("--------------------------------------------------")
        # print(my_prompt)
        # print("--------------------------------------------------")
        # ----------
        SUMMARY_LLM = ""
        llm_res_stream = vendor_llm_endpoint.chat.completions.create(
            model=vendor_llm_model,
            messages=[{"role": "user", "content": my_prompt}],
            stream=True,
        )
        for event in llm_res_stream:
            # Streamed chunks may have no choices, or a delta whose content
            # is None (e.g. role-only or final chunk); appending None to a
            # str would raise TypeError and abort the stream.
            if not event.choices:
                continue
            piece = event.choices[0].delta.content
            if piece:
                SUMMARY_LLM += piece
                yield SUMMARY_LLM
        # ----------
    else:
        time.sleep(2)
    yield SUMMARY_LLM
276
+
277
+ # ====================================================================================================
278
+
279
# Three-column page: refresh button + TradingView widget (scale 2), the news
# list (scale 4), and the LLM summary (scale 2).
with gr.Blocks(title="Tracking Spider") as demo:
    with gr.Row():
        with gr.Column(scale=2):
            # Both the button and the iframe carry the `desktop-only` class
            # defined in `css`.
            btn_manual_fetch = gr.Button("🕷 Tracking Spider", elem_classes="gr_Button desktop-only")
            # f-string with no placeholders: the doubled braces emit the
            # literal { } that wrap the widget's JSON settings.
            gr.HTML(f"""
<iframe scrolling='no' class='desktop-only' style='width: 100%; height: 600px; margin-top: 10svh;' srcdoc='
<!-- TradingView Widget BEGIN -->
<div class="tradingview-widget-container">
<div class="tradingview-widget-container__widget"></div>
<div class="tradingview-widget-copyright"><a href="https://www.tradingview.com/symbols/HOSE-VNINDEX/technicals/" rel="noopener nofollow" target="_blank"><span class="blue-text">VNINDEX analysis</span></a><span class="trademark"> by TradingView</span></div>
<script type="text/javascript" src="https://s3.tradingview.com/external-embedding/embed-widget-technical-analysis.js" async>
{{
"colorTheme": "dark",
"displayMode": "single",
"isTransparent": true,
"locale": "en",
"interval": "1h",
"disableInterval": false,
"largeChartUrl": "https://www.tradingview.com/support/solutions/43000614331/",
"width": "100%",
"height": 1000,
"symbol": "HOSE:VNINDEX",
"showIntervalTabs": true
}}
</script>
</div>
<!-- TradingView Widget END -->
'></iframe>
""")
        with gr.Column(scale=4):
            # Receives the HTML string returned by display_all_entries().
            display_all_news = gr.HTML(container=False)
        with gr.Column(scale=2):
            # Receives the text yielded by fn_llm_summarize().
            display_llm_summary = gr.Markdown(container=True, height="100svh", elem_classes="gr_Markdown")
    # -----
    # On the Blocks `load` event: render the news list, then run the summary.
    demo.load(
        fn=lambda: display_all_entries(),
        inputs=[],
        outputs=[display_all_news],
        show_progress="full",
    ).then(
        fn=fn_llm_summarize,
        inputs=[],
        outputs=[display_llm_summary],
        show_progress="full",
    )
    # -----
    # On button click: same two-step chain, but the first step goes through
    # fn_btn_manual_fetch and its ten-minute refresh window.
    gr.on(
        triggers=btn_manual_fetch.click,
        fn=fn_btn_manual_fetch,
        inputs=[],
        outputs=[display_all_news],
        show_progress="full",
    ).then(
        fn=fn_llm_summarize,
        inputs=[],
        outputs=[display_llm_summary],
        show_progress="full",
    )

# Start the app; page-level head markup, stylesheet and theme are passed here.
demo.launch(head=head, css=css, theme=theme)