Dmitry Beresnev committed on
Commit
a8aef2e
·
1 Parent(s): 7ca0d9b

optimize news module performance, improve news UI, etc

Browse files
app/components/news.py CHANGED
@@ -6,8 +6,8 @@ from datetime import datetime
6
  import html as html_module
7
 
8
 
9
- def display_news_card(news_item: dict):
10
- """Display a single news card with professional styling using Streamlit components."""
11
 
12
  # Calculate time ago
13
  time_diff = datetime.now() - news_item['timestamp']
@@ -16,76 +16,242 @@ def display_news_card(news_item: dict):
16
  elif time_diff.seconds < 3600:
17
  time_ago = f"{time_diff.seconds // 60}m ago"
18
  else:
19
- time_ago = f"{time_diff.seconds // 3600}h ago"
 
 
 
 
 
 
 
 
20
 
21
- # Create container with custom styling
22
- with st.container():
23
- # Add custom CSS for this card
24
- st.markdown("""
25
- <style>
26
- .news-card {
27
- background: linear-gradient(135deg, #1f2937 0%, #111827 100%);
28
- border: 1px solid #374151;
29
- border-radius: 12px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  padding: 20px;
31
- margin-bottom: 16px;
32
- }
33
- </style>
 
 
34
  """, unsafe_allow_html=True)
 
35
 
36
- # Header row with source and badges
37
- col1, col2 = st.columns([3, 1])
38
-
39
- with col1:
40
- # Source and badges in one line
41
- badge_cols = st.columns([2, 1, 1, 1])
42
- with badge_cols[0]:
43
- st.markdown(f"**:blue[{news_item['source']}]**")
44
- with badge_cols[1]:
45
- if news_item['impact'] == 'high':
46
- st.markdown("🔴 **HIGH**")
47
- elif news_item['impact'] == 'medium':
48
- st.markdown("🟡 **MED**")
49
- else:
50
- st.markdown("🟢 **LOW**")
51
- with badge_cols[2]:
52
- sentiment_emoji = {'positive': '📈', 'negative': '📉', 'neutral': '➡️'}
53
- st.markdown(f"{sentiment_emoji.get(news_item['sentiment'], '➡️')} {news_item['sentiment'].title()}")
54
- with badge_cols[3]:
55
- st.markdown(f"**#{news_item['category']}**")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- with col2:
58
- st.markdown(f"[**Read More →**]({news_item['url']})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- # Title/Summary
61
- st.markdown(f"### {news_item.get('summary', '').strip()}")
62
 
63
- # Meta information
64
- st.caption(f"🕐 {time_ago}")
 
65
 
66
- st.markdown("---")
 
67
 
68
 
69
  def display_news_feed(df: pd.DataFrame, max_items: int = 20):
70
- """Display a feed of news items."""
71
 
72
  if df.empty:
73
  st.info("📭 No news available. Adjust your filters or refresh the feed.")
74
  return
75
 
76
- # Add custom CSS for animations
77
- st.markdown("""
78
- <style>
79
- @keyframes pulse {
80
- 0%, 100% { opacity: 1; }
81
- 50% { opacity: 0.6; }
82
- }
83
- </style>
84
- """, unsafe_allow_html=True)
85
-
86
  # Display news items
87
  for idx, row in df.head(max_items).iterrows():
88
- display_news_card(row.to_dict())
89
 
90
 
91
  def display_news_statistics(stats: dict):
@@ -167,36 +333,142 @@ def display_category_breakdown(stats: dict):
167
 
168
 
169
  def display_breaking_news_banner(df: pd.DataFrame):
170
- """Display breaking news banner at the top using Streamlit components."""
171
 
172
  breaking = df[df['is_breaking'] == True] if not df.empty and 'is_breaking' in df.columns else pd.DataFrame()
173
 
174
  if not breaking.empty:
175
  latest = breaking.iloc[0]
176
 
177
- # Create container with red background styling
178
- with st.container():
179
- st.markdown("""
180
- <style>
181
- .breaking-news-container {
182
- background: linear-gradient(135deg, #dc2626 0%, #991b1b 100%);
183
- border-radius: 12px;
184
- padding: 20px;
185
- margin-bottom: 24px;
186
- border: 2px solid #fca5a5;
187
- }
188
- </style>
189
- """, unsafe_allow_html=True)
190
-
191
- # Layout with emoji and content
192
- col1, col2, col3 = st.columns([1, 8, 2])
193
-
194
- with col1:
195
- st.markdown("# 🚨")
196
-
197
- with col2:
198
- st.markdown(f"**:red[BREAKING NEWS • {latest['source'].upper()}]**")
199
- st.markdown(f"## {latest.get('summary', '').strip()}")
200
-
201
- with col3:
202
- st.markdown(f"[**READ NOW →**]({latest['url']})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import html as html_module
7
 
8
 
9
def display_tradingview_news_card(news_item: dict):
    """Render a single news item as a TradingView-inspired HTML card.

    Args:
        news_item: Mapping that must provide ``timestamp`` (subtracted from
            ``datetime.now()``), ``impact``, ``sentiment``, ``source``,
            ``category`` and ``url``. ``summary`` is optional and defaults to
            an empty string.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    # Age of the item. ``timedelta.seconds`` only carries the sub-day remainder
    # (0..86399), so the total number of seconds is used; otherwise the
    # "Nd ago" branch can never be reached and older items show a wrong hour
    # count. Timestamps slightly in the future are clamped to "0s ago".
    elapsed = max(int((datetime.now() - news_item['timestamp']).total_seconds()), 0)
    if elapsed < 60:
        time_ago = f"{elapsed}s ago"
    elif elapsed < 3600:
        time_ago = f"{elapsed // 60}m ago"
    elif elapsed < 86400:
        time_ago = f"{elapsed // 3600}h ago"
    else:
        time_ago = f"{elapsed // 86400}d ago"

    # Impact badge colors (TradingView style)
    impact_colors = {
        'high': '#F23645',    # Red
        'medium': '#FF9800',  # Orange
        'low': '#089981',     # Green
    }

    # Sentiment colors
    sentiment_colors = {
        'positive': '#089981',  # Green
        'negative': '#F23645',  # Red
        'neutral': '#787B86',   # Gray
    }
    fallback_color = '#787B86'

    impact = news_item['impact']
    sentiment = news_item['sentiment']
    impact_color = impact_colors.get(impact, fallback_color)
    sentiment_color = sentiment_colors.get(sentiment, fallback_color)
    sentiment_marker = {'positive': '▲', 'negative': '▼'}.get(sentiment, '●')

    # Every value interpolated into the markup is escaped, including the
    # impact/sentiment labels, which are rendered as text as well.
    summary = html_module.escape(news_item.get('summary', '').strip())
    source = html_module.escape(news_item['source'])
    category = html_module.escape(news_item['category'])
    impact_label = html_module.escape(impact.upper())
    sentiment_label = html_module.escape(sentiment.upper())

    # Escaping alone does not neutralise ``javascript:``-style links, so only
    # absolute http(s) URLs are emitted; anything else becomes an inert anchor.
    link = news_item['url'].strip()
    if not link.lower().startswith(('http://', 'https://')):
        link = '#'
    url = html_module.escape(link)

    # TradingView-style card HTML
    card_html = f"""
    <div style="
        background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
        border: 1px solid #2A2E39;
        border-radius: 8px;
        padding: 16px;
        margin-bottom: 12px;
        transition: all 0.2s ease;
        cursor: pointer;
        position: relative;
        overflow: hidden;
    " onmouseover="this.style.borderColor='#3861FB'; this.style.transform='translateY(-2px)'; this.style.boxShadow='0 4px 12px rgba(56, 97, 251, 0.15)';"
       onmouseout="this.style.borderColor='#2A2E39'; this.style.transform='translateY(0)'; this.style.boxShadow='none';">

        <!-- Left colored indicator bar -->
        <div style="
            position: absolute;
            left: 0;
            top: 0;
            bottom: 0;
            width: 3px;
            background: {impact_color};
        "></div>

        <!-- Header row -->
        <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; margin-left: 8px;">
            <div style="display: flex; align-items: center; gap: 8px; flex-wrap: wrap;">
                <span style="
                    color: #3861FB;
                    font-weight: 600;
                    font-size: 13px;
                    font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;
                ">{source}</span>

                <span style="
                    background: {impact_color};
                    color: white;
                    padding: 2px 8px;
                    border-radius: 4px;
                    font-size: 10px;
                    font-weight: 700;
                    letter-spacing: 0.5px;
                ">{impact_label}</span>

                <span style="
                    color: {sentiment_color};
                    font-size: 11px;
                    font-weight: 600;
                    padding: 2px 6px;
                    border: 1px solid {sentiment_color};
                    border-radius: 4px;
                ">{sentiment_marker} {sentiment_label}</span>

                <span style="
                    color: #787B86;
                    font-size: 11px;
                    background: rgba(120, 123, 134, 0.1);
                    padding: 2px 6px;
                    border-radius: 4px;
                ">#{category}</span>
            </div>

            <span style="color: #787B86; font-size: 11px; white-space: nowrap;">{time_ago}</span>
        </div>

        <!-- News summary -->
        <div style="
            color: #D1D4DC;
            font-size: 14px;
            line-height: 1.5;
            margin-bottom: 8px;
            margin-left: 8px;
            font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;
        ">{summary}</div>

        <!-- Read more link -->
        <a href="{url}" target="_blank" rel="noopener noreferrer" style="
            color: #3861FB;
            font-size: 12px;
            text-decoration: none;
            margin-left: 8px;
            display: inline-flex;
            align-items: center;
            gap: 4px;
            font-weight: 500;
        " onmouseover="this.style.color='#5880FF';" onmouseout="this.style.color='#3861FB';">
            Read Full Story →
        </a>
    </div>
    """

    # The string is parsed as Markdown first: a blank line ends a raw-HTML
    # block and an indented line after it is treated as a code block. Collapse
    # every whitespace run (including newlines inside the summary) so the card
    # always reaches the browser as a single HTML block.
    card_html = " ".join(card_html.split())

    st.markdown(card_html, unsafe_allow_html=True)
138
+
139
+
140
def display_news_card(news_item: dict):
    """Render ``news_item`` through the TradingView-style card.

    Kept under its old name so existing callers keep working; all rendering
    is delegated to ``display_tradingview_news_card``.

    Args:
        news_item: News item mapping, passed through unchanged.
    """
    display_tradingview_news_card(news_item)
143
+
144
+
145
def display_scrollable_news_section(df: pd.DataFrame, section_title: str, section_icon: str,
                                    section_subtitle: str, max_items: int = 20, height: str = "600px"):
    """Display a titled, fixed-height, scrollable list of news cards.

    Args:
        df: News items, one per row; each row is passed to
            ``display_tradingview_news_card`` as a dict.
        section_title: Heading text. Inserted into the header markup verbatim,
            so it must come from trusted code, not from scraped content.
        section_icon: Icon/emoji shown before the title (inserted verbatim).
        section_subtitle: Secondary line under the title (inserted verbatim).
        max_items: Maximum number of rows rendered, taken from the top of ``df``.
        height: Height of the scroll area as a pixel length such as ``"600px"``
            or ``"600"``. Values that are not a positive pixel count fall back
            to an unbounded container.
    """
    if df.empty:
        st.markdown("""
        <div style="
            background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
            border: 1px solid #2A2E39;
            border-radius: 8px;
            padding: 20px;
            text-align: center;
            color: #787B86;
        ">
            <p style="font-size: 16px; margin: 0;">📭 No news available for this section</p>
        </div>
        """, unsafe_allow_html=True)
        return

    visible = df.head(max_items)

    # Section header
    header_html = f"""
    <div style="
        background: linear-gradient(135deg, #2A2E39 0%, #1E222D 100%);
        border: 1px solid #363A45;
        border-radius: 8px 8px 0 0;
        padding: 16px 20px;
        margin-bottom: 0;
    ">
        <div style="display: flex; justify-content: space-between; align-items: center;">
            <div>
                <h3 style="
                    color: #D1D4DC;
                    margin: 0;
                    font-size: 18px;
                    font-weight: 600;
                    font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;
                ">{section_icon} {section_title}</h3>
                <p style="
                    color: #787B86;
                    margin: 4px 0 0 0;
                    font-size: 12px;
                ">{section_subtitle}</p>
            </div>
            <div style="
                background: rgba(56, 97, 251, 0.15);
                color: #3861FB;
                padding: 6px 12px;
                border-radius: 6px;
                font-size: 13px;
                font-weight: 600;
            ">{len(visible)} stories</div>
        </div>
    </div>
    """
    st.markdown(header_html, unsafe_allow_html=True)

    # Translate the CSS-style height into the integer pixel count that
    # ``st.container`` expects.
    size = str(height).strip().lower()
    if size.endswith("px"):
        size = size[:-2].strip()
    pixel_height = int(size) if size.isdigit() and int(size) > 0 else None

    # Each ``st.markdown`` call is rendered as its own element, so an opening
    # ``<div>`` emitted in one call cannot wrap the cards emitted by later
    # calls: the old hand-written scroll box stayed empty and the cards were
    # laid out below it. A native fixed-height container scrolls its children.
    try:
        scroll_area = st.container(height=pixel_height) if pixel_height else st.container()
    except TypeError:
        # Streamlit releases without the ``height`` option: degrade to a
        # plain (non-scrolling) container rather than failing the page.
        scroll_area = st.container()

    # Display news items
    with scroll_area:
        for _, row in visible.iterrows():
            display_tradingview_news_card(row.to_dict())
243
 
244
 
245
  def display_news_feed(df: pd.DataFrame, max_items: int = 20):
246
+ """Display a feed of news items (legacy compatibility)."""
247
 
248
  if df.empty:
249
  st.info("📭 No news available. Adjust your filters or refresh the feed.")
250
  return
251
 
 
 
 
 
 
 
 
 
 
 
252
  # Display news items
253
  for idx, row in df.head(max_items).iterrows():
254
+ display_tradingview_news_card(row.to_dict())
255
 
256
 
257
  def display_news_statistics(stats: dict):
 
333
 
334
 
335
  def display_breaking_news_banner(df: pd.DataFrame):
336
+ """Display breaking news banner at the top with TradingView styling."""
337
 
338
  breaking = df[df['is_breaking'] == True] if not df.empty and 'is_breaking' in df.columns else pd.DataFrame()
339
 
340
  if not breaking.empty:
341
  latest = breaking.iloc[0]
342
 
343
+ # Escape HTML
344
+ summary = html_module.escape(latest.get('summary', '').strip())
345
+ source = html_module.escape(latest['source'])
346
+ url = html_module.escape(latest['url'])
347
+
348
+ # Calculate time ago
349
+ time_diff = datetime.now() - latest['timestamp']
350
+ if time_diff.seconds < 60:
351
+ time_ago = f"{time_diff.seconds}s ago"
352
+ elif time_diff.seconds < 3600:
353
+ time_ago = f"{time_diff.seconds // 60}m ago"
354
+ else:
355
+ hours = time_diff.seconds // 3600
356
+ time_ago = f"{hours}h ago" if hours < 24 else f"{time_diff.days}d ago"
357
+
358
+ # TradingView-style breaking news banner
359
+ banner_html = f"""
360
+ <style>
361
+ @keyframes pulse-glow {{
362
+ 0%, 100% {{ box-shadow: 0 0 20px rgba(242, 54, 69, 0.6); }}
363
+ 50% {{ box-shadow: 0 0 30px rgba(242, 54, 69, 0.9); }}
364
+ }}
365
+
366
+ @keyframes slide-in {{
367
+ from {{ transform: translateX(-10px); opacity: 0; }}
368
+ to {{ transform: translateX(0); opacity: 1; }}
369
+ }}
370
+ </style>
371
+
372
+ <div style="
373
+ background: linear-gradient(135deg, #F23645 0%, #C91B28 100%);
374
+ border: 2px solid #FF6B78;
375
+ border-radius: 12px;
376
+ padding: 20px 24px;
377
+ margin-bottom: 24px;
378
+ animation: pulse-glow 2s ease-in-out infinite;
379
+ position: relative;
380
+ overflow: hidden;
381
+ ">
382
+ <!-- Animated background pattern -->
383
+ <div style="
384
+ position: absolute;
385
+ top: 0;
386
+ left: 0;
387
+ right: 0;
388
+ bottom: 0;
389
+ background: repeating-linear-gradient(
390
+ 45deg,
391
+ transparent,
392
+ transparent 10px,
393
+ rgba(255, 255, 255, 0.03) 10px,
394
+ rgba(255, 255, 255, 0.03) 20px
395
+ );
396
+ pointer-events: none;
397
+ "></div>
398
+
399
+ <!-- Content -->
400
+ <div style="position: relative; z-index: 1;">
401
+ <div style="display: flex; align-items: center; gap: 16px; margin-bottom: 12px;">
402
+ <!-- Animated icon -->
403
+ <div style="
404
+ font-size: 32px;
405
+ animation: pulse-glow 1s ease-in-out infinite;
406
+ filter: drop-shadow(0 2px 8px rgba(0, 0, 0, 0.3));
407
+ ">🚨</div>
408
+
409
+ <!-- Header -->
410
+ <div style="flex: 1;">
411
+ <div style="
412
+ color: white;
413
+ font-size: 14px;
414
+ font-weight: 700;
415
+ letter-spacing: 1.5px;
416
+ text-transform: uppercase;
417
+ margin-bottom: 4px;
418
+ font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;
419
+ text-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
420
+ ">⚡ Breaking News</div>
421
+ <div style="
422
+ color: rgba(255, 255, 255, 0.9);
423
+ font-size: 11px;
424
+ display: flex;
425
+ align-items: center;
426
+ gap: 8px;
427
+ ">
428
+ <span style="
429
+ background: rgba(255, 255, 255, 0.2);
430
+ padding: 2px 8px;
431
+ border-radius: 4px;
432
+ font-weight: 600;
433
+ ">{source}</span>
434
+ <span style="opacity: 0.8;">•</span>
435
+ <span style="opacity: 0.8;">{time_ago}</span>
436
+ </div>
437
+ </div>
438
+
439
+ <!-- Read button -->
440
+ <a href="{url}" target="_blank" style="
441
+ background: white;
442
+ color: #F23645;
443
+ padding: 10px 20px;
444
+ border-radius: 6px;
445
+ font-size: 13px;
446
+ font-weight: 700;
447
+ text-decoration: none;
448
+ display: inline-flex;
449
+ align-items: center;
450
+ gap: 6px;
451
+ transition: all 0.2s ease;
452
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
453
+ " onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 4px 12px rgba(0, 0, 0, 0.3)';"
454
+ onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='0 2px 8px rgba(0, 0, 0, 0.2)';">
455
+ READ NOW →
456
+ </a>
457
+ </div>
458
+
459
+ <!-- News summary -->
460
+ <div style="
461
+ color: white;
462
+ font-size: 16px;
463
+ font-weight: 500;
464
+ line-height: 1.5;
465
+ margin-left: 48px;
466
+ font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;
467
+ text-shadow: 0 1px 2px rgba(0, 0, 0, 0.2);
468
+ animation: slide-in 0.5s ease-out;
469
+ ">{summary}</div>
470
+ </div>
471
+ </div>
472
+ """
473
+
474
+ st.markdown(banner_html, unsafe_allow_html=True)
app/pages/05_Dashboard.py CHANGED
@@ -12,10 +12,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12
 
13
  from components.styles import DARK_THEME_CSS
14
  from components.news import (
15
- display_news_feed,
16
  display_news_statistics,
17
  display_category_breakdown,
18
- display_breaking_news_banner
 
19
  )
20
 
21
  # Import news scrapers
@@ -220,50 +220,57 @@ if not all_news_df.empty:
220
 
221
  st.markdown("---")
222
 
223
- # ---- SECTION 1: Twitter/X Breaking News (Highest Priority) ----
224
- st.markdown("## 🐦 Twitter/X Financial News Feed")
225
- st.caption("Real-time breaking news from premium Twitter/X accounts (WalterBloomberg, Reuters, Bloomberg, FT, etc.)")
226
 
227
- if not twitter_filtered.empty:
228
- display_news_feed(twitter_filtered, max_items=15)
229
- elif not twitter_df.empty:
230
- st.info("📭 No Twitter news matches your current filters.")
231
- else:
232
- st.info("⏳ Twitter news scraping in progress... This may take 30-60 seconds on first load.")
233
 
234
- st.markdown("---")
235
-
236
- # ---- SECTION 2: Main Page News (Web-Scraped) ----
237
- st.markdown("## 🔥 Top Stories from Main Pages")
238
- st.caption("Latest headlines directly scraped from news source homepages")
239
-
240
- if not rss_main_filtered.empty:
241
- display_news_feed(rss_main_filtered, max_items=10)
242
- elif not rss_main_df.empty:
243
- st.info("📭 No main page news matches your filters.")
244
- else:
245
- st.info("⏳ Main page news will appear here...")
246
-
247
- st.markdown("---")
248
-
249
- # ---- SECTION 3: RSS Feed News (Lowest Priority) ----
250
- col1, col2, col3 = st.columns([2, 1, 1])
251
  with col1:
252
- st.markdown("## 📰 RSS Feed News")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  with col2:
254
- show_count = st.selectbox("Show", [10, 20, 50, 100], index=1, label_visibility="collapsed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  with col3:
 
256
  if not rss_all_filtered.empty:
257
- st.caption(f"Displaying {min(show_count, len(rss_all_filtered))} of {len(rss_all_filtered)} stories")
258
-
259
- st.caption("Aggregated news from RSS feeds across all sources")
260
-
261
- if not rss_all_filtered.empty:
262
- display_news_feed(rss_all_filtered, max_items=show_count)
263
- elif not rss_all_df.empty:
264
- st.info("📭 No RSS news matches your current filters.")
265
- else:
266
- st.info(" RSS feed news will appear here...")
 
 
267
 
268
  # Auto-refresh logic
269
  if auto_refresh:
 
12
 
13
  from components.styles import DARK_THEME_CSS
14
  from components.news import (
 
15
  display_news_statistics,
16
  display_category_breakdown,
17
+ display_breaking_news_banner,
18
+ display_scrollable_news_section
19
  )
20
 
21
  # Import news scrapers
 
220
 
221
  st.markdown("---")
222
 
223
+ # ---- THREE-COLUMN SCROLLABLE NEWS LAYOUT (TradingView Style) ----
 
 
224
 
225
+ col1, col2, col3 = st.columns(3)
 
 
 
 
 
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  with col1:
228
+ # SECTION 1: Twitter/X Breaking News
229
+ if not twitter_filtered.empty:
230
+ display_scrollable_news_section(
231
+ twitter_filtered,
232
+ section_title="Twitter/X News",
233
+ section_icon="🐦",
234
+ section_subtitle="Real-time breaking news from premium accounts",
235
+ max_items=50,
236
+ height="700px"
237
+ )
238
+ elif not twitter_df.empty:
239
+ st.info("📭 No Twitter news matches your current filters.")
240
+ else:
241
+ st.info("⏳ Twitter news scraping in progress... This may take 30-60 seconds on first load.")
242
+
243
  with col2:
244
+ # SECTION 2: Main Page News (Web-Scraped)
245
+ if not rss_main_filtered.empty:
246
+ display_scrollable_news_section(
247
+ rss_main_filtered,
248
+ section_title="Top Headlines",
249
+ section_icon="🔥",
250
+ section_subtitle="Latest from main pages",
251
+ max_items=50,
252
+ height="700px"
253
+ )
254
+ elif not rss_main_df.empty:
255
+ st.info("📭 No main page news matches your filters.")
256
+ else:
257
+ st.info("⏳ Main page news will appear here...")
258
+
259
  with col3:
260
+ # SECTION 3: RSS Feed News
261
  if not rss_all_filtered.empty:
262
+ display_scrollable_news_section(
263
+ rss_all_filtered,
264
+ section_title="RSS Feed",
265
+ section_icon="📰",
266
+ section_subtitle="Aggregated from all sources",
267
+ max_items=100,
268
+ height="700px"
269
+ )
270
+ elif not rss_all_df.empty:
271
+ st.info("📭 No RSS news matches your current filters.")
272
+ else:
273
+ st.info("⏳ RSS feed news will appear here...")
274
 
275
  # Auto-refresh logic
276
  if auto_refresh:
app/services/twitter_news_playwright.py CHANGED
@@ -208,51 +208,58 @@ class TwitterFinanceMonitor:
208
  logger.warning("Chromium not found in standard paths")
209
  return '/usr/bin/chromium' # Fallback
210
 
211
- def _scrape_twitter_profile(self, source_name: str, source_info: Dict, timeout: int = 15) -> List[Dict]:
212
- """Scrape tweets from a single Twitter profile using Playwright"""
213
  if not PLAYWRIGHT_AVAILABLE:
214
  logger.warning("Playwright not available")
215
  return []
216
 
217
  try:
218
  with sync_playwright() as p:
219
- # Launch lightweight browser
220
  browser = p.chromium.launch(
221
  executable_path=self.chromium_path,
222
  headless=True,
223
- args=['--disable-blink-features=AutomationControlled']
 
 
 
 
 
 
 
224
  )
225
  context = browser.new_context(
226
  user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
227
  )
228
  page = context.new_page()
229
 
230
- # Block images, fonts, and css for speed
231
  def route_intercept(route):
232
- if route.request.resource_type in ["image", "media", "font", "stylesheet"]:
233
  route.abort()
234
  else:
235
  route.continue_()
236
 
237
  page.route("**/*", route_intercept)
238
 
239
- # Navigate to profile
240
- logger.info(f"Scraping {source_name} from {source_info['url']}")
241
- page.goto(source_info['url'], timeout=timeout * 1000)
242
 
243
- # Wait for tweets to load
244
  try:
245
- page.wait_for_selector("article", timeout=timeout * 1000)
246
  except PlaywrightTimeoutError:
247
  logger.warning(f"Timeout waiting for tweets from {source_name}")
248
  browser.close()
249
  return []
250
 
251
- # Extract tweet texts
252
  tweet_elements = page.locator("article div[data-testid='tweetText']").all()
253
 
254
  news_items = []
255
- for idx, element in enumerate(tweet_elements[:20]): # Limit to 20 most recent
256
  try:
257
  text = element.text_content()
258
  if not text or len(text) < 10:
@@ -308,7 +315,7 @@ class TwitterFinanceMonitor:
308
  def scrape_twitter_news(_self, max_tweets: int = 100) -> List[Dict]:
309
  """
310
  Scrape latest financial news from Twitter using Playwright
311
- Runs in parallel for better performance
312
  """
313
  if not PLAYWRIGHT_AVAILABLE:
314
  logger.info("Playwright not available - using mock data")
@@ -317,17 +324,26 @@ class TwitterFinanceMonitor:
317
  all_news = []
318
  seen_texts = set()
319
 
320
- # Scrape sources in parallel with timeout
321
- with ThreadPoolExecutor(max_workers=3) as executor:
 
 
 
 
 
 
 
 
322
  futures = []
323
- for name, info in _self.SOURCES.items():
324
- future = executor.submit(_self._scrape_twitter_profile, name, info, timeout=15)
 
325
  futures.append((future, name))
326
 
327
  for future, source_name in futures:
328
  try:
329
- # Wait max 20 seconds per source
330
- news_items = future.result(timeout=20)
331
 
332
  # Deduplicate based on text similarity
333
  unique_items = []
@@ -342,7 +358,7 @@ class TwitterFinanceMonitor:
342
  logger.info(f"Fetched {len(unique_items)} unique tweets from {source_name}")
343
 
344
  except FuturesTimeoutError:
345
- logger.warning(f"Timeout scraping {source_name}")
346
  except Exception as e:
347
  logger.error(f"Error processing {source_name}: {e}")
348
 
 
208
  logger.warning("Chromium not found in standard paths")
209
  return '/usr/bin/chromium' # Fallback
210
 
211
+ def _scrape_twitter_profile(self, source_name: str, source_info: Dict, timeout: int = 12) -> List[Dict]:
212
+ """Scrape tweets from a single Twitter profile using Playwright (optimized for speed)"""
213
  if not PLAYWRIGHT_AVAILABLE:
214
  logger.warning("Playwright not available")
215
  return []
216
 
217
  try:
218
  with sync_playwright() as p:
219
+ # Launch lightweight browser with aggressive performance flags
220
  browser = p.chromium.launch(
221
  executable_path=self.chromium_path,
222
  headless=True,
223
+ args=[
224
+ '--disable-blink-features=AutomationControlled',
225
+ '--disable-dev-shm-usage', # Overcome limited resource problems
226
+ '--no-sandbox', # Required for some environments
227
+ '--disable-setuid-sandbox',
228
+ '--disable-gpu', # Not needed in headless
229
+ '--disable-software-rasterizer'
230
+ ]
231
  )
232
  context = browser.new_context(
233
  user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
234
  )
235
  page = context.new_page()
236
 
237
+ # Block images, fonts, css, and videos for maximum speed
238
  def route_intercept(route):
239
+ if route.request.resource_type in ["image", "media", "font", "stylesheet", "video"]:
240
  route.abort()
241
  else:
242
  route.continue_()
243
 
244
  page.route("**/*", route_intercept)
245
 
246
+ # Navigate to profile with reduced timeout
247
+ logger.info(f"Scraping {source_name}...")
248
+ page.goto(source_info['url'], timeout=timeout * 1000, wait_until="domcontentloaded") # Don't wait for full load
249
 
250
+ # Wait for tweets to load with reduced timeout
251
  try:
252
+ page.wait_for_selector("article", timeout=8000) # Fixed 8 second wait
253
  except PlaywrightTimeoutError:
254
  logger.warning(f"Timeout waiting for tweets from {source_name}")
255
  browser.close()
256
  return []
257
 
258
+ # Extract tweet texts (limit to 15 for speed)
259
  tweet_elements = page.locator("article div[data-testid='tweetText']").all()
260
 
261
  news_items = []
262
+ for idx, element in enumerate(tweet_elements[:15]): # Reduced from 20 to 15 for speed
263
  try:
264
  text = element.text_content()
265
  if not text or len(text) < 10:
 
315
  def scrape_twitter_news(_self, max_tweets: int = 100) -> List[Dict]:
316
  """
317
  Scrape latest financial news from Twitter using Playwright
318
+ Runs in parallel for better performance - 19 sources in ~30-45 seconds
319
  """
320
  if not PLAYWRIGHT_AVAILABLE:
321
  logger.info("Playwright not available - using mock data")
 
324
  all_news = []
325
  seen_texts = set()
326
 
327
+ # Sort sources by weight (priority) - scrape high-value sources first
328
+ sorted_sources = sorted(
329
+ _self.SOURCES.items(),
330
+ key=lambda x: x[1]['weight'],
331
+ reverse=True
332
+ )
333
+
334
+ # Scrape sources in parallel with higher concurrency
335
+ # 8 workers = 19 sources in 3 batches (~30-45 seconds total)
336
+ with ThreadPoolExecutor(max_workers=8) as executor:
337
  futures = []
338
+ for name, info in sorted_sources:
339
+ # Reduced timeout for faster failures
340
+ future = executor.submit(_self._scrape_twitter_profile, name, info, timeout=12)
341
  futures.append((future, name))
342
 
343
  for future, source_name in futures:
344
  try:
345
+ # Wait max 15 seconds per source (down from 20)
346
+ news_items = future.result(timeout=15)
347
 
348
  # Deduplicate based on text similarity
349
  unique_items = []
 
358
  logger.info(f"Fetched {len(unique_items)} unique tweets from {source_name}")
359
 
360
  except FuturesTimeoutError:
361
+ logger.warning(f"Timeout scraping {source_name} - skipping")
362
  except Exception as e:
363
  logger.error(f"Error processing {source_name}: {e}")
364