Dmitry Beresnev committed on
Commit
a584bff
·
1 Parent(s): f6443c4

add prediction markets, sectoral news, market events, economic calendar

Browse files
app/components/news.py CHANGED
@@ -425,3 +425,318 @@ to {{ transform: translateX(0); opacity: 1; }}
425
  </div>"""
426
 
427
  st.markdown(banner_html, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  </div>"""
426
 
427
  st.markdown(banner_html, unsafe_allow_html=True)
428
+
429
+
430
def display_prediction_card(prediction_item: dict):
    """Display a single prediction market card with probability visualization.

    Args:
        prediction_item: Mapping with keys 'title', 'source', 'url',
            'yes_probability' / 'no_probability' (percentages, 0-100), and
            optionally 'end_date' (str or datetime) and 'volume' (numeric USD).

    Renders the card via st.markdown; returns None.
    """

    # Escape HTML in text. Use .get() with defaults throughout so a partially
    # populated item degrades gracefully instead of raising KeyError (the
    # original indexed 'source' and 'url' directly).
    title = html_module.escape(prediction_item.get('title', '').strip())
    source = html_module.escape(prediction_item.get('source', ''))
    url = html_module.escape(prediction_item.get('url', ''))

    def _pct(value):
        # Clamp to [0, 100] so malformed feed values cannot produce an
        # invalid CSS width for the probability bar; non-numeric -> 50/50.
        try:
            return min(max(float(value), 0.0), 100.0)
        except (TypeError, ValueError):
            return 50.0

    # Get probabilities
    yes_prob = _pct(prediction_item.get('yes_probability', 50.0))
    no_prob = _pct(prediction_item.get('no_probability', 50.0))

    # Determine bar color based on probabilities
    if yes_prob > 60:
        bar_color = '#089981'  # Green - likely YES
        sentiment_text = 'YES LIKELY'
    elif no_prob > 60:
        bar_color = '#F23645'  # Red - likely NO
        sentiment_text = 'NO LIKELY'
    else:
        bar_color = '#FF9800'  # Orange - balanced
        sentiment_text = 'BALANCED'

    # Format end date if available
    end_date = prediction_item.get('end_date')
    end_date_display = ""
    if end_date:
        if isinstance(end_date, str):
            end_date_display = end_date
        else:
            try:
                days_until = (end_date - datetime.now()).days
                end_date_display = f"Closes in {days_until}d" if days_until > 0 else "Closed"
            except TypeError:
                # Subtracting a timezone-aware datetime from naive now()
                # raises TypeError; omit the close date rather than crash.
                pass

    # Volume display (None is treated the same as "no volume data")
    volume = prediction_item.get('volume') or 0
    if volume > 1000000:
        volume_display = f"${volume/1000000:.1f}M volume"
    elif volume > 1000:
        volume_display = f"${volume/1000:.1f}K volume"
    elif volume > 0:
        volume_display = f"${volume:.0f} volume"
    else:
        volume_display = ""

    # Prediction card HTML
    card_html = f"""
    <div style="
        background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
        border: 1px solid #2A2E39;
        border-radius: 8px;
        padding: 16px;
        margin-bottom: 12px;
        transition: all 0.2s ease;
        cursor: pointer;
    " onmouseover="this.style.borderColor='#3861FB'; this.style.transform='translateY(-2px)';"
    onmouseout="this.style.borderColor='#2A2E39'; this.style.transform='translateY(0)';">

        <!-- Header -->
        <div style="margin-bottom: 12px;">
            <div style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 8px;">
                <span style="color: #3861FB; font-weight: 600; font-size: 13px;">{source}</span>
                <span style="
                    background: {bar_color};
                    color: white;
                    padding: 2px 8px;
                    border-radius: 4px;
                    font-size: 10px;
                    font-weight: 700;
                ">{sentiment_text}</span>
            </div>
            <div style="color: #D1D4DC; font-size: 14px; font-weight: 500; line-height: 1.4; margin-bottom: 8px;">
                {title}
            </div>
        </div>

        <!-- Probability Visualization -->
        <div style="margin-bottom: 10px;">
            <div style="display: flex; justify-content: space-between; margin-bottom: 4px;">
                <span style="color: #089981; font-size: 12px; font-weight: 600;">YES {yes_prob:.1f}%</span>
                <span style="color: #F23645; font-size: 12px; font-weight: 600;">NO {no_prob:.1f}%</span>
            </div>
            <!-- Horizontal probability bar -->
            <div style="
                display: flex;
                height: 8px;
                border-radius: 4px;
                overflow: hidden;
                background: #2A2E39;
            ">
                <div style="
                    width: {yes_prob}%;
                    background: #089981;
                    transition: width 0.3s ease;
                "></div>
                <div style="
                    width: {no_prob}%;
                    background: #F23645;
                    transition: width 0.3s ease;
                "></div>
            </div>
        </div>

        <!-- Footer info -->
        <div style="display: flex; justify-content: space-between; align-items: center;">
            <div style="color: #787B86; font-size: 11px;">
                {end_date_display}{" • " + volume_display if volume_display and end_date_display else volume_display}
            </div>
            <a href="{url}" target="_blank" style="
                color: #3861FB;
                font-size: 11px;
                font-weight: 600;
                text-decoration: none;
            ">View Market →</a>
        </div>
    </div>
    """

    st.markdown(card_html, unsafe_allow_html=True)
549
+
550
+
551
def display_economic_event_card(event_item: dict):
    """Display a single economic event card with forecast/actual comparison.

    Args:
        event_item: Mapping with 'event_name' (or 'title'), 'country', 'url',
            optional numeric 'forecast'/'previous'/'actual',
            'importance' ('high'/'medium'/'low') and 'time_to_event'.

    Renders the card via st.markdown; returns None.
    """

    # Escape HTML on every value interpolated into the markup, including
    # importance and time_to_event (the original left those unescaped).
    title = html_module.escape(event_item.get('event_name', event_item.get('title', '')).strip())
    country = html_module.escape(event_item.get('country', 'US'))
    url = html_module.escape(event_item.get('url', ''))

    # Get values
    forecast = event_item.get('forecast')
    previous = event_item.get('previous')
    actual = event_item.get('actual')
    # Coerce to str so a non-string importance cannot break .upper() below.
    importance = str(event_item.get('importance', 'medium'))

    # Importance badge color
    importance_colors = {
        'high': '#F23645',
        'medium': '#FF9800',
        'low': '#787B86'
    }
    importance_color = importance_colors.get(importance, '#787B86')
    importance_label = html_module.escape(importance.upper())

    # Time to event
    time_to_event = html_module.escape(str(event_item.get('time_to_event', '')))

    def format_value(val):
        # Heuristic display: small magnitudes are assumed to be percentages
        # (e.g. rates, CPI) — TODO confirm units against the feed schema.
        if val is None:
            return '-'
        if isinstance(val, (int, float)):
            if abs(val) < 100:
                return f"{val:.1f}%"
            else:
                return f"{val:.1f}"
        return html_module.escape(str(val))

    forecast_display = format_value(forecast)
    previous_display = format_value(previous)
    actual_display = format_value(actual)

    # Determine if beat/miss. Compare only when BOTH values are numeric —
    # the original compared any non-None values and raised TypeError when
    # the feed delivered strings.
    beat_miss_html = ""
    if isinstance(actual, (int, float)) and isinstance(forecast, (int, float)):
        if actual > forecast:
            beat_miss_html = '<span style="color: #089981; font-weight: 700;">[BEAT]</span>'
        elif actual < forecast:
            beat_miss_html = '<span style="color: #F23645; font-weight: 700;">[MISS]</span>'

    # Country flag emojis
    country_flags = {
        'US': '🇺🇸',
        'EU': '🇪🇺',
        'UK': '🇬🇧',
        'JP': '🇯🇵',
        'CN': '🇨🇳',
        'CA': '🇨🇦',
        'AU': '🇦🇺'
    }
    flag = country_flags.get(country, '🌍')

    # Event card HTML
    card_html = f"""
    <div style="
        background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
        border: 1px solid #2A2E39;
        border-radius: 8px;
        padding: 16px;
        margin-bottom: 12px;
        transition: all 0.2s ease;
    " onmouseover="this.style.borderColor='#3861FB'; this.style.transform='translateY(-2px)';"
    onmouseout="this.style.borderColor='#2A2E39'; this.style.transform='translateY(0)';">

        <!-- Header -->
        <div style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 12px;">
            <div style="flex: 1;">
                <div style="display: flex; align-items: center; gap: 8px; margin-bottom: 6px;">
                    <span style="font-size: 20px;">{flag}</span>
                    <span style="
                        background: {importance_color};
                        color: white;
                        padding: 2px 8px;
                        border-radius: 4px;
                        font-size: 10px;
                        font-weight: 700;
                    ">{importance_label}</span>
                </div>
                <div style="color: #D1D4DC; font-size: 14px; font-weight: 500; line-height: 1.4;">
                    {title}
                </div>
            </div>
            {f'<div style="color: #3861FB; font-size: 12px; font-weight: 600; white-space: nowrap; margin-left: 12px;">{time_to_event}</div>' if time_to_event else ''}
        </div>

        <!-- Values comparison -->
        <div style="background: #0D0E13; border-radius: 6px; padding: 10px; margin-bottom: 8px;">
            <div style="display: flex; justify-content: space-between; margin-bottom: 6px;">
                <span style="color: #787B86; font-size: 11px;">Forecast:</span>
                <span style="color: #D1D4DC; font-size: 12px; font-weight: 600;">{forecast_display}</span>
            </div>
            <div style="display: flex; justify-content: space-between; margin-bottom: 6px;">
                <span style="color: #787B86; font-size: 11px;">Previous:</span>
                <span style="color: #D1D4DC; font-size: 12px; font-weight: 600;">{previous_display}</span>
            </div>
            {f'<div style="display: flex; justify-content: space-between;"><span style="color: #787B86; font-size: 11px;">Actual:</span><span style="color: #D1D4DC; font-size: 12px; font-weight: 600;">{actual_display} {beat_miss_html}</span></div>' if actual is not None else ''}
        </div>
    </div>
    """

    st.markdown(card_html, unsafe_allow_html=True)
661
+
662
+
663
def display_economic_calendar_widget(events_df: pd.DataFrame):
    """Display economic calendar widget showing upcoming events.

    Args:
        events_df: DataFrame with columns 'event_name' (or 'title'),
            'country', 'importance', 'time_to_event' and 'forecast'.
            Only the first 10 rows are rendered.

    Renders via st.markdown; returns None.
    """

    if events_df.empty:
        st.info("📅 No upcoming economic events in the next 7 days")
        return

    # Widget container
    widget_html = """
    <div style="
        background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
        border: 1px solid #2A2E39;
        border-radius: 12px;
        padding: 20px;
        margin-bottom: 20px;
    ">
        <div style="margin-bottom: 16px;">
            <h3 style="color: #D1D4DC; font-size: 18px; font-weight: 600; margin: 0;">
                📅 Economic Calendar
            </h3>
            <p style="color: #787B86; font-size: 13px; margin: 4px 0 0 0;">
                Upcoming high-impact events
            </p>
        </div>
    """

    # Lookup tables are loop-invariant — build them once, not per row.
    country_flags = {
        'US': '🇺🇸',
        'EU': '🇪🇺',
        'UK': '🇬🇧',
        'JP': '🇯🇵',
        'CN': '🇨🇳'
    }
    importance_stars = {'high': 3, 'medium': 2, 'low': 1}
    importance_colors = {'high': '#F23645', 'medium': '#FF9800'}

    # Show top 10 events
    for _, event in events_df.head(10).iterrows():
        # Get event details (escaped before HTML interpolation)
        event_name = html_module.escape(event.get('event_name', event.get('title', '')))
        country = html_module.escape(event.get('country', 'US'))
        importance = event.get('importance', 'medium')
        time_to_event = html_module.escape(str(event.get('time_to_event', '')))
        forecast = event.get('forecast')

        flag = country_flags.get(country, '🌍')

        # Importance stars
        stars = '⭐' * importance_stars.get(importance, 1)

        # Format forecast. pandas yields NaN (not None) for missing cells and
        # the feed may deliver strings; the original formatted both with
        # "{:.1f}" and crashed or rendered "nan".
        if forecast is None or (isinstance(forecast, float) and pd.isna(forecast)):
            forecast_display = "N/A"
        elif isinstance(forecast, (int, float)):
            forecast_display = f"{forecast:.1f}"
        else:
            forecast_display = html_module.escape(str(forecast))

        border_color = importance_colors.get(importance, '#787B86')

        event_html = f"""
        <div style="
            background: #0D0E13;
            border-left: 3px solid {border_color};
            border-radius: 6px;
            padding: 12px;
            margin-bottom: 10px;
        ">
            <div style="display: flex; justify-content: space-between; align-items: center;">
                <div style="flex: 1;">
                    <div style="color: #D1D4DC; font-size: 13px; font-weight: 500; margin-bottom: 4px;">
                        {flag} {event_name}
                    </div>
                    <div style="color: #787B86; font-size: 11px;">
                        {stars} Forecast: {forecast_display}
                    </div>
                </div>
                <div style="color: #3861FB; font-size: 12px; font-weight: 600; white-space: nowrap; margin-left: 12px;">
                    {time_to_event}
                </div>
            </div>
        </div>
        """

        widget_html += event_html

    widget_html += "</div>"

    st.markdown(widget_html, unsafe_allow_html=True)
app/pages/05_Dashboard.py CHANGED
@@ -15,7 +15,10 @@ from components.news import (
15
  display_news_statistics,
16
  display_category_breakdown,
17
  display_breaking_news_banner,
18
- display_scrollable_news_section
 
 
 
19
  )
20
  from utils.breaking_news_scorer import get_breaking_news_scorer
21
 
@@ -44,6 +47,30 @@ try:
44
  except ImportError:
45
  AI_TECH_AVAILABLE = False
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # ---- Page Configuration ----
49
  st.set_page_config(
@@ -69,10 +96,26 @@ if 'reddit_monitor' not in st.session_state and REDDIT_AVAILABLE:
69
  if 'ai_tech_monitor' not in st.session_state and AI_TECH_AVAILABLE:
70
  st.session_state.ai_tech_monitor = AITechNewsScraper()
71
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  rss_monitor = st.session_state.get('rss_monitor')
73
  twitter_monitor = st.session_state.get('twitter_monitor')
74
  reddit_monitor = st.session_state.get('reddit_monitor')
75
  ai_tech_monitor = st.session_state.get('ai_tech_monitor')
 
 
 
 
76
 
77
  # Initialize unified cache manager
78
  if 'news_cache_manager' not in st.session_state:
@@ -165,7 +208,10 @@ with st.sidebar:
165
  reddit_sources = len(reddit_monitor.SUBREDDITS) if reddit_monitor else 0
166
  rss_sources = len(rss_monitor.SOURCES) if rss_monitor else 0
167
  ai_tech_sources = len(ai_tech_monitor.SOURCES) if ai_tech_monitor else 0
168
- total_sources = twitter_sources + reddit_sources + rss_sources + ai_tech_sources
 
 
 
169
 
170
  st.markdown(f"""
171
  <div style='font-size: 11px; line-height: 1.6;'>
@@ -192,6 +238,17 @@ with st.sidebar:
192
  • TechCrunch • The Verge • VentureBeat
193
  • MIT Tech Review • Wired • Ars Technica
194
 
 
 
 
 
 
 
 
 
 
 
 
195
  **Total: {total_sources} Premium Sources**
196
  </div>
197
  """, unsafe_allow_html=True)
@@ -211,6 +268,10 @@ reddit_df = pd.DataFrame()
211
  rss_all_df = pd.DataFrame()
212
  rss_main_df = pd.DataFrame()
213
  ai_tech_df = pd.DataFrame()
 
 
 
 
214
 
215
  def fetch_twitter_news():
216
  """Fetch Twitter/X news via cache manager"""
@@ -294,19 +355,102 @@ def fetch_ai_tech_news():
294
  return pd.DataFrame(), f"AI/Tech news unavailable: {e}"
295
  return pd.DataFrame(), None
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  # Progressive loading: Display results as they arrive
298
  # Create a status placeholder to show progress
299
  status_placeholder = st.empty()
300
 
301
  # Execute all news fetching operations in parallel using ThreadPoolExecutor
302
- with st.spinner("Loading news from 4 sources..."):
303
- with ThreadPoolExecutor(max_workers=4) as executor:
304
  # Submit all tasks with source name attached
305
  futures_map = {
306
  executor.submit(fetch_twitter_news): 'twitter',
307
  executor.submit(fetch_reddit_news): 'reddit',
308
  executor.submit(fetch_rss_news): 'rss',
309
- executor.submit(fetch_ai_tech_news): 'ai_tech'
 
 
 
 
310
  }
311
 
312
  # Track errors and completion
@@ -323,7 +467,7 @@ with st.spinner("Loading news from 4 sources..."):
323
 
324
  # Update status
325
  completed_sources.append(source_name)
326
- status_placeholder.info(f"🔍 Loaded {len(completed_sources)}/4 sources ({', '.join(completed_sources)})")
327
 
328
  if source_name == 'twitter':
329
  twitter_df = result_df
@@ -344,6 +488,22 @@ with st.spinner("Loading news from 4 sources..."):
344
  ai_tech_df = result_df
345
  if error:
346
  fetch_errors.append(error)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
  except Exception as e:
349
  fetch_errors.append(f"Error fetching {source_name} news: {e}")
@@ -353,7 +513,7 @@ with st.spinner("Loading news from 4 sources..."):
353
  except TimeoutError:
354
  # Handle timeout gracefully - continue with whatever results we have
355
  fetch_errors.append("⏱️ Some sources timed out after 90 seconds - displaying available results")
356
- status_placeholder.warning(f"⚠️ {len(completed_sources)}/4 sources loaded (some timed out)")
357
 
358
  # Mark incomplete sources
359
  all_sources = set(futures_map.values())
@@ -363,7 +523,7 @@ with st.spinner("Loading news from 4 sources..."):
363
  completed_sources.append(f"{source} (timeout)")
364
 
365
  # Clear the status message after all sources complete
366
- status_placeholder.success(f"✅ Loaded {len(completed_sources)}/4 sources successfully")
367
 
368
  # Debug output (remove in production)
369
  if st.session_state.get('debug_mode', False):
@@ -430,6 +590,11 @@ else:
430
 
431
  st.markdown("---")
432
 
 
 
 
 
 
433
  # ---- FOUR-COLUMN SCROLLABLE NEWS LAYOUT (TradingView Style) ----
434
 
435
  col1, col2, col3, col4 = st.columns(4)
@@ -581,6 +746,90 @@ with col4:
581
  </style>
582
  """, unsafe_allow_html=True)
583
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  # Display fetch errors in expander (less intrusive)
585
  if 'fetch_errors' in locals() and fetch_errors:
586
  with st.expander("⚠️ Source Fetch Warnings", expanded=False):
 
15
  display_news_statistics,
16
  display_category_breakdown,
17
  display_breaking_news_banner,
18
+ display_scrollable_news_section,
19
+ display_prediction_card,
20
+ display_economic_event_card,
21
+ display_economic_calendar_widget
22
  )
23
  from utils.breaking_news_scorer import get_breaking_news_scorer
24
 
 
47
  except ImportError:
48
  AI_TECH_AVAILABLE = False
49
 
50
+ try:
51
+ from services.prediction_markets import PredictionMarketsScraper
52
+ PREDICTIONS_AVAILABLE = True
53
+ except ImportError:
54
+ PREDICTIONS_AVAILABLE = False
55
+
56
+ try:
57
+ from services.sectoral_news import SectoralNewsScraper
58
+ SECTORAL_AVAILABLE = True
59
+ except ImportError:
60
+ SECTORAL_AVAILABLE = False
61
+
62
+ try:
63
+ from services.market_events import MarketEventsScraper
64
+ EVENTS_AVAILABLE = True
65
+ except ImportError:
66
+ EVENTS_AVAILABLE = False
67
+
68
+ try:
69
+ from services.economic_calendar import EconomicCalendarService
70
+ CALENDAR_AVAILABLE = True
71
+ except ImportError:
72
+ CALENDAR_AVAILABLE = False
73
+
74
 
75
  # ---- Page Configuration ----
76
  st.set_page_config(
 
96
  if 'ai_tech_monitor' not in st.session_state and AI_TECH_AVAILABLE:
97
  st.session_state.ai_tech_monitor = AITechNewsScraper()
98
 
99
+ if 'prediction_markets_monitor' not in st.session_state and PREDICTIONS_AVAILABLE:
100
+ st.session_state.prediction_markets_monitor = PredictionMarketsScraper()
101
+
102
+ if 'sectoral_news_monitor' not in st.session_state and SECTORAL_AVAILABLE:
103
+ st.session_state.sectoral_news_monitor = SectoralNewsScraper()
104
+
105
+ if 'market_events_monitor' not in st.session_state and EVENTS_AVAILABLE:
106
+ st.session_state.market_events_monitor = MarketEventsScraper()
107
+
108
+ if 'economic_calendar_service' not in st.session_state and CALENDAR_AVAILABLE:
109
+ st.session_state.economic_calendar_service = EconomicCalendarService()
110
+
111
  rss_monitor = st.session_state.get('rss_monitor')
112
  twitter_monitor = st.session_state.get('twitter_monitor')
113
  reddit_monitor = st.session_state.get('reddit_monitor')
114
  ai_tech_monitor = st.session_state.get('ai_tech_monitor')
115
+ prediction_markets_monitor = st.session_state.get('prediction_markets_monitor')
116
+ sectoral_news_monitor = st.session_state.get('sectoral_news_monitor')
117
+ market_events_monitor = st.session_state.get('market_events_monitor')
118
+ economic_calendar_service = st.session_state.get('economic_calendar_service')
119
 
120
  # Initialize unified cache manager
121
  if 'news_cache_manager' not in st.session_state:
 
208
  reddit_sources = len(reddit_monitor.SUBREDDITS) if reddit_monitor else 0
209
  rss_sources = len(rss_monitor.SOURCES) if rss_monitor else 0
210
  ai_tech_sources = len(ai_tech_monitor.SOURCES) if ai_tech_monitor else 0
211
+ prediction_sources = 3 # Polymarket, Metaculus, CME FedWatch
212
+ sectoral_sources = 7 # 7 sectors
213
+ events_sources = 3 # Earnings, indicators, central banks
214
+ total_sources = twitter_sources + reddit_sources + rss_sources + ai_tech_sources + prediction_sources + sectoral_sources + events_sources
215
 
216
  st.markdown(f"""
217
  <div style='font-size: 11px; line-height: 1.6;'>
 
238
  • TechCrunch • The Verge • VentureBeat
239
  • MIT Tech Review • Wired • Ars Technica
240
 
241
+ **Prediction Markets ({prediction_sources})**
242
+ • Polymarket • Metaculus • CME FedWatch
243
+
244
+ **Sectoral Coverage ({sectoral_sources})**
245
+ • Finance • Tech • Energy • Healthcare
246
+ • Consumer • Industrials • Real Estate
247
+
248
+ **Market Events ({events_sources})**
249
+ • Earnings Calendar • Economic Indicators
250
+ • Central Bank Events (Fed, ECB, BoE, BoJ)
251
+
252
  **Total: {total_sources} Premium Sources**
253
  </div>
254
  """, unsafe_allow_html=True)
 
268
  rss_all_df = pd.DataFrame()
269
  rss_main_df = pd.DataFrame()
270
  ai_tech_df = pd.DataFrame()
271
+ predictions_df = pd.DataFrame()
272
+ sectoral_news_df = pd.DataFrame()
273
+ market_events_df = pd.DataFrame()
274
+ economic_calendar_df = pd.DataFrame()
275
 
276
  def fetch_twitter_news():
277
  """Fetch Twitter/X news via cache manager"""
 
355
  return pd.DataFrame(), f"AI/Tech news unavailable: {e}"
356
  return pd.DataFrame(), None
357
 
358
def fetch_prediction_markets():
    """Fetch prediction market data via cache manager.

    Returns:
        (DataFrame, error): fetched items (empty frame when the monitor is
        unavailable or nothing was returned) and an error string or None.
    """
    if not prediction_markets_monitor:
        return pd.DataFrame(), None
    try:
        items = cache_manager.get_news(
            source='predictions',
            fetcher_func=prediction_markets_monitor.scrape_predictions,
            force_refresh=force_refresh,
            max_items=50
        )
        if not items:
            return pd.DataFrame(), None
        frame = pd.DataFrame(items)
        if not frame.empty:
            # Normalise timestamps for downstream sorting/filtering.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        return pd.DataFrame(), f"Prediction markets unavailable: {e}"
376
+
377
def fetch_sectoral_news():
    """Fetch sectoral news via cache manager.

    Returns:
        (DataFrame, error): sector items (empty frame when the monitor is
        unavailable or nothing was returned) and an error string or None.
    """
    if not sectoral_news_monitor:
        return pd.DataFrame(), None
    try:
        items = cache_manager.get_news(
            source='sectoral_news',
            fetcher_func=sectoral_news_monitor.scrape_sectoral_news,
            force_refresh=force_refresh,
            max_items=50,
            hours=24
        )
        if not items:
            return pd.DataFrame(), None
        frame = pd.DataFrame(items)
        if not frame.empty:
            # Normalise timestamps for downstream sorting/filtering.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        return pd.DataFrame(), f"Sectoral news unavailable: {e}"
396
+
397
def fetch_market_events():
    """Fetch market events via cache manager.

    Returns:
        (DataFrame, error): event items (empty frame when the monitor is
        unavailable or nothing was returned) and an error string or None.
    """
    if not market_events_monitor:
        return pd.DataFrame(), None
    try:
        items = cache_manager.get_news(
            source='market_events',
            fetcher_func=market_events_monitor.scrape_market_events,
            force_refresh=force_refresh,
            max_items=50,
            days_ahead=14
        )
        if not items:
            return pd.DataFrame(), None
        frame = pd.DataFrame(items)
        if not frame.empty:
            # Normalise timestamps for downstream sorting/filtering.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        return pd.DataFrame(), f"Market events unavailable: {e}"
416
+
417
def fetch_economic_calendar():
    """Fetch economic calendar via cache manager.

    Returns:
        (DataFrame, error): upcoming calendar events (empty frame when the
        service is unavailable or nothing was returned) and an error
        string or None.
    """
    if not economic_calendar_service:
        return pd.DataFrame(), None
    try:
        items = cache_manager.get_news(
            source='economic_calendar',
            fetcher_func=economic_calendar_service.get_upcoming_events,
            force_refresh=force_refresh,
            days_ahead=7,
            min_importance='medium'
        )
        if not items:
            return pd.DataFrame(), None
        frame = pd.DataFrame(items)
        if not frame.empty:
            # Normalise timestamps for downstream sorting/filtering.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        return pd.DataFrame(), f"Economic calendar unavailable: {e}"
436
+
437
  # Progressive loading: Display results as they arrive
438
  # Create a status placeholder to show progress
439
  status_placeholder = st.empty()
440
 
441
  # Execute all news fetching operations in parallel using ThreadPoolExecutor
442
+ with st.spinner("Loading news from 8 sources..."):
443
+ with ThreadPoolExecutor(max_workers=8) as executor:
444
  # Submit all tasks with source name attached
445
  futures_map = {
446
  executor.submit(fetch_twitter_news): 'twitter',
447
  executor.submit(fetch_reddit_news): 'reddit',
448
  executor.submit(fetch_rss_news): 'rss',
449
+ executor.submit(fetch_ai_tech_news): 'ai_tech',
450
+ executor.submit(fetch_prediction_markets): 'predictions',
451
+ executor.submit(fetch_sectoral_news): 'sectoral_news',
452
+ executor.submit(fetch_market_events): 'market_events',
453
+ executor.submit(fetch_economic_calendar): 'economic_calendar'
454
  }
455
 
456
  # Track errors and completion
 
467
 
468
  # Update status
469
  completed_sources.append(source_name)
470
+ status_placeholder.info(f"🔍 Loaded {len(completed_sources)}/8 sources ({', '.join(completed_sources)})")
471
 
472
  if source_name == 'twitter':
473
  twitter_df = result_df
 
488
  ai_tech_df = result_df
489
  if error:
490
  fetch_errors.append(error)
491
+ elif source_name == 'predictions':
492
+ predictions_df = result_df
493
+ if error:
494
+ fetch_errors.append(error)
495
+ elif source_name == 'sectoral_news':
496
+ sectoral_news_df = result_df
497
+ if error:
498
+ fetch_errors.append(error)
499
+ elif source_name == 'market_events':
500
+ market_events_df = result_df
501
+ if error:
502
+ fetch_errors.append(error)
503
+ elif source_name == 'economic_calendar':
504
+ economic_calendar_df = result_df
505
+ if error:
506
+ fetch_errors.append(error)
507
 
508
  except Exception as e:
509
  fetch_errors.append(f"Error fetching {source_name} news: {e}")
 
513
  except TimeoutError:
514
  # Handle timeout gracefully - continue with whatever results we have
515
  fetch_errors.append("⏱️ Some sources timed out after 90 seconds - displaying available results")
516
+ status_placeholder.warning(f"⚠️ {len(completed_sources)}/8 sources loaded (some timed out)")
517
 
518
  # Mark incomplete sources
519
  all_sources = set(futures_map.values())
 
523
  completed_sources.append(f"{source} (timeout)")
524
 
525
  # Clear the status message after all sources complete
526
+ status_placeholder.success(f"✅ Loaded {len(completed_sources)}/8 sources successfully")
527
 
528
  # Debug output (remove in production)
529
  if st.session_state.get('debug_mode', False):
 
590
 
591
  st.markdown("---")
592
 
593
+ # ---- ECONOMIC CALENDAR WIDGET ----
594
+ if not economic_calendar_df.empty:
595
+ display_economic_calendar_widget(economic_calendar_df)
596
+ st.markdown("---")
597
+
598
  # ---- FOUR-COLUMN SCROLLABLE NEWS LAYOUT (TradingView Style) ----
599
 
600
  col1, col2, col3, col4 = st.columns(4)
 
746
  </style>
747
  """, unsafe_allow_html=True)
748
 
749
+ # ---- SECOND ROW: MARKET INTELLIGENCE (3 COLUMNS) ----
750
+ st.markdown("---")
751
+ st.markdown("## 📊 Market Intelligence - Predictions, Sectors & Events")
752
+
753
+ col5, col6, col7 = st.columns(3)
754
+
755
+ with col5:
756
+ # Prediction Markets Column
757
+ if not predictions_df.empty:
758
+ display_scrollable_news_section(
759
+ predictions_df,
760
+ section_title="Prediction Markets",
761
+ section_icon="🎲",
762
+ section_subtitle="Polymarket, Metaculus & CME FedWatch",
763
+ max_items=50,
764
+ height="600px"
765
+ )
766
+ else:
767
+ st.markdown("""
768
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
769
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
770
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading Prediction Markets</div>
771
+ <div style="color: #787B86; font-size: 13px;">Fetching market forecasts...</div>
772
+ </div>
773
+ <style>
774
+ @keyframes pulse {
775
+ 0%, 100% { opacity: 1; transform: scale(1); }
776
+ 50% { opacity: 0.6; transform: scale(1.1); }
777
+ }
778
+ </style>
779
+ """, unsafe_allow_html=True)
780
+
781
+ with col6:
782
+ # Sectoral News Column
783
+ if not sectoral_news_df.empty:
784
+ display_scrollable_news_section(
785
+ sectoral_news_df,
786
+ section_title="Sectoral News",
787
+ section_icon="🏭",
788
+ section_subtitle="7 sectors: Finance, Tech, Energy & more",
789
+ max_items=50,
790
+ height="600px"
791
+ )
792
+ else:
793
+ st.markdown("""
794
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
795
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
796
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading Sectoral News</div>
797
+ <div style="color: #787B86; font-size: 13px;">Aggregating sector-specific news...</div>
798
+ </div>
799
+ <style>
800
+ @keyframes pulse {
801
+ 0%, 100% { opacity: 1; transform: scale(1); }
802
+ 50% { opacity: 0.6; transform: scale(1.1); }
803
+ }
804
+ </style>
805
+ """, unsafe_allow_html=True)
806
+
807
+ with col7:
808
+ # Market Events Column
809
+ if not market_events_df.empty:
810
+ display_scrollable_news_section(
811
+ market_events_df,
812
+ section_title="Market Events",
813
+ section_icon="📈",
814
+ section_subtitle="Earnings, indicators & central banks",
815
+ max_items=50,
816
+ height="600px"
817
+ )
818
+ else:
819
+ st.markdown("""
820
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
821
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
822
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading Market Events</div>
823
+ <div style="color: #787B86; font-size: 13px;">Fetching earnings & economic indicators...</div>
824
+ </div>
825
+ <style>
826
+ @keyframes pulse {
827
+ 0%, 100% { opacity: 1; transform: scale(1); }
828
+ 50% { opacity: 0.6; transform: scale(1.1); }
829
+ }
830
+ </style>
831
+ """, unsafe_allow_html=True)
832
+
833
  # Display fetch errors in expander (less intrusive)
834
  if 'fetch_errors' in locals() and fetch_errors:
835
  with st.expander("⚠️ Source Fetch Warnings", expanded=False):
app/services/economic_calendar.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Economic Calendar Scraper - Investing.com
3
+ Scrapes upcoming economic events, indicators, and releases
4
+ No API key required - web scraping approach
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+ import logging
10
+ import re
11
+
12
+ import requests
13
+ from bs4 import BeautifulSoup
14
+
15
+ # Configure logging
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class EconomicCalendarService:
    """
    Scrapes economic calendar data from Investing.com.

    Focus: high- and medium-importance events. The scraper depends on
    Investing.com's HTML structure (fragile by nature) and always degrades
    to static mock events when scraping fails or yields nothing, so the
    caller never has to handle an empty/er­rored calendar.
    """

    def __init__(self):
        """Initialize a persistent HTTP session with browser-like headers."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Referer': 'https://www.investing.com/',
        })

    def get_upcoming_events(self, days_ahead: int = 7, min_importance: str = 'medium') -> List[Dict]:
        """
        Get upcoming economic events.

        Args:
            days_ahead: Only keep events within this many days from now.
            min_importance: Minimum importance level: 'low', 'medium' or 'high'.

        Returns:
            List of standardized event dicts (see ``_parse_event_row``);
            mock events on any failure.
        """
        try:
            events = self._scrape_investing_com(days_ahead, min_importance)

            if events:
                logger.info(f"Scraped {len(events)} economic events from Investing.com")
                return events

            logger.warning("No events scraped - using mock data")
            return self._get_mock_events()

        except Exception as e:
            # Broad catch is intentional: the UI must never crash because a
            # third-party page changed; degrade to mock data instead.
            logger.error(f"Error fetching economic calendar: {e}")
            return self._get_mock_events()

    def _scrape_investing_com(self, days_ahead: int, min_importance: str) -> List[Dict]:
        """
        Scrape the economic calendar table from Investing.com.

        Note: This may be fragile and break if they change their HTML
        structure; returns [] on any failure so the caller can fall back.
        """
        try:
            url = 'https://www.investing.com/economic-calendar/'
            response = self.session.get(url, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')
            events = []

            # Investing.com renders the calendar as a single table; each
            # event is a <tr class="js-event-item"> row.
            calendar_table = soup.find('table', {'id': 'economicCalendarData'})
            if not calendar_table:
                logger.warning("Could not find economic calendar table on Investing.com")
                return []

            rows = calendar_table.find_all('tr', {'class': 'js-event-item'})

            for row in rows[:50]:  # Cap work at 50 rows
                try:
                    event_data = self._parse_event_row(row)
                    if event_data and self._should_include_event(event_data, days_ahead, min_importance):
                        events.append(event_data)
                except Exception as e:
                    # One malformed row must not abort the whole scrape
                    logger.debug(f"Error parsing event row: {e}")
                    continue

            return events

        except Exception as e:
            logger.error(f"Error scraping Investing.com: {e}")
            return []

    def _parse_event_row(self, row) -> Optional[Dict]:
        """
        Parse a single calendar <tr> into a standardized event dict.

        Returns None when the row cannot be parsed. NOTE(review): the 'id'
        field uses ``hash()`` on a string, which is salted per process
        (PYTHONHASHSEED) and therefore NOT stable across runs — fine for
        in-session dedup, unsuitable for persistence.
        """
        try:
            # Scheduled time, e.g. "10:00" or "All Day"
            timestamp_elem = row.find('td', {'class': 'first left time'})
            time_str = timestamp_elem.get_text(strip=True) if timestamp_elem else ''

            # Country code from the flag cell's title attribute
            country_elem = row.find('td', {'class': 'flagCur'})
            country = country_elem.get('title', 'US') if country_elem else 'US'

            # Importance rendered as 1-3 bull icons
            importance_elem = row.find('td', {'class': 'sentiment'})
            importance = self._parse_importance(importance_elem) if importance_elem else 'low'

            event_elem = row.find('td', {'class': 'left event'})
            event_name = event_elem.get_text(strip=True) if event_elem else ''

            # Actual / forecast / previous cells carry per-event ids
            actual_elem = row.find('td', {'id': re.compile('eventActual_')})
            forecast_elem = row.find('td', {'id': re.compile('eventForecast_')})
            previous_elem = row.find('td', {'id': re.compile('eventPrevious_')})

            actual = self._parse_value(actual_elem.get_text(strip=True) if actual_elem else '')
            forecast = self._parse_value(forecast_elem.get_text(strip=True) if forecast_elem else '')
            previous = self._parse_value(previous_elem.get_text(strip=True) if previous_elem else '')

            event_date = self._parse_event_time(time_str)
            time_to_event = self._calculate_time_to_event(event_date)

            return {
                'id': hash(f"{event_name}_{event_date}_{country}"),
                'title': f"{country} - {event_name}",
                'event_name': event_name,
                'event_date': event_date,
                'country': country,
                'category': self._categorize_event(event_name),
                'importance': importance,
                'forecast': forecast,
                'previous': previous,
                'actual': actual,
                'time_to_event': time_to_event,
                'timestamp': datetime.now(),
                'source': 'Investing.com',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': importance,  # Map importance to impact
                'sentiment': self._determine_sentiment(actual, forecast, previous)
            }

        except Exception as e:
            logger.debug(f"Error parsing event row: {e}")
            return None

    def _parse_importance(self, importance_elem) -> str:
        """Map Investing.com's 1-3 bull icons to 'low'/'medium'/'high'."""
        if not importance_elem:
            return 'low'

        bulls = importance_elem.find_all('i', {'class': 'grayFullBullishIcon'})
        num_bulls = len(bulls)

        if num_bulls >= 3:
            return 'high'
        if num_bulls == 2:
            return 'medium'
        return 'low'

    def _parse_value(self, value_str: str) -> Optional[float]:
        """
        Parse a numeric value from a calendar cell string; None if missing.

        NOTE(review): K/M/B suffixes are stripped, NOT scaled ('229K' ->
        229.0). Values are only compared against peers carrying the same
        suffix, so relative comparisons stay correct — confirm before using
        these as absolute magnitudes.
        """
        if not value_str or value_str == '-':
            return None

        try:
            # Remove % sign, K/M/B suffixes and thousands separators
            cleaned = value_str.replace('%', '').replace('K', '').replace('M', '').replace('B', '')
            cleaned = cleaned.replace(',', '')
            return float(cleaned)
        except ValueError:
            # Narrowed from a bare except: only a non-numeric cell
            # (e.g. 'Tentative') should be treated as missing data.
            return None

    def _parse_event_time(self, time_str: str) -> datetime:
        """
        Parse an event time string ("10:00" / "All Day") to a naive local
        datetime; times already past are assumed to be tomorrow.
        """
        try:
            if 'All Day' in time_str or not time_str:
                # Default all-day events to noon today
                return datetime.now().replace(hour=12, minute=0, second=0, microsecond=0)

            # Assumes the time refers to today — a real implementation
            # would need the date context from the surrounding table.
            time_parts = time_str.split(':')
            hour = int(time_parts[0])
            minute = int(time_parts[1]) if len(time_parts) > 1 else 0

            event_time = datetime.now().replace(hour=hour, minute=minute, second=0, microsecond=0)

            # If the time has already passed today, assume it's tomorrow
            if event_time < datetime.now():
                event_time += timedelta(days=1)

            return event_time

        except Exception as e:
            logger.debug(f"Error parsing time: {e}")
            # Arbitrary near-future fallback keeps the event visible
            return datetime.now() + timedelta(hours=2)

    def _calculate_time_to_event(self, event_date: datetime) -> str:
        """Return a human-readable countdown, e.g. 'in 2d 8h' / 'In progress'."""
        delta = event_date - datetime.now()

        if delta.total_seconds() < 0:
            return "In progress"

        days = delta.days
        hours = delta.seconds // 3600
        minutes = (delta.seconds % 3600) // 60

        if days > 0:
            return f"in {days}d {hours}h"
        if hours > 0:
            return f"in {hours}h {minutes}m"
        return f"in {minutes}m"

    def _categorize_event(self, event_name: str) -> str:
        """Bucket an event name by keyword into a coarse category."""
        event_lower = event_name.lower()

        if any(kw in event_lower for kw in ['cpi', 'inflation', 'pce', 'price']):
            return 'inflation'
        if any(kw in event_lower for kw in ['employment', 'jobs', 'unemployment', 'nfp', 'payroll']):
            return 'employment'
        if any(kw in event_lower for kw in ['gdp', 'growth']):
            return 'gdp'
        if any(kw in event_lower for kw in ['fed', 'fomc', 'ecb', 'rate', 'boe', 'boj']):
            return 'central_bank'
        if any(kw in event_lower for kw in ['pmi', 'manufacturing', 'services']):
            return 'pmi'
        return 'other'

    def _determine_sentiment(self, actual: Optional[float], forecast: Optional[float], previous: Optional[float]) -> str:
        """Classify as positive/negative/neutral by actual vs. forecast."""
        if actual is None or forecast is None:
            return 'neutral'

        if actual > forecast:
            return 'positive'  # Beat forecast
        if actual < forecast:
            return 'negative'  # Missed forecast
        return 'neutral'

    def _should_include_event(self, event: Dict, days_ahead: int, min_importance: str) -> bool:
        """Return True if the event passes the importance and date filters."""
        # Rank importance with a mapping instead of list.index: an
        # unexpected importance string previously raised ValueError and
        # silently dropped the row via the caller's broad except. Unknown
        # event importance now counts as 'low'; an unknown threshold
        # defaults to 'medium'.
        rank = {'low': 0, 'medium': 1, 'high': 2}
        if rank.get(event.get('importance'), 0) < rank.get(min_importance, 1):
            return False

        # Filter by date range (naive local datetimes throughout)
        days_until = (event['event_date'] - datetime.now()).days
        return days_until <= days_ahead

    def _get_mock_events(self) -> List[Dict]:
        """Static mock economic events for development/testing fallback."""
        now = datetime.now()

        return [
            {
                'id': 1,
                'title': 'US - Consumer Price Index (CPI)',
                'event_name': 'Consumer Price Index',
                'event_date': now + timedelta(hours=2),
                'country': 'US',
                'category': 'inflation',
                'importance': 'high',
                'forecast': 2.5,
                'previous': 2.3,
                'actual': None,
                'time_to_event': 'in 2h 0m',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 2,
                'title': 'US - Non-Farm Payrolls (NFP)',
                'event_name': 'Non-Farm Payrolls',
                'event_date': now + timedelta(days=2, hours=8, minutes=30),
                'country': 'US',
                'category': 'employment',
                'importance': 'high',
                'forecast': 180.0,
                'previous': 175.0,
                'actual': None,
                'time_to_event': 'in 2d 8h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 3,
                'title': 'EU - ECB Interest Rate Decision',
                'event_name': 'ECB Interest Rate Decision',
                'event_date': now + timedelta(days=3, hours=12),
                'country': 'EU',
                'category': 'central_bank',
                'importance': 'high',
                'forecast': 3.75,
                'previous': 4.00,
                'actual': None,
                'time_to_event': 'in 3d 12h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 4,
                'title': 'US - GDP Growth Rate',
                'event_name': 'GDP Growth Rate',
                'event_date': now + timedelta(days=5, hours=8, minutes=30),
                'country': 'US',
                'category': 'gdp',
                'importance': 'high',
                'forecast': 2.8,
                'previous': 2.5,
                'actual': None,
                'time_to_event': 'in 5d 8h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 5,
                'title': 'US - Manufacturing PMI',
                'event_name': 'Manufacturing PMI',
                'event_date': now + timedelta(days=1, hours=10),
                'country': 'US',
                'category': 'pmi',
                'importance': 'medium',
                'forecast': 51.5,
                'previous': 50.8,
                'actual': None,
                'time_to_event': 'in 1d 10h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'medium',
                'sentiment': 'neutral'
            }
        ]

    def get_todays_events(self) -> List[Dict]:
        """Return only the upcoming events whose date falls on today."""
        all_events = self.get_upcoming_events(days_ahead=1)
        today = datetime.now().date()

        todays_events = [
            event for event in all_events
            if event['event_date'].date() == today
        ]

        return todays_events
app/services/market_events.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Market Events Scraper - Earnings, Economic Indicators & Central Bank Events
3
+ Aggregates upcoming and recent market-moving events
4
+ Web scraping approach - no API keys required
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+ import logging
10
+ import re
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
13
+ import requests
14
+ import feedparser
15
+ from bs4 import BeautifulSoup
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class MarketEventsScraper:
    """
    Scrapes market events from multiple sources.

    Focus: earnings, economic indicators and central-bank announcements.
    All sources are fetched in parallel and every fetch path falls back to
    static mock data, so the caller always receives a usable list.
    """

    # Central bank RSS feeds (already in use for news)
    CENTRAL_BANKS = {
        'fed': {
            'name': 'Federal Reserve',
            'rss': 'https://www.federalreserve.gov/feeds/press_all.xml',
            'weight': 2.0
        },
        'ecb': {
            'name': 'European Central Bank',
            'rss': 'https://www.ecb.europa.eu/rss/press.xml',
            'weight': 2.0
        }
    }

    def __init__(self):
        """Initialize a persistent HTTP session with browser-like headers."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
        })

    def scrape_market_events(self, max_items: int = 50, days_ahead: int = 14) -> List[Dict]:
        """
        Scrape market events from all sources in parallel.

        Args:
            max_items: Maximum number of events to return.
            days_ahead: Reserved for date filtering (currently unused by
                the individual fetchers).

        Returns:
            Unified list of event dicts sorted by event date, with
            high-impact events first within the same date.
        """
        all_events = []
        seen_urls = set()

        # Fetch the three sources concurrently; each task is I/O-bound
        with ThreadPoolExecutor(max_workers=3) as executor:
            futures = [
                (executor.submit(self._fetch_earnings), 'earnings'),
                (executor.submit(self._fetch_economic_indicators), 'indicators'),
                (executor.submit(self._fetch_central_bank_events), 'central_banks'),
            ]

            for future, source_type in futures:
                try:
                    events = future.result(timeout=35)

                    # Deduplicate by URL across sources
                    for event in events:
                        if event['url'] not in seen_urls:
                            seen_urls.add(event['url'])
                            all_events.append(event)

                    logger.info(f"Fetched {len(events)} events from {source_type}")

                except Exception as e:
                    logger.error(f"Error fetching {source_type}: {e}")

        if not all_events:
            logger.warning("No market events fetched - using mock data")
            return self._get_mock_events()

        # Sort by event date; within a date, high-impact events sort first
        # because False < True for the (impact != 'high') tiebreaker.
        all_events.sort(
            key=lambda x: (x.get('event_date', x['timestamp']), x['impact'] != 'high'),
        )

        return all_events[:max_items]

    def _fetch_earnings(self) -> List[Dict]:
        """
        Fetch the earnings calendar from Yahoo Finance via HTML scraping.

        Falls back to mock earnings whenever the page layout is not
        recognized or the request fails.
        """
        try:
            url = 'https://finance.yahoo.com/calendar/earnings'
            response = self.session.get(url, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')
            events = []

            table = soup.find('table', {'class': re.compile('earnings')})
            if not table:
                logger.warning("Could not find earnings table on Yahoo Finance")
                return self._get_mock_earnings()

            rows = table.find_all('tr')[1:20]  # Skip header, limit to 20

            for row in rows:
                try:
                    cells = row.find_all('td')
                    if len(cells) < 4:
                        continue

                    # Column order assumed: ticker, company, EPS estimate,
                    # reported EPS, time — TODO confirm against live page
                    ticker = cells[0].get_text(strip=True)
                    company = cells[1].get_text(strip=True) if len(cells) > 1 else ticker
                    eps_estimate = cells[2].get_text(strip=True) if len(cells) > 2 else 'N/A'
                    reported_eps = cells[3].get_text(strip=True) if len(cells) > 3 else None
                    event_time = cells[4].get_text(strip=True) if len(cells) > 4 else 'N/A'

                    event_date = self._parse_earnings_date(event_time)

                    # NOTE(review): hash() on strings is salted per process,
                    # so these ids are only stable within one run.
                    events.append({
                        'id': hash(f"earnings_{ticker}_{event_date}"),
                        'title': f"{company} ({ticker}) Earnings Report",
                        'summary': f"Expected EPS: {eps_estimate}" + (f", Reported: {reported_eps}" if reported_eps and reported_eps != 'N/A' else ''),
                        'source': 'Yahoo Finance',
                        'category': 'earnings',
                        'timestamp': datetime.now(),
                        'event_date': event_date,
                        'url': f"https://finance.yahoo.com/quote/{ticker}",
                        'event_type': 'earnings',
                        'ticker': ticker,
                        'expected_value': self._parse_float(eps_estimate),
                        'actual_value': self._parse_float(reported_eps) if reported_eps else None,
                        'previous_value': None,
                        'impact': 'medium',  # Earnings are generally medium impact
                        'sentiment': self._determine_earnings_sentiment(eps_estimate, reported_eps),
                        'is_breaking': False,
                        'source_weight': 1.3,
                        'likes': 0,
                        'retweets': 0
                    })

                except Exception as e:
                    # A malformed row must not abort the whole table
                    logger.debug(f"Error parsing earnings row: {e}")
                    continue

            return events if events else self._get_mock_earnings()

        except Exception as e:
            logger.error(f"Error fetching earnings: {e}")
            return self._get_mock_earnings()

    def _fetch_economic_indicators(self) -> List[Dict]:
        """
        Fetch economic indicators.

        Currently returns mock data only: the FRED RSS feeds carry mostly
        historical releases, so a real implementation would need to scrape
        the FRED release calendar instead.
        """
        try:
            events = []
            events.extend(self._get_mock_indicators())
            return events

        except Exception as e:
            logger.error(f"Error fetching economic indicators: {e}")
            return self._get_mock_indicators()

    def _fetch_central_bank_events(self) -> List[Dict]:
        """Fetch central bank announcements from the configured RSS feeds."""
        events = []

        for bank_id, bank_info in self.CENTRAL_BANKS.items():
            try:
                feed = feedparser.parse(bank_info['rss'])

                for entry in feed.entries[:10]:
                    try:
                        # Prefer the feed's publication time; fall back to now
                        if hasattr(entry, 'published_parsed') and entry.published_parsed:
                            timestamp = datetime(*entry.published_parsed[:6])
                        else:
                            timestamp = datetime.now()

                        # Skip stale announcements (>7 days old)
                        if (datetime.now() - timestamp).days > 7:
                            continue

                        title = entry.get('title', '')
                        summary = entry.get('summary', '') or title
                        url = entry.get('link', '')

                        # Strip HTML from the summary and truncate to 200 chars
                        if summary:
                            summary = BeautifulSoup(summary, 'html.parser').get_text()
                            summary = (summary[:200] + '...') if len(summary) > 200 else summary

                        events.append({
                            'id': hash(url),
                            'title': f"{bank_info['name']}: {title}",
                            'summary': summary,
                            'source': bank_info['name'],
                            'category': 'central_bank',
                            'timestamp': timestamp,
                            'event_date': timestamp,
                            'url': url,
                            'event_type': 'central_bank_announcement',
                            'ticker': None,
                            'expected_value': None,
                            'actual_value': None,
                            'previous_value': None,
                            'impact': 'high',  # Central bank events are high impact
                            'sentiment': 'neutral',
                            'is_breaking': (datetime.now() - timestamp).days < 1,
                            'source_weight': bank_info['weight'],
                            'likes': 0,
                            'retweets': 0
                        })

                    except Exception as e:
                        logger.debug(f"Error parsing {bank_id} entry: {e}")
                        continue

            except Exception as e:
                logger.error(f"Error fetching {bank_id} RSS: {e}")

        return events

    def _parse_earnings_date(self, time_str: str) -> datetime:
        """
        Parse an earnings report time label to a naive local datetime.

        Yahoo Finance uses "Before Market Open"/"BMO", "After Market
        Close"/"AMC", or a specific date; unknown labels default to
        tomorrow morning.
        """
        now = datetime.now()

        if 'Before Market' in time_str or 'BMO' in time_str:
            return now.replace(hour=7, minute=0, second=0, microsecond=0)
        if 'After Market' in time_str or 'AMC' in time_str:
            return now.replace(hour=16, minute=0, second=0, microsecond=0)
        # Default to tomorrow morning
        return (now + timedelta(days=1)).replace(hour=7, minute=0, second=0, microsecond=0)

    def _parse_float(self, value_str: str) -> Optional[float]:
        """Parse a float from a display string like '$2.10'; None if absent."""
        if not value_str or value_str == 'N/A' or value_str == '-':
            return None

        # Keep only digits, '.' and '-' (drops $, commas, % etc.)
        cleaned = re.sub(r'[^\d.-]', '', value_str)
        if not cleaned:
            # String contained no digits at all (e.g. 'TBD')
            return None
        try:
            return float(cleaned)
        except ValueError:
            # Narrowed from a bare except: leftover punctuation like '.-'
            # is simply unparseable, not an error worth surfacing.
            return None

    def _determine_earnings_sentiment(self, expected: str, actual: Optional[str]) -> str:
        """Classify an earnings result as beat/miss/in-line sentiment."""
        if not actual or actual == 'N/A':
            return 'neutral'

        exp_val = self._parse_float(expected)
        act_val = self._parse_float(actual)

        if exp_val is None or act_val is None:
            return 'neutral'

        if act_val > exp_val:
            return 'positive'  # Beat
        if act_val < exp_val:
            return 'negative'  # Miss
        return 'neutral'  # In-line

    def _get_mock_earnings(self) -> List[Dict]:
        """Static mock earnings data for development/testing fallback."""
        now = datetime.now()

        return [
            {
                'id': 1,
                'title': 'Apple Inc. (AAPL) Earnings Report',
                'summary': 'Expected EPS: $2.10',
                'source': 'Yahoo Finance',
                'category': 'earnings',
                'timestamp': now,
                'event_date': now + timedelta(days=2, hours=16),
                'url': 'https://finance.yahoo.com/quote/AAPL',
                'event_type': 'earnings',
                'ticker': 'AAPL',
                'expected_value': 2.10,
                'actual_value': None,
                'previous_value': 1.95,
                'impact': 'high',
                'sentiment': 'neutral',
                'is_breaking': False,
                'source_weight': 1.5,
                'likes': 0,
                'retweets': 0
            },
            {
                'id': 2,
                'title': 'Microsoft Corporation (MSFT) Earnings Report',
                'summary': 'Expected EPS: $2.75',
                'source': 'Yahoo Finance',
                'category': 'earnings',
                'timestamp': now,
                'event_date': now + timedelta(days=3, hours=16),
                'url': 'https://finance.yahoo.com/quote/MSFT',
                'event_type': 'earnings',
                'ticker': 'MSFT',
                'expected_value': 2.75,
                'actual_value': None,
                'previous_value': 2.50,
                'impact': 'high',
                'sentiment': 'neutral',
                'is_breaking': False,
                'source_weight': 1.5,
                'likes': 0,
                'retweets': 0
            }
        ]

    def _get_mock_indicators(self) -> List[Dict]:
        """Static mock economic indicator data for fallback."""
        now = datetime.now()

        return [
            {
                'id': 3,
                'title': 'US Retail Sales Data Release',
                'summary': 'Monthly retail sales figures',
                'source': 'US Census Bureau',
                'category': 'economic_indicator',
                'timestamp': now,
                'event_date': now + timedelta(days=1, hours=8, minutes=30),
                'url': 'https://www.census.gov/retail/',
                'event_type': 'retail_sales',
                'ticker': None,
                'expected_value': 0.5,
                'actual_value': None,
                'previous_value': 0.3,
                'impact': 'medium',
                'sentiment': 'neutral',
                'is_breaking': False,
                'source_weight': 1.6,
                'likes': 0,
                'retweets': 0
            }
        ]

    def _get_mock_events(self) -> List[Dict]:
        """Combined mock data: earnings + indicators + one central-bank item."""
        return self._get_mock_earnings() + self._get_mock_indicators() + [
            {
                'id': 4,
                'title': 'Federal Reserve: FOMC Meeting Minutes Released',
                'summary': 'Minutes from the latest Federal Open Market Committee meeting',
                'source': 'Federal Reserve',
                'category': 'central_bank',
                'timestamp': datetime.now() - timedelta(hours=2),
                'event_date': datetime.now() - timedelta(hours=2),
                'url': 'https://www.federalreserve.gov/',
                'event_type': 'central_bank_announcement',
                'ticker': None,
                'expected_value': None,
                'actual_value': None,
                'previous_value': None,
                'impact': 'high',
                'sentiment': 'neutral',
                'is_breaking': True,
                'source_weight': 2.0,
                'likes': 0,
                'retweets': 0
            }
        ]
app/services/prediction_markets.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prediction Markets Scraper - Polymarket, Metaculus & CME FedWatch
3
+ Aggregates market predictions for financial, political, and geopolitical events
4
+ No authentication required - all free/public APIs
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+ import logging
10
+ import re
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
13
+ import requests
14
+ import pandas as pd
15
+ from bs4 import BeautifulSoup
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ class PredictionMarketsScraper:
23
+ """
24
+ Scrapes prediction market data from multiple sources
25
+ Focus: Economics, geopolitics, markets
26
+ """
27
+
28
+ # Source configuration
29
+ SOURCES = {
30
+ 'polymarket': {
31
+ 'name': 'Polymarket',
32
+ 'base_url': 'https://clob.polymarket.com',
33
+ 'weight': 1.8,
34
+ 'enabled': True
35
+ },
36
+ 'metaculus': {
37
+ 'name': 'Metaculus',
38
+ 'base_url': 'https://www.metaculus.com/api',
39
+ 'weight': 1.6,
40
+ 'enabled': True
41
+ },
42
+ 'cme_fedwatch': {
43
+ 'name': 'CME FedWatch',
44
+ 'url': 'https://www.cmegroup.com/markets/interest-rates/cme-fedwatch-tool.html',
45
+ 'weight': 2.0,
46
+ 'enabled': True
47
+ }
48
+ }
49
+
50
+ # Category keywords
51
+ MACRO_KEYWORDS = ['Fed', 'ECB', 'inflation', 'CPI', 'GDP', 'rate', 'economy']
52
+ MARKETS_KEYWORDS = ['stock', 'market', 'S&P', 'Dow', 'price', 'Bitcoin', 'crypto']
53
+ GEOPOLITICAL_KEYWORDS = ['election', 'war', 'Trump', 'Biden', 'China', 'Russia', 'Ukraine']
54
+
55
+ def __init__(self):
56
+ """Initialize scraper with session"""
57
+ self.session = requests.Session()
58
+ self.session.headers.update({
59
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
60
+ 'Accept': 'application/json',
61
+ 'Accept-Language': 'en-US,en;q=0.9',
62
+ })
63
+
64
+ def scrape_predictions(self, max_items: int = 50) -> List[Dict]:
65
+ """
66
+ Scrape predictions from all enabled sources
67
+ Returns unified list of prediction markets
68
+ """
69
+ all_predictions = []
70
+ seen_titles = set()
71
+
72
+ # Parallel fetching
73
+ with ThreadPoolExecutor(max_workers=3) as executor:
74
+ futures = []
75
+
76
+ if self.SOURCES['polymarket']['enabled']:
77
+ futures.append((executor.submit(self._fetch_polymarket), 'polymarket'))
78
+
79
+ if self.SOURCES['metaculus']['enabled']:
80
+ futures.append((executor.submit(self._fetch_metaculus), 'metaculus'))
81
+
82
+ if self.SOURCES['cme_fedwatch']['enabled']:
83
+ futures.append((executor.submit(self._fetch_cme_fedwatch), 'cme_fedwatch'))
84
+
85
+ for future, source_name in futures:
86
+ try:
87
+ predictions = future.result(timeout=35)
88
+
89
+ # Deduplicate by title similarity
90
+ for pred in predictions:
91
+ title_norm = pred['title'].lower().strip()
92
+ if title_norm not in seen_titles:
93
+ seen_titles.add(title_norm)
94
+ all_predictions.append(pred)
95
+
96
+ logger.info(f"Fetched {len(predictions)} predictions from {source_name}")
97
+
98
+ except Exception as e:
99
+ logger.error(f"Error fetching {source_name}: {e}")
100
+
101
+ # If no predictions fetched, use mock data
102
+ if not all_predictions:
103
+ logger.warning("No predictions fetched - using mock data")
104
+ return self._get_mock_predictions()
105
+
106
+ # Sort by volume (if available) and impact
107
+ all_predictions.sort(
108
+ key=lambda x: (x['impact'] == 'high', x.get('volume', 0)),
109
+ reverse=True
110
+ )
111
+
112
+ return all_predictions[:max_items]
113
+
114
+ def _fetch_polymarket(self) -> List[Dict]:
115
+ """Fetch predictions from Polymarket API"""
116
+ try:
117
+ # Polymarket CLOB API - get active markets
118
+ url = f"{self.SOURCES['polymarket']['base_url']}/markets"
119
+
120
+ response = self.session.get(url, timeout=10)
121
+ response.raise_for_status()
122
+
123
+ markets = response.json()
124
+ predictions = []
125
+
126
+ for market in markets[:30]: # Limit to 30 most recent
127
+ try:
128
+ # Parse market data
129
+ title = market.get('question', '')
130
+ if not title or len(title) < 10:
131
+ continue
132
+
133
+ # Get probabilities (0-1 range, convert to 0-100)
134
+ yes_prob = float(market.get('outcome_prices', ['0.5', '0.5'])[0]) * 100
135
+ no_prob = 100 - yes_prob
136
+
137
+ # Calculate volume
138
+ volume = float(market.get('volume', 0))
139
+
140
+ # Category classification
141
+ category = self._categorize_prediction(title)
142
+
143
+ # Impact based on volume
144
+ impact = self._assess_impact(volume, category)
145
+
146
+ # Sentiment from probability
147
+ sentiment = 'positive' if yes_prob > 60 else ('negative' if yes_prob < 40 else 'neutral')
148
+
149
+ # End date
150
+ end_date_str = market.get('end_date_iso', '')
151
+ try:
152
+ end_date = datetime.fromisoformat(end_date_str.replace('Z', '+00:00'))
153
+ except:
154
+ end_date = datetime.now() + timedelta(days=30)
155
+
156
+ predictions.append({
157
+ 'id': hash(market.get('condition_id', title)),
158
+ 'title': title,
159
+ 'summary': f"Market probability: {yes_prob:.1f}% YES, {no_prob:.1f}% NO",
160
+ 'source': 'Polymarket',
161
+ 'category': category,
162
+ 'timestamp': datetime.now(),
163
+ 'url': f"https://polymarket.com/event/{market.get('slug', '')}",
164
+ 'yes_probability': round(yes_prob, 1),
165
+ 'no_probability': round(no_prob, 1),
166
+ 'volume': volume,
167
+ 'end_date': end_date,
168
+ 'impact': impact,
169
+ 'sentiment': sentiment,
170
+ 'is_breaking': False,
171
+ 'source_weight': self.SOURCES['polymarket']['weight'],
172
+ 'likes': int(volume / 1000), # Approximate engagement from volume
173
+ 'retweets': 0
174
+ })
175
+
176
+ except Exception as e:
177
+ logger.debug(f"Error parsing Polymarket market: {e}")
178
+ continue
179
+
180
+ return predictions
181
+
182
+ except Exception as e:
183
+ logger.error(f"Error fetching Polymarket: {e}")
184
+ return []
185
+
186
+ def _fetch_metaculus(self) -> List[Dict]:
187
+ """Fetch predictions from Metaculus API"""
188
+ try:
189
+ # Metaculus API - get open questions
190
+ url = f"{self.SOURCES['metaculus']['base_url']}/questions/"
191
+ params = {
192
+ 'status': 'open',
193
+ 'type': 'forecast',
194
+ 'order_by': '-activity',
195
+ 'limit': 30
196
+ }
197
+
198
+ response = self.session.get(url, params=params, timeout=10)
199
+ response.raise_for_status()
200
+
201
+ data = response.json()
202
+ questions = data.get('results', [])
203
+ predictions = []
204
+
205
+ for q in questions:
206
+ try:
207
+ title = q.get('title', '')
208
+ if not title or len(title) < 10:
209
+ continue
210
+
211
+ # Get community prediction
212
+ community_prediction = q.get('community_prediction', {})
213
+ if not community_prediction:
214
+ continue
215
+
216
+ # For binary questions
217
+ if q.get('possibilities', {}).get('type') == 'binary':
218
+ yes_prob = float(community_prediction.get('q2', 0.5)) * 100
219
+ no_prob = 100 - yes_prob
220
+ else:
221
+ # Skip non-binary for now
222
+ continue
223
+
224
+ # Category classification
225
+ category = self._categorize_prediction(title)
226
+
227
+ # Impact based on number of forecasters
228
+ num_forecasters = q.get('number_of_forecasters', 0)
229
+ impact = 'high' if num_forecasters > 100 else ('medium' if num_forecasters > 20 else 'low')
230
+
231
+ # Sentiment
232
+ sentiment = 'positive' if yes_prob > 60 else ('negative' if yes_prob < 40 else 'neutral')
233
+
234
+ # Close date
235
+ close_time_str = q.get('close_time', '')
236
+ try:
237
+ close_time = datetime.fromisoformat(close_time_str.replace('Z', '+00:00'))
238
+ except:
239
+ close_time = datetime.now() + timedelta(days=30)
240
+
241
+ predictions.append({
242
+ 'id': q.get('id', hash(title)),
243
+ 'title': title,
244
+ 'summary': f"Community forecast: {yes_prob:.1f}% likelihood ({num_forecasters} forecasters)",
245
+ 'source': 'Metaculus',
246
+ 'category': category,
247
+ 'timestamp': datetime.now(),
248
+ 'url': q.get('url', f"https://www.metaculus.com/questions/{q.get('id')}"),
249
+ 'yes_probability': round(yes_prob, 1),
250
+ 'no_probability': round(no_prob, 1),
251
+ 'volume': 0, # Metaculus doesn't have trading volume
252
+ 'end_date': close_time,
253
+ 'impact': impact,
254
+ 'sentiment': sentiment,
255
+ 'is_breaking': False,
256
+ 'source_weight': self.SOURCES['metaculus']['weight'],
257
+ 'likes': num_forecasters,
258
+ 'retweets': 0
259
+ })
260
+
261
+ except Exception as e:
262
+ logger.debug(f"Error parsing Metaculus question: {e}")
263
+ continue
264
+
265
+ return predictions
266
+
267
+ except Exception as e:
268
+ logger.error(f"Error fetching Metaculus: {e}")
269
+ return []
270
+
271
+ def _fetch_cme_fedwatch(self) -> List[Dict]:
272
+ """
273
+ Fetch Fed rate probabilities from CME FedWatch Tool
274
+ Note: This is web scraping and may be fragile
275
+ """
276
+ try:
277
+ url = self.SOURCES['cme_fedwatch']['url']
278
+ response = self.session.get(url, timeout=10)
279
+ response.raise_for_status()
280
+
281
+ soup = BeautifulSoup(response.content, 'html.parser')
282
+
283
+ # CME FedWatch has a data table with meeting dates and probabilities
284
+ # This is a simplified version - actual implementation may need adjustment
285
+ # based on current page structure
286
+
287
+ predictions = []
288
+
289
+ # Try to find probability data in script tags (CME often embeds data in JSON)
290
+ scripts = soup.find_all('script')
291
+ for script in scripts:
292
+ if script.string and 'probability' in script.string.lower():
293
+ # This would need custom parsing based on CME's data format
294
+ # For now, create mock Fed predictions
295
+ logger.warning("CME FedWatch scraping not fully implemented - using mock Fed data")
296
+ break
297
+
298
+ # Fallback: Create mock Fed rate prediction
299
+ next_fomc = datetime.now() + timedelta(days=45) # Approximate next FOMC
300
+ predictions.append({
301
+ 'id': hash('fed_rate_' + next_fomc.strftime('%Y%m%d')),
302
+ 'title': f'Fed Rate Decision - {next_fomc.strftime("%B %Y")} FOMC',
303
+ 'summary': 'Market-implied probability of rate changes based on fed funds futures',
304
+ 'source': 'CME FedWatch',
305
+ 'category': 'macro',
306
+ 'timestamp': datetime.now(),
307
+ 'url': url,
308
+ 'yes_probability': 65.0, # Probability of rate cut
309
+ 'no_probability': 35.0, # Probability of no change
310
+ 'volume': 0,
311
+ 'end_date': next_fomc,
312
+ 'impact': 'high',
313
+ 'sentiment': 'neutral',
314
+ 'is_breaking': False,
315
+ 'source_weight': self.SOURCES['cme_fedwatch']['weight'],
316
+ 'likes': 0,
317
+ 'retweets': 0
318
+ })
319
+
320
+ return predictions
321
+
322
+ except Exception as e:
323
+ logger.error(f"Error fetching CME FedWatch: {e}")
324
+ return []
325
+
326
+ def _categorize_prediction(self, text: str) -> str:
327
+ """Categorize prediction market by keywords"""
328
+ text_lower = text.lower()
329
+
330
+ macro_score = sum(1 for kw in self.MACRO_KEYWORDS if kw.lower() in text_lower)
331
+ market_score = sum(1 for kw in self.MARKETS_KEYWORDS if kw.lower() in text_lower)
332
+ geo_score = sum(1 for kw in self.GEOPOLITICAL_KEYWORDS if kw.lower() in text_lower)
333
+
334
+ scores = {'macro': macro_score, 'markets': market_score, 'geopolitical': geo_score}
335
+ return max(scores, key=scores.get) if max(scores.values()) > 0 else 'markets'
336
+
337
+ def _assess_impact(self, volume: float, category: str) -> str:
338
+ """Assess market impact based on volume and category"""
339
+ # Macro predictions are inherently high impact
340
+ if category == 'macro':
341
+ return 'high'
342
+
343
+ # Volume-based assessment
344
+ if volume > 1000000: # $1M+ volume
345
+ return 'high'
346
+ elif volume > 100000: # $100K+ volume
347
+ return 'medium'
348
+ else:
349
+ return 'low'
350
+
351
+ def _get_mock_predictions(self) -> List[Dict]:
352
+ """Mock prediction data for development/testing"""
353
+ return [
354
+ {
355
+ 'id': 1,
356
+ 'title': 'Will the Fed cut interest rates by March 2025?',
357
+ 'summary': 'Market probability based on fed funds futures and prediction markets',
358
+ 'source': 'CME FedWatch',
359
+ 'category': 'macro',
360
+ 'timestamp': datetime.now(),
361
+ 'url': 'https://www.cmegroup.com/markets/interest-rates/cme-fedwatch-tool.html',
362
+ 'yes_probability': 72.5,
363
+ 'no_probability': 27.5,
364
+ 'volume': 0,
365
+ 'end_date': datetime.now() + timedelta(days=45),
366
+ 'impact': 'high',
367
+ 'sentiment': 'positive',
368
+ 'is_breaking': False,
369
+ 'source_weight': 2.0,
370
+ 'likes': 0,
371
+ 'retweets': 0
372
+ },
373
+ {
374
+ 'id': 2,
375
+ 'title': 'Will Bitcoin reach $100,000 in 2025?',
376
+ 'summary': 'Prediction market consensus on Bitcoin price target',
377
+ 'source': 'Polymarket',
378
+ 'category': 'markets',
379
+ 'timestamp': datetime.now(),
380
+ 'url': 'https://polymarket.com',
381
+ 'yes_probability': 45.0,
382
+ 'no_probability': 55.0,
383
+ 'volume': 2500000,
384
+ 'end_date': datetime.now() + timedelta(days=365),
385
+ 'impact': 'medium',
386
+ 'sentiment': 'neutral',
387
+ 'is_breaking': False,
388
+ 'source_weight': 1.8,
389
+ 'likes': 2500,
390
+ 'retweets': 0
391
+ },
392
+ {
393
+ 'id': 3,
394
+ 'title': 'Will there be a US recession in 2025?',
395
+ 'summary': 'Expert consensus forecast on economic downturn',
396
+ 'source': 'Metaculus',
397
+ 'category': 'macro',
398
+ 'timestamp': datetime.now(),
399
+ 'url': 'https://www.metaculus.com',
400
+ 'yes_probability': 35.0,
401
+ 'no_probability': 65.0,
402
+ 'volume': 0,
403
+ 'end_date': datetime.now() + timedelta(days=365),
404
+ 'impact': 'high',
405
+ 'sentiment': 'negative',
406
+ 'is_breaking': False,
407
+ 'source_weight': 1.6,
408
+ 'likes': 450,
409
+ 'retweets': 0
410
+ }
411
+ ]
app/services/sectoral_news.py ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sectoral News Scraper - 7 Major Market Sectors
3
+ Filters and aggregates news by sector: Finance, Tech, Energy, Healthcare, Consumer, Industrials, Real Estate
4
+ Leverages existing RSS infrastructure with sector-specific classification
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+ import logging
10
+ import re
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
13
+ import requests
14
+ import pandas as pd
15
+ import feedparser
16
+ from bs4 import BeautifulSoup
17
+
18
+ # Configure logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class SectoralNewsScraper:
    """
    Aggregates news by market sector.

    Fetches the RSS feeds configured per sector (in parallel), checks each
    entry's relevance via keyword matching, and normalizes it into the
    app's common news-item dict (id, title, summary, sector, category,
    sentiment, impact, ...). Falls back to hard-coded mock items when no
    feed yields anything.
    """

    # 7 Sector configuration with keywords and RSS feeds.
    # 'keywords' gate relevance in _fetch_rss_feed; 'weight' feeds into
    # _assess_impact and is stored on each item as 'source_weight'.
    SECTORS = {
        'finance': {
            'name': 'Finance',
            'keywords': [
                'bank', 'JPMorgan', 'Goldman Sachs', 'Morgan Stanley', 'Wells Fargo',
                'Citigroup', 'Bank of America', 'fintech', 'lending', 'credit',
                'financial sector', 'banking', 'insurance', 'asset management'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/10000664/device/rss/rss.html',  # CNBC Banking
                'https://feeds.bloomberg.com/markets/news.rss'
            ],
            'weight': 1.5
        },
        'tech': {
            'name': 'Technology',
            'keywords': [
                'Apple', 'Microsoft', 'Google', 'Alphabet', 'Amazon', 'Meta', 'Facebook',
                'NVIDIA', 'AMD', 'Intel', 'semiconductor', 'chip', 'software', 'cloud',
                'AI', 'artificial intelligence', 'tech sector', 'Silicon Valley', 'Tesla'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/19854910/device/rss/rss.html',  # CNBC Technology
                'https://techcrunch.com/feed/'
            ],
            'weight': 1.5
        },
        'energy': {
            'name': 'Energy',
            'keywords': [
                'oil', 'gas', 'crude', 'petroleum', 'OPEC', 'Exxon', 'ExxonMobil', 'Chevron',
                'ConocoPhillips', 'renewable', 'solar', 'wind', 'energy sector', 'pipeline',
                'natural gas', 'LNG', 'fracking', 'drilling'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/19832390/device/rss/rss.html',  # CNBC Energy
            ],
            'weight': 1.6
        },
        'healthcare': {
            'name': 'Healthcare',
            'keywords': [
                'pharma', 'pharmaceutical', 'biotech', 'FDA', 'drug', 'vaccine', 'clinical trial',
                'Pfizer', 'Johnson & Johnson', 'Merck', 'AbbVie', 'Bristol Myers',
                'healthcare', 'hospital', 'medical device', 'therapeutics'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/10000108/device/rss/rss.html',  # CNBC Health
            ],
            'weight': 1.5
        },
        'consumer': {
            'name': 'Consumer & Retail',
            'keywords': [
                'retail', 'Amazon', 'Walmart', 'Target', 'Costco', 'Home Depot',
                'e-commerce', 'consumer', 'shopping', 'Black Friday', 'sales',
                'Nike', 'Starbucks', 'McDonald\'s', 'consumer goods', 'discretionary'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/10001009/device/rss/rss.html',  # CNBC Retail
            ],
            'weight': 1.3
        },
        'industrials': {
            'name': 'Industrials',
            'keywords': [
                'Boeing', 'Airbus', 'Caterpillar', 'Deere', '3M', 'GE', 'General Electric',
                'Honeywell', 'Lockheed Martin', 'manufacturing', 'industrial',
                'aerospace', 'defense', 'machinery', 'equipment', 'logistics', 'freight'
            ],
            'rss_sources': [
                'https://www.reuters.com/rss/businessNews',  # Reuters Business
            ],
            'weight': 1.4
        },
        'real_estate': {
            'name': 'Real Estate',
            'keywords': [
                'housing', 'mortgage', 'REIT', 'real estate', 'property', 'home sales',
                'construction', 'residential', 'commercial real estate', 'housing market',
                'home prices', 'rent', 'rental', 'builder', 'homebuilder'
            ],
            'rss_sources': [],  # Will rely on keyword filtering from general news
            'weight': 1.3
        }
    }

    def __init__(self):
        """Initialize the scraper with a shared HTTP session.

        A browser-like User-Agent is set because some feed/news servers
        reject default library user agents.
        """
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
        })

    def scrape_sectoral_news(self, max_items: int = 50, hours: int = 24) -> List[Dict]:
        """
        Scrape and classify news by sector.

        Fetches all sectors concurrently, deduplicates by URL, and returns
        up to *max_items* items no older than *hours* hours, sorted with
        tech first, then finance, then newest-first within each group.
        Falls back to mock data when every sector fails or is empty.
        """
        all_news = []
        seen_urls = set()

        # Parallel fetch from all sector RSS feeds (one task per sector)
        with ThreadPoolExecutor(max_workers=7) as executor:
            futures = []

            for sector_id, sector_info in self.SECTORS.items():
                # Submit RSS fetching task for each sector
                futures.append((
                    executor.submit(self._fetch_sector_news, sector_id, sector_info, hours),
                    sector_id
                ))

            for future, sector_id in futures:
                try:
                    sector_news = future.result(timeout=35)

                    # Deduplicate by URL (first sector to report a URL wins)
                    for item in sector_news:
                        if item['url'] not in seen_urls:
                            seen_urls.add(item['url'])
                            all_news.append(item)

                    logger.info(f"Fetched {len(sector_news)} items for {sector_id}")

                except Exception as e:
                    # A failed sector only loses its own items
                    logger.error(f"Error fetching {sector_id} news: {e}")

        # If no news fetched, use mock data
        if not all_news:
            logger.warning("No sectoral news fetched - using mock data")
            return self._get_mock_sectoral_news()

        # Sort by sector priority and timestamp: False sorts before True,
        # so tech comes first, then finance, then newest-first overall
        all_news.sort(
            key=lambda x: (x['sector'] != 'tech', x['sector'] != 'finance', -x['timestamp'].timestamp()),
        )

        return all_news[:max_items]

    def _fetch_sector_news(self, sector_id: str, sector_info: Dict, hours: int) -> List[Dict]:
        """Fetch news for a single sector by walking all its RSS feeds.

        Per-feed failures are logged at debug level and skipped so one
        dead feed does not empty the sector.
        """
        sector_news = []

        # Fetch from sector-specific RSS feeds
        for rss_url in sector_info['rss_sources']:
            try:
                feed_news = self._fetch_rss_feed(rss_url, sector_id, sector_info, hours)
                sector_news.extend(feed_news)
            except Exception as e:
                logger.debug(f"Error fetching RSS {rss_url}: {e}")

        # If no RSS news, could also filter general news sources by keywords
        # (This would require access to FinanceNewsScraper - skipping for now)

        return sector_news

    def _fetch_rss_feed(self, rss_url: str, sector_id: str, sector_info: Dict, hours: int) -> List[Dict]:
        """Fetch and parse one RSS feed, returning normalized news dicts.

        Entries older than *hours* are dropped; summaries are stripped of
        HTML and truncated to ~200 chars. NOTE: feedparser does its own
        HTTP fetching here, bypassing self.session's headers.
        """
        try:
            feed = feedparser.parse(rss_url)

            if not feed.entries:
                return []

            news_items = []
            cutoff_time = datetime.now() - timedelta(hours=hours)

            for entry in feed.entries[:15]:  # Limit to 15 per feed
                try:
                    # Parse timestamp; prefer published, then updated,
                    # else treat as just-published
                    if hasattr(entry, 'published_parsed') and entry.published_parsed:
                        timestamp = datetime(*entry.published_parsed[:6])
                    elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                        timestamp = datetime(*entry.updated_parsed[:6])
                    else:
                        timestamp = datetime.now()

                    # Skip old news
                    if timestamp < cutoff_time:
                        continue

                    # Extract title and summary
                    title = entry.get('title', '')
                    summary = entry.get('summary', '') or entry.get('description', '')

                    # Clean HTML from summary and cap its length
                    if summary:
                        summary = BeautifulSoup(summary, 'html.parser').get_text()
                        summary = summary[:200] + '...' if len(summary) > 200 else summary

                    url = entry.get('link', '')

                    # Verify sector relevance by keywords (case-insensitive
                    # substring hits across title + summary)
                    text = f"{title} {summary}".lower()
                    keyword_matches = sum(1 for kw in sector_info['keywords'] if kw.lower() in text)

                    # Skip if not relevant enough (unless from sector-specific feed)
                    # NOTE(review): with the current SECTORS config every sector
                    # has at most 2 rss_sources, so `> 3` is never true and this
                    # gate never fires - confirm whether the intent was `< 3`
                    # or a different threshold.
                    if keyword_matches == 0 and len(sector_info['rss_sources']) > 3:
                        continue

                    # Categorize and analyze
                    category = self._categorize_news(text)
                    sentiment = self._analyze_sentiment(text)
                    impact = self._assess_impact(sector_info['weight'], keyword_matches)

                    news_items.append({
                        # NOTE: hash() of a str is per-process (PYTHONHASHSEED),
                        # ids are only stable within one run
                        'id': hash(url),
                        'title': title,
                        'summary': summary or title[:200],
                        'source': sector_info['name'],
                        'sector': sector_id,  # Add sector field
                        'category': category,
                        'timestamp': timestamp,
                        'sentiment': sentiment,
                        'impact': impact,
                        'url': url,
                        'likes': 0,
                        'retweets': 0,
                        'is_breaking': False,
                        'source_weight': sector_info['weight'],
                        'from_web': False
                    })

                except Exception as e:
                    # One malformed entry must not abort the whole feed
                    logger.debug(f"Error parsing RSS entry: {e}")
                    continue

            return news_items

        except Exception as e:
            logger.error(f"Error fetching RSS feed {rss_url}: {e}")
            return []

    def _categorize_news(self, text: str) -> str:
        """Categorize news as 'macro', 'markets', or 'geopolitical' by
        keyword hit counts; defaults to 'markets' when nothing matches.
        Expects *text* to be pre-lowercased by the caller.
        """
        macro_keywords = ['Fed', 'ECB', 'inflation', 'rate', 'GDP', 'economy', 'recession']
        markets_keywords = ['stock', 'earnings', 'revenue', 'profit', 'IPO', 'merger', 'acquisition']
        geo_keywords = ['China', 'tariff', 'trade war', 'sanctions', 'regulation']

        macro_score = sum(1 for kw in macro_keywords if kw.lower() in text)
        markets_score = sum(1 for kw in markets_keywords if kw.lower() in text)
        geo_score = sum(1 for kw in geo_keywords if kw.lower() in text)

        # max() tie-break follows insertion order: macro, markets, geopolitical
        scores = {'macro': macro_score, 'markets': markets_score, 'geopolitical': geo_score}
        return max(scores, key=scores.get) if max(scores.values()) > 0 else 'markets'

    def _analyze_sentiment(self, text: str) -> str:
        """Return 'positive', 'negative', or 'neutral' from keyword counts.
        Expects *text* to be pre-lowercased by the caller.
        """
        positive = ['surge', 'soar', 'rally', 'beat', 'upgrade', 'gain', 'rise', 'bullish', 'positive']
        negative = ['plunge', 'crash', 'fall', 'miss', 'downgrade', 'loss', 'drop', 'bearish', 'negative']

        pos_count = sum(1 for word in positive if word in text)
        neg_count = sum(1 for word in negative if word in text)

        if pos_count > neg_count:
            return 'positive'
        elif neg_count > pos_count:
            return 'negative'
        return 'neutral'

    def _assess_impact(self, sector_weight: float, keyword_matches: int) -> str:
        """Grade impact: 'high' needs a heavyweight sector (>=1.5) AND >=3
        keyword hits; 'medium' needs >=2 hits; everything else is 'low'.
        """
        if sector_weight >= 1.5 and keyword_matches >= 3:
            return 'high'
        elif keyword_matches >= 2:
            return 'medium'
        else:
            return 'low'

    def _get_mock_sectoral_news(self) -> List[Dict]:
        """Hard-coded fallback items (one per sector) used when every feed
        fails or yields nothing; same dict shape as _fetch_rss_feed emits.
        """
        now = datetime.now()

        return [
            {
                'id': 1,
                'title': 'Apple announces new iPhone with advanced AI capabilities',
                'summary': 'Apple unveils next-generation iPhone featuring on-device AI processing',
                'source': 'Technology',
                'sector': 'tech',
                'category': 'markets',
                'timestamp': now - timedelta(minutes=30),
                'sentiment': 'positive',
                'impact': 'high',
                'url': 'https://techcrunch.com',
                'likes': 0,
                'retweets': 0,
                'is_breaking': False,
                'source_weight': 1.5,
                'from_web': False
            },
            {
                'id': 2,
                'title': 'JPMorgan reports strong Q4 earnings beat analyst expectations',
                'summary': 'Major investment bank posts record profits amid trading surge',
                'source': 'Finance',
                'sector': 'finance',
                'category': 'markets',
                'timestamp': now - timedelta(hours=1),
                'sentiment': 'positive',
                'impact': 'high',
                'url': 'https://cnbc.com',
                'likes': 0,
                'retweets': 0,
                'is_breaking': False,
                'source_weight': 1.5,
                'from_web': False
            },
            {
                'id': 3,
                'title': 'OPEC+ extends oil production cuts through Q2',
                'summary': 'Major oil producers agree to maintain supply restrictions',
                'source': 'Energy',
                'sector': 'energy',
                'category': 'geopolitical',
                'timestamp': now - timedelta(hours=2),
                'sentiment': 'neutral',
                'impact': 'high',
                'url': 'https://reuters.com',
                'likes': 0,
                'retweets': 0,
                'is_breaking': False,
                'source_weight': 1.6,
                'from_web': False
            },
            {
                'id': 4,
                'title': 'Pfizer receives FDA approval for new cancer treatment',
                'summary': 'Breakthrough therapy approved for late-stage lung cancer',
                'source': 'Healthcare',
                'sector': 'healthcare',
                'category': 'markets',
                'timestamp': now - timedelta(hours=3),
                'sentiment': 'positive',
                'impact': 'medium',
                'url': 'https://cnbc.com',
                'likes': 0,
                'retweets': 0,
                'is_breaking': False,
                'source_weight': 1.5,
                'from_web': False
            },
            {
                'id': 5,
                'title': 'Amazon expands same-day delivery to 50 new cities',
                'summary': 'E-commerce giant accelerates logistics network expansion',
                'source': 'Consumer & Retail',
                'sector': 'consumer',
                'category': 'markets',
                'timestamp': now - timedelta(hours=4),
                'sentiment': 'positive',
                'impact': 'medium',
                'url': 'https://techcrunch.com',
                'likes': 0,
                'retweets': 0,
                'is_breaking': False,
                'source_weight': 1.3,
                'from_web': False
            },
            {
                'id': 6,
                'title': 'Boeing wins $10B contract for new military aircraft',
                'summary': 'Defense contractor secures major government order',
                'source': 'Industrials',
                'sector': 'industrials',
                'category': 'markets',
                'timestamp': now - timedelta(hours=5),
                'sentiment': 'positive',
                'impact': 'medium',
                'url': 'https://reuters.com',
                'likes': 0,
                'retweets': 0,
                'is_breaking': False,
                'source_weight': 1.4,
                'from_web': False
            },
            {
                'id': 7,
                'title': 'US housing starts surge 15% in December',
                'summary': 'Construction activity rebounds amid lower mortgage rates',
                'source': 'Real Estate',
                'sector': 'real_estate',
                'category': 'macro',
                'timestamp': now - timedelta(hours=6),
                'sentiment': 'positive',
                'impact': 'medium',
                'url': 'https://cnbc.com',
                'likes': 0,
                'retweets': 0,
                'is_breaking': False,
                'source_weight': 1.3,
                'from_web': False
            }
        ]
app/utils/news_cache.py CHANGED
@@ -34,6 +34,10 @@ class NewsCacheManager:
34
  'reddit': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
35
  'rss': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
36
  'ai_tech': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
 
 
 
 
37
  'dedup_index': {}, # Global deduplication index
38
  'filtered_cache': {} # Cached filtered results
39
  }
@@ -312,7 +316,7 @@ class NewsCacheManager:
312
  self._clear_source_from_dedup(source)
313
  logger.info(f"🗑️ Cleared cache for {source}")
314
  else:
315
- for src in ['twitter', 'reddit', 'rss', 'ai_tech']:
316
  self.cache[src] = {'raw_news': [], 'last_fetch': None, 'ttl': 180}
317
  self.cache['dedup_index'] = {}
318
  self.cache['filtered_cache'] = {}
@@ -346,6 +350,26 @@ class NewsCacheManager:
346
  'age_seconds': self._get_cache_age('ai_tech'),
347
  'is_valid': self._is_cache_valid('ai_tech')
348
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  'dedup_index_size': len(self.cache['dedup_index']),
350
  'filtered_cache_size': len(self.cache['filtered_cache'])
351
  }
 
34
  'reddit': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
35
  'rss': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
36
  'ai_tech': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
37
+ 'predictions': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
38
+ 'sectoral_news': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
39
+ 'market_events': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
40
+ 'economic_calendar': {'raw_news': [], 'last_fetch': None, 'ttl': default_ttl},
41
  'dedup_index': {}, # Global deduplication index
42
  'filtered_cache': {} # Cached filtered results
43
  }
 
316
  self._clear_source_from_dedup(source)
317
  logger.info(f"🗑️ Cleared cache for {source}")
318
  else:
319
+ for src in ['twitter', 'reddit', 'rss', 'ai_tech', 'predictions', 'sectoral_news', 'market_events', 'economic_calendar']:
320
  self.cache[src] = {'raw_news': [], 'last_fetch': None, 'ttl': 180}
321
  self.cache['dedup_index'] = {}
322
  self.cache['filtered_cache'] = {}
 
350
  'age_seconds': self._get_cache_age('ai_tech'),
351
  'is_valid': self._is_cache_valid('ai_tech')
352
  },
353
+ 'predictions': {
354
+ 'items': len(self.cache['predictions']['raw_news']),
355
+ 'age_seconds': self._get_cache_age('predictions'),
356
+ 'is_valid': self._is_cache_valid('predictions')
357
+ },
358
+ 'sectoral_news': {
359
+ 'items': len(self.cache['sectoral_news']['raw_news']),
360
+ 'age_seconds': self._get_cache_age('sectoral_news'),
361
+ 'is_valid': self._is_cache_valid('sectoral_news')
362
+ },
363
+ 'market_events': {
364
+ 'items': len(self.cache['market_events']['raw_news']),
365
+ 'age_seconds': self._get_cache_age('market_events'),
366
+ 'is_valid': self._is_cache_valid('market_events')
367
+ },
368
+ 'economic_calendar': {
369
+ 'items': len(self.cache['economic_calendar']['raw_news']),
370
+ 'age_seconds': self._get_cache_age('economic_calendar'),
371
+ 'is_valid': self._is_cache_valid('economic_calendar')
372
+ },
373
  'dedup_index_size': len(self.cache['dedup_index']),
374
  'filtered_cache_size': len(self.cache['filtered_cache'])
375
  }