""" News & AI Dashboard Page - Real-time Financial Intelligence Powered by professional-grade news monitoring with low-latency delivery """ import streamlit as st import sys import os import logging # Suppress noisy Playwright asyncio errors logging.getLogger('asyncio').setLevel(logging.CRITICAL) logging.getLogger('playwright').setLevel(logging.WARNING) # Add parent directory to path for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from components.styles import DARK_THEME_CSS from components.news import ( display_news_statistics, display_category_breakdown, display_breaking_news_banner, display_scrollable_news_section, display_prediction_card, display_economic_event_card, display_economic_calendar_widget ) from utils.breaking_news_scorer import get_breaking_news_scorer # Import news scrapers try: from services.news_scraper import FinanceNewsScraper RSS_AVAILABLE = True except ImportError: RSS_AVAILABLE = False try: from services.twitter_news_playwright import TwitterFinanceMonitor TWITTER_AVAILABLE = True except ImportError: TWITTER_AVAILABLE = False try: from services.reddit_news import RedditFinanceMonitor REDDIT_AVAILABLE = True except ImportError: REDDIT_AVAILABLE = False try: from services.ai_tech_news import AITechNewsScraper AI_TECH_AVAILABLE = True except ImportError: AI_TECH_AVAILABLE = False try: from services.prediction_markets import PredictionMarketsScraper PREDICTIONS_AVAILABLE = True except ImportError: PREDICTIONS_AVAILABLE = False try: from services.sectoral_news import SectoralNewsScraper SECTORAL_AVAILABLE = True except ImportError: SECTORAL_AVAILABLE = False try: from services.market_events import MarketEventsScraper EVENTS_AVAILABLE = True except ImportError: EVENTS_AVAILABLE = False try: from services.economic_calendar import EconomicCalendarService CALENDAR_AVAILABLE = True except ImportError: CALENDAR_AVAILABLE = False # ---- Page Configuration ---- st.set_page_config( page_title="News Dashboard - 
Financial Platform", page_icon="📰", layout="wide", initial_sidebar_state="expanded", ) # ---- Apply Dark Theme ---- st.markdown(DARK_THEME_CSS, unsafe_allow_html=True) # Initialize news monitors (with caching) if 'rss_monitor' not in st.session_state and RSS_AVAILABLE: st.session_state.rss_monitor = FinanceNewsScraper() if 'twitter_monitor' not in st.session_state and TWITTER_AVAILABLE: st.session_state.twitter_monitor = TwitterFinanceMonitor() if 'reddit_monitor' not in st.session_state and REDDIT_AVAILABLE: st.session_state.reddit_monitor = RedditFinanceMonitor() if 'ai_tech_monitor' not in st.session_state and AI_TECH_AVAILABLE: st.session_state.ai_tech_monitor = AITechNewsScraper() if 'prediction_markets_monitor' not in st.session_state and PREDICTIONS_AVAILABLE: st.session_state.prediction_markets_monitor = PredictionMarketsScraper() if 'sectoral_news_monitor' not in st.session_state and SECTORAL_AVAILABLE: st.session_state.sectoral_news_monitor = SectoralNewsScraper() if 'market_events_monitor' not in st.session_state and EVENTS_AVAILABLE: st.session_state.market_events_monitor = MarketEventsScraper() if 'economic_calendar_service' not in st.session_state and CALENDAR_AVAILABLE: st.session_state.economic_calendar_service = EconomicCalendarService() rss_monitor = st.session_state.get('rss_monitor') twitter_monitor = st.session_state.get('twitter_monitor') reddit_monitor = st.session_state.get('reddit_monitor') ai_tech_monitor = st.session_state.get('ai_tech_monitor') prediction_markets_monitor = st.session_state.get('prediction_markets_monitor') sectoral_news_monitor = st.session_state.get('sectoral_news_monitor') market_events_monitor = st.session_state.get('market_events_monitor') economic_calendar_service = st.session_state.get('economic_calendar_service') # Initialize unified cache manager if 'news_cache_manager' not in st.session_state: from utils.news_cache import NewsCacheManager st.session_state.news_cache_manager = NewsCacheManager(default_ttl=180) 
cache_manager = st.session_state.news_cache_manager

import time

import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
# NOTE: concurrent.futures.TimeoutError only became an alias of the builtin
# TimeoutError in Python 3.11; catch the futures class explicitly so the
# as_completed() timeout path works on older interpreters too.
from concurrent.futures import TimeoutError as FuturesTimeoutError

# ---- Header ----
st.markdown("# 🤖 Live Financial News & AI Dashboard")
st.markdown("AI-powered market insights with sentiment analysis and trading recommendations. Real-time macro, markets & geopolitical intelligence")
st.markdown("---")

# ---- Sidebar Filters ----
with st.sidebar:
    st.markdown("## ⚙️ News Filters")

    # Category filter
    category_filter = st.selectbox(
        "Category",
        ["all", "macro", "markets", "geopolitical"],
        format_func=lambda x: x.upper() if x != "all" else "ALL CATEGORIES",
        help="Filter by news category"
    )

    # Sentiment filter
    sentiment_filter = st.selectbox(
        "Sentiment",
        ["all", "positive", "negative", "neutral"],
        format_func=lambda x: x.upper() if x != "all" else "ALL SENTIMENTS",
        help="Filter by market sentiment"
    )

    # Impact filter
    impact_filter = st.selectbox(
        "Impact Level",
        ["all", "high", "medium", "low"],
        format_func=lambda x: x.upper() if x != "all" else "ALL IMPACT LEVELS",
        help="Filter by market impact"
    )

    st.markdown("---")

    # Refresh controls
    st.markdown("### 🔄 Refresh Settings")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("🔄 Refresh Now", use_container_width=True, type="primary"):
            st.session_state.force_refresh = True
            st.rerun()
    with col2:
        auto_refresh = st.checkbox("Auto-refresh", value=True, help="Auto-refresh every 3 minutes")
    if auto_refresh:
        st.info("⏱️ Auto-refresh enabled (3 min)")

    st.markdown("---")
    st.markdown("### 📊 Feed Statistics")

    # Get cache statistics from cache manager
    cache_stats = cache_manager.get_statistics()

    # Calculate totals from cache ('ai_tech' may be absent before first fetch)
    total_stories = (
        cache_stats['twitter']['items']
        + cache_stats['reddit']['items']
        + cache_stats['rss']['items']
        + cache_stats.get('ai_tech', {}).get('items', 0)
    )

    st.metric("Total Stories", total_stories)
    st.metric("Cache Status", "✅ Active" if total_stories > 0 else "⏳ Loading")

    # Show cache age for transparency (twitter entry stands in for all sources)
    if cache_stats['twitter']['is_valid']:
        age = int(cache_stats['twitter']['age_seconds'])
        st.caption(f"🕐 Cache age: {age}s / 180s")
    else:
        st.caption("🔄 Fetching fresh data...")

    st.markdown("---")
    st.markdown("### ℹ️ Sources")

    # Count total sources; monitors are None when their service is unavailable
    twitter_sources = len(twitter_monitor.SOURCES) if twitter_monitor else 0
    reddit_sources = len(reddit_monitor.SUBREDDITS) if reddit_monitor else 0
    rss_sources = len(rss_monitor.SOURCES) if rss_monitor else 0
    ai_tech_sources = len(ai_tech_monitor.SOURCES) if ai_tech_monitor else 0
    prediction_sources = 3  # Polymarket, Metaculus, CME FedWatch
    sectoral_sources = 7  # 7 sectors
    events_sources = 3  # Earnings, indicators, central banks
    total_sources = (twitter_sources + reddit_sources + rss_sources + ai_tech_sources
                     + prediction_sources + sectoral_sources + events_sources)

    st.markdown(f"""
**Twitter/X Accounts ({twitter_sources})**
• WalterBloomberg • FXHedge • DeItaone • Reuters • Bloomberg • FT • WSJ • CNBC • BBC • MarketWatch • The Economist • AP • AFP

**Reddit Communities ({reddit_sources})**
• r/wallstreetbets • r/stocks • r/investing • r/algotrading • r/economics • r/geopolitics • r/options • r/SecurityAnalysis

**RSS + Web Scraping ({rss_sources})**
• CNBC • Bloomberg • FT • WSJ • BBC • Yahoo Finance • Google News • The Economist • Fed (2.0x) • ECB (2.0x) • IMF

**AI & Tech Sources ({ai_tech_sources})**
• OpenAI • Google AI • Microsoft AI • Meta AI • DeepMind • Anthropic • AWS AI • NVIDIA • TechCrunch • The Verge • VentureBeat • MIT Tech Review • Wired • Ars Technica

**Prediction Markets ({prediction_sources})**
• Polymarket • Metaculus • CME FedWatch

**Sectoral Coverage ({sectoral_sources})**
• Finance • Tech • Energy • Healthcare • Consumer • Industrials • Real Estate

**Market Events ({events_sources})**
• Earnings Calendar • Economic Indicators • Central Bank Events (Fed, ECB, BoE, BoJ)

**Total: {total_sources} Premium Sources**
""", unsafe_allow_html=True)

# ---- Main Content Area ----

# Check for forced refresh (don't clear yet - wait until after fetching)
force_refresh = st.session_state.get('force_refresh', False)


def _fetch_source(monitor, source, fetcher_func, error_label, **fetch_kwargs):
    """Fetch one news source through the cache manager.

    Returns a ``(DataFrame, error)`` pair where ``error`` is ``None`` on
    success.  A missing monitor yields an empty frame with no error; any
    exception is reported as a string, never raised, so a single failing
    source cannot take down the page.
    """
    try:
        if monitor is None:
            return pd.DataFrame(), None
        items = cache_manager.get_news(
            source=source,
            fetcher_func=fetcher_func,
            force_refresh=force_refresh,
            **fetch_kwargs
        )
        if items:
            df = pd.DataFrame(items)
            if not df.empty:
                df['timestamp'] = pd.to_datetime(df['timestamp'])
                return df, None
    except Exception as e:
        return pd.DataFrame(), f"{error_label}: {e}"
    return pd.DataFrame(), None


# One spec per source: (cache key, monitor, fetcher attribute, error label,
# cache-manager kwargs).  Replaces eight near-identical fetch functions.
_SOURCE_SPECS = [
    ('twitter', twitter_monitor, 'scrape_twitter_news',
     'Twitter scraping unavailable', {'max_tweets': 50}),
    ('reddit', reddit_monitor, 'scrape_reddit_news',
     'Reddit scraping unavailable', {'max_posts': 50, 'hours': 12}),
    ('rss', rss_monitor, 'scrape_news',
     'RSS scraping unavailable', {'max_items': 100}),
    ('ai_tech', ai_tech_monitor, 'scrape_ai_tech_news',
     'AI/Tech news unavailable', {'max_items': 100, 'hours': 48}),
    ('predictions', prediction_markets_monitor, 'scrape_predictions',
     'Prediction markets unavailable', {'max_items': 50}),
    ('sectoral_news', sectoral_news_monitor, 'scrape_sectoral_news',
     'Sectoral news unavailable', {'max_items': 50, 'hours': 24}),
    ('market_events', market_events_monitor, 'scrape_market_events',
     'Market events unavailable', {'max_items': 50, 'days_ahead': 14}),
    ('economic_calendar', economic_calendar_service, 'get_upcoming_events',
     'Economic calendar unavailable', {'days_ahead': 7, 'min_importance': 'medium'}),
]

# Result frame per source, pre-filled empty so timeouts leave valid frames.
frames = {source: pd.DataFrame() for source, *_ in _SOURCE_SPECS}

# Progressive loading: display status as results arrive
status_placeholder = st.empty()
fetch_errors = []
completed_sources = []  # display labels; may carry "(error)"/"(timeout)" suffixes

# Execute all fetches in parallel — one worker per source
with st.spinner("Loading news from 8 sources..."):
    with ThreadPoolExecutor(max_workers=len(_SOURCE_SPECS)) as executor:
        futures_map = {
            executor.submit(
                _fetch_source,
                monitor,
                source,
                getattr(monitor, fetcher_attr) if monitor else None,
                label,
                **kwargs
            ): source
            for source, monitor, fetcher_attr, label, kwargs in _SOURCE_SPECS
        }

        try:
            # Process results as they complete (progressive loading)
            for future in as_completed(futures_map, timeout=90):
                source_name = futures_map[future]
                try:
                    result_df, error = future.result()
                    completed_sources.append(source_name)
                    status_placeholder.info(
                        f"🔍 Loaded {len(completed_sources)}/8 sources ({', '.join(completed_sources)})"
                    )
                    frames[source_name] = result_df
                    if error:
                        fetch_errors.append(error)
                except Exception as e:
                    fetch_errors.append(f"Error fetching {source_name} news: {e}")
                    completed_sources.append(f"{source_name} (error)")
                    status_placeholder.warning(
                        f"⚠️ {source_name} failed, continuing with other sources..."
                    )
        except FuturesTimeoutError:
            # Handle timeout gracefully - continue with whatever results we have
            fetch_errors.append("⏱️ Some sources timed out after 90 seconds - displaying available results")
            status_placeholder.warning(
                f"⚠️ {len(completed_sources)}/8 sources loaded (some timed out)"
            )
            # Strip the "(error)" suffix before the set difference so a source
            # that already failed is not double-reported as timed out as well.
            finished = {label.split(' ')[0] for label in completed_sources}
            for source in set(futures_map.values()) - finished:
                fetch_errors.append(f"{source} timed out - skipped")
                completed_sources.append(f"{source} (timeout)")

# Clear the status message after all sources complete
status_placeholder.success(f"✅ Loaded {len(completed_sources)}/8 sources successfully")

twitter_df = frames['twitter']
reddit_df = frames['reddit']
rss_all_df = frames['rss']
ai_tech_df = frames['ai_tech']
predictions_df = frames['predictions']
sectoral_news_df = frames['sectoral_news']
market_events_df = frames['market_events']
economic_calendar_df = frames['economic_calendar']

# Subset of RSS items that were scraped directly from outlet main pages
rss_main_df = pd.DataFrame()
if not rss_all_df.empty and 'from_web' in rss_all_df.columns:
    rss_main_df = rss_all_df[rss_all_df['from_web']].copy()

# Debug logging (console only, not displayed on page).  `logging` is already
# imported at module top; lazy %-formatting avoids work when the level is off.
logger = logging.getLogger(__name__)
logger.info(
    "News Fetch Results: Twitter=%d, Reddit=%d, RSS=%d, AI/Tech=%d, "
    "Predictions=%d, Sectoral=%d, Events=%d, Calendar=%d",
    len(twitter_df), len(reddit_df), len(rss_all_df), len(ai_tech_df),
    len(predictions_df), len(sectoral_news_df), len(market_events_df),
    len(economic_calendar_df),
)
logger.info(
    "Availability: Predictions=%s, Sectoral=%s, Events=%s, Calendar=%s",
    PREDICTIONS_AVAILABLE, SECTORAL_AVAILABLE, EVENTS_AVAILABLE, CALENDAR_AVAILABLE,
)
for err in fetch_errors:
    logger.warning("Fetch error: %s", err)

# Clear force refresh flag after fetching is complete
if force_refresh:
    st.session_state.force_refresh = False

# Apply sidebar filters using cache manager (with filter result caching)
filters = {
    'category': category_filter,
    'sentiment': sentiment_filter,
    'impact': impact_filter,
}


def _apply_filters(df, cache_key):
    """Run the sidebar filters through the cache manager; no-op on empty frames."""
    if df.empty:
        return df
    return cache_manager.get_filtered_news(df, filters, cache_key)


twitter_filtered = _apply_filters(twitter_df, 'twitter')
reddit_filtered = _apply_filters(reddit_df, 'reddit')
rss_main_filtered = _apply_filters(rss_main_df, 'rss_main')
rss_all_filtered = _apply_filters(rss_all_df, 'rss_all')

# Combine Twitter and Reddit for the first column, newest first
_social = [df for df in (twitter_filtered, reddit_filtered) if not df.empty]
twitter_reddit_df = pd.concat(_social, ignore_index=True) if _social else pd.DataFrame()
if not twitter_reddit_df.empty:
    twitter_reddit_df = twitter_reddit_df.sort_values('timestamp', ascending=False)

# Combine all filtered sources for the breaking news banner
_banner = [df for df in (twitter_filtered, reddit_filtered, rss_all_filtered) if not df.empty]
all_news_df = pd.concat(_banner, ignore_index=True) if _banner else pd.DataFrame()

# Display breaking news banner with ML-based scoring
if not all_news_df.empty:
    scorer = get_breaking_news_scorer()
    all_news_list = all_news_df.to_dict('records')
    # Only show news with impact score >= 40 (medium-high impact threshold)
    breaking_news_items = scorer.get_breaking_news(all_news_list, top_n=1)
    if breaking_news_items and breaking_news_items[0]['breaking_score'] >= 40.0:
        # Display the highest-impact news in the banner
        display_breaking_news_banner(pd.DataFrame([breaking_news_items[0]]))
    elif breaking_news_items:
        top_score = breaking_news_items[0]['breaking_score']
        st.info(f"📊 Monitoring financial markets - highest impact score: {top_score:.1f}/100 (threshold: 40)")
    else:
        st.info("📊 Monitoring financial markets - no news items available for scoring")
else:
    # No news data available at all
    st.info("📊 Loading financial news - breaking news banner will appear when data is available")

st.markdown("---")

# ---- ECONOMIC CALENDAR WIDGET ----
if not economic_calendar_df.empty:
    display_economic_calendar_widget(economic_calendar_df)
    st.markdown("---")

# ---- FOUR-COLUMN SCROLLABLE NEWS LAYOUT (TradingView Style) ----
col1, col2, col3, col4 = st.columns(4)

with col1:
    # SECTION 1: Twitter/X & Reddit Breaking News
    if not twitter_reddit_df.empty:
        display_scrollable_news_section(
            twitter_reddit_df,
            section_title="Twitter/X & Reddit News",
            section_icon="🌐",
            section_subtitle="Real-time news from premium accounts & communities (last 12h)",
            max_items=100,
            height="700px"
        )
    elif not twitter_df.empty or not reddit_df.empty:
        # Data exists but the active filters matched nothing
        st.markdown("""
📭
No matches found
Try adjusting your filters to see Twitter/X & Reddit news
""", unsafe_allow_html=True)
    else:
        st.markdown("""
Loading Twitter/X & Reddit News
Fetching real-time news from premium sources...
This may take 30-60 seconds on first load
""", unsafe_allow_html=True)

with col2:
    # SECTION 2: Main Page News (Web-Scraped)
    if not rss_main_filtered.empty:
        display_scrollable_news_section(
            rss_main_filtered,
            section_title="Top Headlines",
            section_icon="🔥",
            section_subtitle="Latest from main pages",
            max_items=50,
            height="700px"
        )
    elif not rss_main_df.empty:
        st.markdown("""
📭
No matches found
Try adjusting your filters to see top headlines
""", unsafe_allow_html=True)
    else:
        st.markdown("""
Loading Top Headlines
Fetching latest news from major outlets...
Web scraping main pages
""", unsafe_allow_html=True)

with col3:
    # SECTION 3: RSS Feed News
    if not rss_all_filtered.empty:
        display_scrollable_news_section(
            rss_all_filtered,
            section_title="RSS Feed",
            section_icon="📰",
            section_subtitle="Aggregated from all sources",
            max_items=100,
            height="700px"
        )
    elif not rss_all_df.empty:
        st.markdown("""
📭
No matches found
Try adjusting your filters to see RSS feed news
""", unsafe_allow_html=True)
    else:
        st.markdown("""
Loading RSS Feed
Aggregating news from all RSS sources...
Bloomberg, Reuters, FT, WSJ & more
""", unsafe_allow_html=True)

with col4:
    # SECTION 4: AI & Tech News
    if not ai_tech_df.empty:
        display_scrollable_news_section(
            ai_tech_df,
            section_title="AI & Tech News",
            section_icon="🤖",
            section_subtitle="Latest from tech giants & AI research",
            max_items=100,
            height="700px"
        )
    else:
        # Surface an AI/Tech-specific fetch error when one was recorded
        ai_tech_error = next(
            (err for err in fetch_errors if 'ai_tech' in err.lower() or 'AI/Tech' in err),
            None
        )
        if ai_tech_error:
            st.markdown(f"""
⚠️
AI & Tech News Unavailable
{ai_tech_error}
""", unsafe_allow_html=True)
        else:
            st.markdown("""
Loading AI & Tech News
Aggregating from tech blogs & research...
OpenAI, Google AI, Microsoft, Meta & more
If this persists, check the "Source Fetch Warnings" section below
""", unsafe_allow_html=True)

# ---- SECOND ROW: MARKET INTELLIGENCE (3 COLUMNS) ----
st.markdown("---")
st.markdown("## 📊 Market Intelligence - Predictions, Sectors & Events")
col5, col6, col7 = st.columns(3)

with col5:
    # Prediction Markets Column
    if not predictions_df.empty:
        display_scrollable_news_section(
            predictions_df,
            section_title="Prediction Markets",
            section_icon="🎲",
            section_subtitle="Polymarket, Metaculus & CME FedWatch",
            max_items=50,
            height="600px"
        )
    else:
        st.markdown("""
Loading Prediction Markets
Fetching market forecasts...
""", unsafe_allow_html=True)

with col6:
    # Sectoral News Column
    if not sectoral_news_df.empty:
        display_scrollable_news_section(
            sectoral_news_df,
            section_title="Sectoral News",
            section_icon="🏭",
            section_subtitle="7 sectors: Finance, Tech, Energy & more",
            max_items=50,
            height="600px"
        )
    else:
        st.markdown("""
Loading Sectoral News
Aggregating sector-specific news...
""", unsafe_allow_html=True)

with col7:
    # Market Events Column
    if not market_events_df.empty:
        display_scrollable_news_section(
            market_events_df,
            section_title="Market Events",
            section_icon="📈",
            section_subtitle="Earnings, indicators & central banks",
            max_items=50,
            height="600px"
        )
    else:
        st.markdown("""
Loading Market Events
Fetching earnings & economic indicators...
""", unsafe_allow_html=True)

# Display fetch errors in expander (less intrusive)
if fetch_errors:
    with st.expander("⚠️ Source Fetch Warnings", expanded=False):
        for error in fetch_errors:
            st.caption(f"• {error}")

# Auto-refresh logic: sleeping blocks this script run until the next rerun.
if auto_refresh:
    time.sleep(180)  # 3 minutes
    st.rerun()

# ---- Footer with Instructions ----
st.markdown("---")
st.markdown("""
### 💡 How to Use This Dashboard

**For Traders:**
- Monitor breaking news in real-time for market-moving events
- Filter by category to focus on macro, markets, or geopolitical news
- Use sentiment analysis to gauge market mood
- High-impact news items require immediate attention

**Tips:**
- Enable auto-refresh for continuous monitoring during trading hours
- Focus on "HIGH IMPACT" news for potential volatility
- Breaking news (🔴) indicates urgent market-moving information
- Check engagement metrics (likes + retweets) for news importance

**Data Source:** Dual-mode scraping - RSS feeds + direct web page parsing from Reuters, Bloomberg, FT, WSJ, CNBC, Google News, Yahoo Finance, Fed, ECB and more

**Update Frequency:** 3-minute cache for low-latency delivery

**No Authentication Required:** Public sources - works out of the box
""")