Dmitry Beresnev committed on
Commit
e918eaf
·
1 Parent(s): 07b3173

add AI news feed

Browse files
app/pages/05_Dashboard.py CHANGED
@@ -38,6 +38,12 @@ try:
38
  except ImportError:
39
  REDDIT_AVAILABLE = False
40
 
 
 
 
 
 
 
41
 
42
  # ---- Page Configuration ----
43
  st.set_page_config(
@@ -60,9 +66,13 @@ if 'twitter_monitor' not in st.session_state and TWITTER_AVAILABLE:
60
  if 'reddit_monitor' not in st.session_state and REDDIT_AVAILABLE:
61
  st.session_state.reddit_monitor = RedditFinanceMonitor()
62
 
 
 
 
63
  rss_monitor = st.session_state.get('rss_monitor')
64
  twitter_monitor = st.session_state.get('twitter_monitor')
65
  reddit_monitor = st.session_state.get('reddit_monitor')
 
66
 
67
  # Initialize unified cache manager
68
  if 'news_cache_manager' not in st.session_state:
@@ -132,7 +142,8 @@ with st.sidebar:
132
  total_stories = (
133
  cache_stats['twitter']['items'] +
134
  cache_stats['reddit']['items'] +
135
- cache_stats['rss']['items']
 
136
  )
137
 
138
  # Display metrics
@@ -153,7 +164,8 @@ with st.sidebar:
153
  twitter_sources = len(twitter_monitor.SOURCES) if twitter_monitor else 0
154
  reddit_sources = len(reddit_monitor.SUBREDDITS) if reddit_monitor else 0
155
  rss_sources = len(rss_monitor.SOURCES) if rss_monitor else 0
156
- total_sources = twitter_sources + reddit_sources + rss_sources
 
157
 
158
  st.markdown(f"""
159
  <div style='font-size: 11px; line-height: 1.6;'>
@@ -174,6 +186,12 @@ with st.sidebar:
174
  • BBC • Yahoo Finance • The Economist
175
  • Fed (2.0x) • ECB (2.0x) • IMF
176
 
 
 
 
 
 
 
177
  **Total: {total_sources} Premium Sources**
178
  </div>
179
  """, unsafe_allow_html=True)
@@ -192,6 +210,7 @@ twitter_df = pd.DataFrame()
192
  reddit_df = pd.DataFrame()
193
  rss_all_df = pd.DataFrame()
194
  rss_main_df = pd.DataFrame()
 
195
 
196
  def fetch_twitter_news():
197
  """Fetch Twitter/X news via cache manager"""
@@ -254,19 +273,42 @@ def fetch_rss_news():
254
  return pd.DataFrame(), f"RSS scraping unavailable: {e}"
255
  return pd.DataFrame(), None
256
 
257
- with st.spinner("🔍 Fetching latest financial news in parallel..."):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  # Execute all news fetching operations in parallel using ThreadPoolExecutor
259
- with ThreadPoolExecutor(max_workers=3) as executor:
260
  # Submit all tasks
261
  future_twitter = executor.submit(fetch_twitter_news)
262
  future_reddit = executor.submit(fetch_reddit_news)
263
  future_rss = executor.submit(fetch_rss_news)
 
264
 
265
  # Collect results as they complete
266
  futures = {
267
  'twitter': future_twitter,
268
  'reddit': future_reddit,
269
- 'rss': future_rss
 
270
  }
271
 
272
  for source_name, future in futures.items():
@@ -285,6 +327,10 @@ with st.spinner("🔍 Fetching latest financial news in parallel..."):
285
  rss_all_df = result_df
286
  if error:
287
  st.warning(error)
 
 
 
 
288
  # Get main page news subset for RSS
289
  if not rss_all_df.empty and 'from_web' in rss_all_df.columns:
290
  rss_main_df = rss_all_df[rss_all_df['from_web'] == True].copy()
@@ -338,9 +384,9 @@ if not all_news_df.empty:
338
 
339
  st.markdown("---")
340
 
341
- # ---- THREE-COLUMN SCROLLABLE NEWS LAYOUT (TradingView Style) ----
342
 
343
- col1, col2, col3 = st.columns(3)
344
 
345
  with col1:
346
  # SECTION 1: Twitter/X & Reddit Breaking News
@@ -447,6 +493,33 @@ with col3:
447
  </style>
448
  """, unsafe_allow_html=True)
449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  # Auto-refresh logic
451
  if auto_refresh:
452
  import time
 
38
  except ImportError:
39
  REDDIT_AVAILABLE = False
40
 
41
+ try:
42
+ from services.ai_tech_news import AITechNewsScraper
43
+ AI_TECH_AVAILABLE = True
44
+ except ImportError:
45
+ AI_TECH_AVAILABLE = False
46
+
47
 
48
  # ---- Page Configuration ----
49
  st.set_page_config(
 
66
  if 'reddit_monitor' not in st.session_state and REDDIT_AVAILABLE:
67
  st.session_state.reddit_monitor = RedditFinanceMonitor()
68
 
69
+ if 'ai_tech_monitor' not in st.session_state and AI_TECH_AVAILABLE:
70
+ st.session_state.ai_tech_monitor = AITechNewsScraper()
71
+
72
  rss_monitor = st.session_state.get('rss_monitor')
73
  twitter_monitor = st.session_state.get('twitter_monitor')
74
  reddit_monitor = st.session_state.get('reddit_monitor')
75
+ ai_tech_monitor = st.session_state.get('ai_tech_monitor')
76
 
77
  # Initialize unified cache manager
78
  if 'news_cache_manager' not in st.session_state:
 
142
  total_stories = (
143
  cache_stats['twitter']['items'] +
144
  cache_stats['reddit']['items'] +
145
+ cache_stats['rss']['items'] +
146
+ cache_stats.get('ai_tech', {}).get('items', 0)
147
  )
148
 
149
  # Display metrics
 
164
  twitter_sources = len(twitter_monitor.SOURCES) if twitter_monitor else 0
165
  reddit_sources = len(reddit_monitor.SUBREDDITS) if reddit_monitor else 0
166
  rss_sources = len(rss_monitor.SOURCES) if rss_monitor else 0
167
+ ai_tech_sources = len(ai_tech_monitor.SOURCES) if ai_tech_monitor else 0
168
+ total_sources = twitter_sources + reddit_sources + rss_sources + ai_tech_sources
169
 
170
  st.markdown(f"""
171
  <div style='font-size: 11px; line-height: 1.6;'>
 
186
  • BBC • Yahoo Finance • The Economist
187
  • Fed (2.0x) • ECB (2.0x) • IMF
188
 
189
+ **AI & Tech Sources ({ai_tech_sources})**
190
+ • OpenAI • Google AI • Microsoft AI • Meta AI
191
+ • DeepMind • Anthropic • AWS AI • NVIDIA
192
+ • TechCrunch • The Verge • VentureBeat
193
+ • MIT Tech Review • Wired • Ars Technica
194
+
195
  **Total: {total_sources} Premium Sources**
196
  </div>
197
  """, unsafe_allow_html=True)
 
210
  reddit_df = pd.DataFrame()
211
  rss_all_df = pd.DataFrame()
212
  rss_main_df = pd.DataFrame()
213
+ ai_tech_df = pd.DataFrame()
214
 
215
  def fetch_twitter_news():
216
  """Fetch Twitter/X news via cache manager"""
 
273
  return pd.DataFrame(), f"RSS scraping unavailable: {e}"
274
  return pd.DataFrame(), None
275
 
276
def fetch_ai_tech_news():
    """Fetch AI/Tech news through the unified cache manager.

    Returns:
        (DataFrame, error) tuple: a DataFrame of news items (empty on
        failure or when the monitor is unavailable) and an optional
        human-readable error string for the sidebar warning.
    """
    # Guard clause: no monitor means the AI/Tech service failed to import.
    if not ai_tech_monitor:
        return pd.DataFrame(), None
    try:
        # Smart caching: only hits the network when stale or forced.
        items = cache_manager.get_news(
            source='ai_tech',
            fetcher_func=ai_tech_monitor.scrape_ai_tech_news,
            force_refresh=force_refresh,
            max_items=100,
            hours=48
        )
        if items:
            frame = pd.DataFrame(items)
            if not frame.empty:
                # Normalize timestamps so downstream sorting/filtering works.
                frame['timestamp'] = pd.to_datetime(frame['timestamp'])
                return frame, None
    except Exception as e:
        # Best-effort: surface the problem as a warning, never crash the page.
        return pd.DataFrame(), f"AI/Tech news unavailable: {e}"
    return pd.DataFrame(), None
296
+
297
+ with st.spinner("🔍 Fetching latest financial & tech news in parallel..."):
298
  # Execute all news fetching operations in parallel using ThreadPoolExecutor
299
+ with ThreadPoolExecutor(max_workers=4) as executor:
300
  # Submit all tasks
301
  future_twitter = executor.submit(fetch_twitter_news)
302
  future_reddit = executor.submit(fetch_reddit_news)
303
  future_rss = executor.submit(fetch_rss_news)
304
+ future_ai_tech = executor.submit(fetch_ai_tech_news)
305
 
306
  # Collect results as they complete
307
  futures = {
308
  'twitter': future_twitter,
309
  'reddit': future_reddit,
310
+ 'rss': future_rss,
311
+ 'ai_tech': future_ai_tech
312
  }
313
 
314
  for source_name, future in futures.items():
 
327
  rss_all_df = result_df
328
  if error:
329
  st.warning(error)
330
+ elif source_name == 'ai_tech':
331
+ ai_tech_df = result_df
332
+ if error:
333
+ st.warning(error)
334
  # Get main page news subset for RSS
335
  if not rss_all_df.empty and 'from_web' in rss_all_df.columns:
336
  rss_main_df = rss_all_df[rss_all_df['from_web'] == True].copy()
 
384
 
385
  st.markdown("---")
386
 
387
+ # ---- FOUR-COLUMN SCROLLABLE NEWS LAYOUT (TradingView Style) ----
388
 
389
+ col1, col2, col3, col4 = st.columns(4)
390
 
391
  with col1:
392
  # SECTION 1: Twitter/X & Reddit Breaking News
 
493
  </style>
494
  """, unsafe_allow_html=True)
495
 
496
+ with col4:
497
+ # SECTION 4: AI & Tech News
498
+ if not ai_tech_df.empty:
499
+ display_scrollable_news_section(
500
+ ai_tech_df,
501
+ section_title="AI & Tech News",
502
+ section_icon="🤖",
503
+ section_subtitle="Latest from tech giants & AI research",
504
+ max_items=100,
505
+ height="700px"
506
+ )
507
+ else:
508
+ st.markdown("""
509
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
510
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
511
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading AI & Tech News</div>
512
+ <div style="color: #787B86; font-size: 13px;">Aggregating from tech blogs & research...</div>
513
+ <div style="color: #787B86; font-size: 12px; margin-top: 8px; opacity: 0.7;">OpenAI, Google AI, Microsoft, Meta & more</div>
514
+ </div>
515
+ <style>
516
+ @keyframes pulse {
517
+ 0%, 100% { opacity: 1; transform: scale(1); }
518
+ 50% { opacity: 0.6; transform: scale(1.1); }
519
+ }
520
+ </style>
521
+ """, unsafe_allow_html=True)
522
+
523
  # Auto-refresh logic
524
  if auto_refresh:
525
  import time
app/services/ai_tech_news.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI & Tech News Scraper
3
+ Fetches news from popular tech resources and big tech company blogs
4
+ """
5
+
6
+ import feedparser
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+ from datetime import datetime, timedelta
10
+ from typing import List, Dict
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class AITechNewsScraper:
    """Scraper for AI and tech news from major sources and company blogs.

    Pulls RSS feeds from tech media outlets, big-tech company blogs, and
    research labs, normalizes each entry into the dashboard's common
    news-item schema, and tags items with heuristic impact / sentiment /
    breaking-news labels based on keyword matching.
    """

    # AI/Tech News Sources (RSS + Web)
    SOURCES = {
        # Major Tech News
        'TechCrunch AI': {
            'url': 'https://techcrunch.com/category/artificial-intelligence/feed/',
            'type': 'rss',
            'category': 'ai'
        },
        'The Verge AI': {
            'url': 'https://www.theverge.com/ai-artificial-intelligence/rss/index.xml',
            'type': 'rss',
            'category': 'ai'
        },
        'VentureBeat AI': {
            'url': 'https://venturebeat.com/category/ai/feed/',
            'type': 'rss',
            'category': 'ai'
        },
        'MIT Technology Review AI': {
            'url': 'https://www.technologyreview.com/topic/artificial-intelligence/feed',
            'type': 'rss',
            'category': 'ai'
        },
        'Ars Technica AI': {
            'url': 'https://feeds.arstechnica.com/arstechnica/technology-lab',
            'type': 'rss',
            'category': 'tech'
        },
        'Wired AI': {
            'url': 'https://www.wired.com/feed/tag/ai/latest/rss',
            'type': 'rss',
            'category': 'ai'
        },

        # Big Tech Company Blogs
        'OpenAI Blog': {
            'url': 'https://openai.com/blog/rss.xml',
            'type': 'rss',
            'category': 'ai'
        },
        'Google AI Blog': {
            'url': 'https://blog.google/technology/ai/rss/',
            'type': 'rss',
            'category': 'ai'
        },
        'Microsoft AI Blog': {
            'url': 'https://blogs.microsoft.com/ai/feed/',
            'type': 'rss',
            'category': 'ai'
        },
        'Meta AI Blog': {
            'url': 'https://ai.meta.com/blog/rss/',
            'type': 'rss',
            'category': 'ai'
        },
        'DeepMind Blog': {
            'url': 'https://deepmind.google/blog/rss.xml',
            'type': 'rss',
            'category': 'ai'
        },
        'Anthropic News': {
            'url': 'https://www.anthropic.com/news/rss.xml',
            'type': 'rss',
            'category': 'ai'
        },
        'AWS AI Blog': {
            'url': 'https://aws.amazon.com/blogs/machine-learning/feed/',
            'type': 'rss',
            'category': 'ai'
        },
        'NVIDIA AI Blog': {
            'url': 'https://blogs.nvidia.com/feed/',
            'type': 'rss',
            'category': 'ai'
        },

        # Research & Academia
        'Stanford HAI': {
            'url': 'https://hai.stanford.edu/news/rss.xml',
            'type': 'rss',
            'category': 'research'
        },
        'Berkeley AI Research': {
            'url': 'https://bair.berkeley.edu/blog/feed.xml',
            'type': 'rss',
            'category': 'research'
        },
    }

    def __init__(self):
        """Initialize the AI/Tech news scraper."""
        # NOTE(review): this session is currently unused — feedparser.parse()
        # fetches feeds with its own HTTP machinery. Kept for potential direct
        # requests; pass agent=... to feedparser.parse if the UA header matters.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        })

    def scrape_ai_tech_news(self, max_items: int = 100, hours: int = 48) -> List[Dict]:
        """
        Scrape AI and tech news from all sources.

        Args:
            max_items: Maximum number of news items to return
            hours: Only include news from the last N hours

        Returns:
            List of news items with standardized format, newest first.
        """
        all_news = []
        # NOTE(review): feedparser's *_parsed fields are UTC struct_times,
        # while datetime.now() is local time — items near the cutoff can be
        # off by the local UTC offset. Confirm whether this matters for a
        # 48-hour window before tightening it.
        cutoff_time = datetime.now() - timedelta(hours=hours)

        for source_name, source_config in self.SOURCES.items():
            try:
                if source_config['type'] == 'rss':
                    news_items = self._scrape_rss_feed(
                        source_name,
                        source_config['url'],
                        source_config['category'],
                        cutoff_time
                    )
                    all_news.extend(news_items)
                    logger.info(f"Scraped {len(news_items)} items from {source_name}")

            except Exception as e:
                # One broken feed must not take down the whole aggregation.
                logger.error(f"Error scraping {source_name}: {e}")
                continue

        # Sort by timestamp (newest first)
        all_news.sort(key=lambda x: x['timestamp'], reverse=True)

        # Limit to max_items
        return all_news[:max_items]

    def _scrape_rss_feed(self, source_name: str, feed_url: str,
                         category: str, cutoff_time: datetime) -> List[Dict]:
        """Scrape a single RSS feed into standardized news-item dicts."""
        news_items = []

        try:
            feed = feedparser.parse(feed_url)

            for entry in feed.entries:
                try:
                    # Parse timestamp; fall back to "now" when the feed
                    # carries no usable date so the item is not dropped.
                    if hasattr(entry, 'published_parsed') and entry.published_parsed:
                        timestamp = datetime(*entry.published_parsed[:6])
                    elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                        timestamp = datetime(*entry.updated_parsed[:6])
                    else:
                        timestamp = datetime.now()

                    # Skip old news
                    if timestamp < cutoff_time:
                        continue

                    # Extract title and summary
                    title = entry.get('title', 'No title')
                    summary = entry.get('summary', entry.get('description', ''))

                    # Clean HTML from summary
                    if summary:
                        soup = BeautifulSoup(summary, 'html.parser')
                        summary = soup.get_text().strip()
                        # Limit summary length
                        if len(summary) > 300:
                            summary = summary[:297] + '...'

                    # Determine impact and sentiment based on keywords
                    impact = self._determine_impact(title, summary)
                    sentiment = self._determine_sentiment(title, summary)

                    news_item = {
                        'title': title,
                        'summary': summary or title,
                        'source': source_name,
                        'url': entry.get('link', ''),
                        'timestamp': timestamp,
                        'category': category,
                        'impact': impact,
                        'sentiment': sentiment,
                        'is_breaking': self._is_breaking_news(title, summary),
                        # Zeroed engagement fields keep the schema aligned with
                        # the Twitter/Reddit monitors; RSS has no such data.
                        'likes': 0,  # No engagement data for RSS
                        'retweets': 0,
                        'reddit_score': 0,
                        'reddit_comments': 0
                    }

                    news_items.append(news_item)

                except Exception as e:
                    logger.error(f"Error parsing entry from {source_name}: {e}")
                    continue

        except Exception as e:
            logger.error(f"Error fetching RSS feed {feed_url}: {e}")

        return news_items

    def _determine_impact(self, title: str, summary: str) -> str:
        """Classify an item's impact as 'high', 'medium', or 'low'.

        Plain substring matching: short keywords can hit inside longer
        words (e.g. 'ban' in 'urban') — acceptable for a coarse feed label.
        """
        text = f"{title} {summary}".lower()

        high_impact_keywords = [
            'breakthrough', 'announce', 'launch', 'release', 'new model',
            'gpt', 'claude', 'gemini', 'llama', 'chatgpt',
            'billion', 'trillion', 'acquisition', 'merger',
            'regulation', 'ban', 'lawsuit', 'security breach',
            'major', 'significant', 'revolutionary', 'first-ever'
        ]

        medium_impact_keywords = [
            'update', 'improve', 'enhance', 'study', 'research',
            'partnership', 'collaboration', 'funding', 'investment',
            'expands', 'grows', 'adopts', 'implements'
        ]

        # High tier wins over medium; any() short-circuits on first hit.
        if any(keyword in text for keyword in high_impact_keywords):
            return 'high'
        if any(keyword in text for keyword in medium_impact_keywords):
            return 'medium'
        return 'low'

    def _determine_sentiment(self, title: str, summary: str) -> str:
        """Classify sentiment as 'positive', 'negative', or 'neutral'
        by comparing positive vs. negative keyword hit counts."""
        text = f"{title} {summary}".lower()

        positive_keywords = [
            'breakthrough', 'success', 'achieve', 'improve', 'advance',
            'innovative', 'revolutionary', 'launch', 'release', 'win',
            'growth', 'expand', 'partnership', 'collaboration'
        ]

        negative_keywords = [
            'fail', 'issue', 'problem', 'concern', 'worry', 'risk',
            'ban', 'lawsuit', 'breach', 'hack', 'leak', 'crisis',
            'decline', 'loss', 'shutdown', 'controversy'
        ]

        positive_count = sum(1 for kw in positive_keywords if kw in text)
        negative_count = sum(1 for kw in negative_keywords if kw in text)

        if positive_count > negative_count:
            return 'positive'
        elif negative_count > positive_count:
            return 'negative'
        else:
            return 'neutral'

    def _is_breaking_news(self, title: str, summary: str) -> bool:
        """Return True when the text contains a breaking-news indicator."""
        text = f"{title} {summary}".lower()

        breaking_indicators = [
            'breaking', 'just announced', 'just released', 'just launched',
            'alert', 'urgent', 'developing', 'live', 'now:'
        ]

        return any(indicator in text for indicator in breaking_indicators)

    def get_statistics(self) -> Dict:
        """Get statistics - returns empty for backward compatibility.

        Real counts are tracked by the unified cache manager; this stub
        keeps the monitor interface uniform across news sources.
        """
        return {
            'total': 0,
            'high_impact': 0,
            'breaking': 0,
            'last_update': 'Managed by cache',
            'by_category': {
                'ai': 0,
                'tech': 0,
                'research': 0
            }
        }