|
|
""" |
|
|
Professional Finance News Monitor using snscrape

Real-time tracking: Macro, Markets, Geopolitical intelligence
Optimized for low-latency trading decisions
|
|
""" |
|
|
|
|
|
import pandas as pd |
|
|
from datetime import datetime, timedelta |
|
|
from typing import List, Dict, Optional |
|
|
import streamlit as st |
|
|
import time |
|
|
import logging |
|
|
import re |
|
|
|
|
|
|
|
|
# Logging is configured at import time; the module logger is used by the
# scraper below to report failures and fallbacks.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# snscrape is an optional dependency: SNSCRAPE_AVAILABLE records whether the
# import succeeded. `sntwitter` is only bound when the flag is True.
try:
    import snscrape.modules.twitter as sntwitter
except ImportError:
    SNSCRAPE_AVAILABLE = False
    logger.warning("snscrape not available. Install with: pip install snscrape")
else:
    SNSCRAPE_AVAILABLE = True
|
|
|
|
|
|
|
|
class FinanceNewsMonitor:
    """Collect tweets from finance and news accounts and tag them for display.

    Every item is labelled with a category ('macro', 'geopolitical',
    'markets' or 'general'), a keyword-based sentiment, an impact level and
    a breaking-news flag.

    NOTE: when snscrape is not importable, or when scraping yields no tweet
    at all, the fixed items from ``_get_mock_news`` are returned instead of
    live data.
    """

    # Seconds a scrape result is reused, both by the Streamlit cache on
    # ``scrape_twitter_news`` and by the per-instance cache in ``get_news``.
    CACHE_TTL_SECONDS = 180

    # Registry of scraped accounts. 'weight' scales engagement in
    # ``_assess_impact``; 'specialization' boosts the matching category
    # score in ``_categorize_tweet``.
    SOURCES = {
        # Financial press and news wires
        'reuters': {'handle': '@Reuters', 'weight': 1.5,
                    'specialization': ['macro', 'geopolitical', 'markets']},
        'bloomberg': {'handle': '@business', 'weight': 1.5,
                      'specialization': ['macro', 'markets']},
        'ft': {'handle': '@FT', 'weight': 1.4,
               'specialization': ['macro', 'markets']},
        'economist': {'handle': '@TheEconomist', 'weight': 1.3,
                      'specialization': ['macro', 'geopolitical']},
        'wsj': {'handle': '@WSJ', 'weight': 1.4,
                'specialization': ['markets', 'macro']},
        'bloomberg_terminal': {'handle': '@Bloomberg', 'weight': 1.5,
                               'specialization': ['macro', 'markets']},
        'cnbc': {'handle': '@CNBC', 'weight': 1.2,
                 'specialization': ['markets']},
        'marketwatch': {'handle': '@MarketWatch', 'weight': 1.1,
                        'specialization': ['markets']},

        # World / political news
        'bbc_world': {'handle': '@BBCWorld', 'weight': 1.4,
                      'specialization': ['geopolitical']},
        'afp': {'handle': '@AFP', 'weight': 1.3,
                'specialization': ['geopolitical']},
        'aljazeera': {'handle': '@AlJazeera', 'weight': 1.2,
                      'specialization': ['geopolitical']},
        'politico': {'handle': '@politico', 'weight': 1.2,
                     'specialization': ['geopolitical', 'macro']},
        'dw_news': {'handle': '@dwnews', 'weight': 1.2,
                    'specialization': ['geopolitical']},

        # Central banks and official institutions
        'federal_reserve': {'handle': '@federalreserve', 'weight': 2.0,
                            'specialization': ['macro']},
        'ecb': {'handle': '@ecb', 'weight': 2.0,
                'specialization': ['macro']},
        'lagarde': {'handle': '@Lagarde', 'weight': 1.9,
                    'specialization': ['macro']},
        'bank_of_england': {'handle': '@bankofengland', 'weight': 1.8,
                            'specialization': ['macro']},
        'imf': {'handle': '@IMFNews', 'weight': 1.7,
                'specialization': ['macro', 'geopolitical']},
        'world_bank': {'handle': '@worldbank', 'weight': 1.6,
                       'specialization': ['macro', 'geopolitical']},
        'us_treasury': {'handle': '@USTreasury', 'weight': 1.8,
                        'specialization': ['macro']},

        # Market commentary / squawk feeds
        'zerohedge': {'handle': '@zerohedge', 'weight': 1.0,
                      'specialization': ['markets', 'macro']},
        'first_squawk': {'handle': '@FirstSquawk', 'weight': 1.1,
                         'specialization': ['markets', 'macro']},
        'live_squawk': {'handle': '@LiveSquawk', 'weight': 1.1,
                        'specialization': ['markets', 'macro']},
    }

    MACRO_KEYWORDS = [
        # Central banks and monetary policy
        'Fed', 'ECB', 'BoE', 'BoJ', 'FOMC', 'Powell', 'Lagarde',
        'interest rate', 'rate cut', 'rate hike', 'QE', 'quantitative',
        'monetary policy', 'dovish', 'hawkish',
        # 'Fed' is matched as a whole word, so the long form is listed too.
        'Federal Reserve',
        # Economic indicators
        'GDP', 'inflation', 'CPI', 'PPI', 'PCE', 'NFP', 'payroll',
        'unemployment', 'jobless', 'retail sales', 'PMI', 'ISM',
        'consumer confidence', 'durable goods', 'housing starts',
        # Fiscal policy and rates markets
        'recession', 'stimulus', 'fiscal policy', 'treasury',
        'yield curve', 'bond market',
    ]

    GEO_KEYWORDS = [
        # Armed conflict and coercive measures
        'war', 'conflict', 'military', 'missile', 'attack', 'invasion',
        'sanctions', 'embargo', 'blockade',
        # Political events
        'election', 'impeachment', 'coup', 'protest', 'unrest',
        'geopolitical', 'tension', 'crisis', 'dispute',
        # Trade, diplomacy and regions
        'trade war', 'tariff', 'trade deal', 'summit', 'treaty',
        'China', 'Russia', 'Taiwan', 'Middle East', 'Ukraine',
    ]

    MARKET_KEYWORDS = [
        # Indices and price action
        'S&P', 'Nasdaq', 'Dow', 'Russell', 'VIX', 'volatility',
        'rally', 'sell-off', 'correction', 'crash', 'bull', 'bear',
        # Corporate events
        'earnings', 'EPS', 'revenue', 'guidance', 'beat', 'miss',
        'IPO', 'merger', 'acquisition', 'M&A', 'buyback', 'dividend',
        # Sectors and assets
        'tech stocks', 'banks', 'energy', 'commodities', 'crypto',
        'Bitcoin', 'oil', 'gold', 'dollar', 'DXY',
    ]

    BREAKING_KEYWORDS = [
        'BREAKING', 'ALERT', 'URGENT', 'just in', 'developing',
        'Fed', 'Federal Reserve', 'Powell', 'emergency', 'unexpected',
        'surprise',
    ]

    POSITIVE_WORDS = [
        'surge', 'rally', 'soar', 'jump', 'gain', 'rise', 'climb',
        'growth', 'positive', 'strong', 'robust', 'beat', 'exceed',
        'outperform', 'record high', 'breakthrough', 'optimistic',
    ]

    NEGATIVE_WORDS = [
        'plunge', 'crash', 'tumble', 'fall', 'drop', 'decline', 'slump',
        'loss', 'weak', 'fragile', 'crisis', 'concern', 'risk', 'fear',
        'miss', 'disappoint', 'warning', 'downgrade', 'recession',
    ]

    # Keywords up to this length (tickers, acronyms, 'war', 'oil', ...) must
    # match a whole word; longer ones only need to start a word.
    _SHORT_KEYWORD_LEN = 3
    _KEYWORD_WEIGHT = 2
    _SPECIALIZATION_BOOST = 1.5
    _URL_RE = re.compile(r'http\S+')
    # tuple(keywords) -> list of compiled patterns, shared by all instances.
    _pattern_cache = {}

    def __init__(self):
        """Start with an empty per-instance cache that has never been filled."""
        self.news_cache: List[Dict] = []
        self.last_fetch: Optional[datetime] = None
        self.cache_ttl = self.CACHE_TTL_SECONDS

    # ------------------------------------------------------------------
    # Scraping
    # ------------------------------------------------------------------

    @st.cache_data(ttl=CACHE_TTL_SECONDS)
    def scrape_twitter_news(_self, max_tweets: int = 100) -> List[Dict]:
        """Scrape recent tweets from every source and return tagged items.

        The instance parameter is named ``_self`` because Streamlit skips
        hashing of arguments whose name starts with an underscore.

        Args:
            max_tweets: Budget split evenly across sources. Each source is
                always allowed at least 5 tweets, so the total can exceed
                this value.

        Returns:
            A list of item dicts sorted with breaking items first, then
            high-impact items, then newest first. Mock items are returned
            when snscrape is unavailable, when every source failed, or when
            no tweet was collected.
        """
        if not SNSCRAPE_AVAILABLE:
            logger.info("snscrape not available - using mock data")
            return _self._get_mock_news()

        tweets_per_source = max(5, max_tweets // len(_self.SOURCES))
        all_tweets: List[Dict] = []
        failed_sources = 0

        for source_name, source_info in _self.SOURCES.items():
            try:
                # Appending item by item keeps what was collected before a
                # mid-stream failure of this source.
                for item in _self._iter_source_news(
                        source_name, source_info, tweets_per_source):
                    all_tweets.append(item)
            except Exception as e:
                # Best-effort boundary: one failing account must not stop
                # the remaining sources from being scraped.
                failed_sources += 1
                error_msg = str(e).lower()
                if 'blocked' in error_msg or '404' in error_msg:
                    logger.warning(
                        "Twitter/X API blocked access for %s", source_name)
                else:
                    logger.error("Error scraping %s: %s", source_name, e)

        if failed_sources >= len(_self.SOURCES) or not all_tweets:
            logger.warning(
                "Twitter/X API unavailable - falling back to mock data "
                "for demonstration")
            return _self._get_mock_news()

        all_tweets.sort(
            key=lambda x: (x['is_breaking'], x['impact'] == 'high',
                           x['timestamp']),
            reverse=True,
        )
        return all_tweets

    def _iter_source_news(self, source_name: str, source_info: Dict,
                          limit: int):
        """Yield up to ``limit`` tagged items for one source account."""
        handle = source_info['handle'].lstrip('@')
        query = f"from:{handle} -filter:replies -filter:retweets"
        weight = source_info['weight']
        specialization = source_info['specialization']

        produced = 0
        for tweet in sntwitter.TwitterSearchScraper(query).get_items():
            # Build "now" with the tweet's own tzinfo: subtracting an aware
            # tweet date from a naive now() raises TypeError.
            age = datetime.now(tz=tweet.date.tzinfo) - tweet.date
            if age.days > 1:
                # timedelta.days floors, so this triggers at two full days.
                # NOTE(review): assumes results arrive newest-first, so
                # everything after this tweet is older as well - stop
                # rather than page through the whole account history.
                break

            text = tweet.content
            yield {
                'id': tweet.id,
                'title': text,
                'summary': self._extract_summary(text),
                'source': source_name.capitalize(),
                'category': self._categorize_tweet(text, specialization),
                'timestamp': tweet.date,
                'sentiment': self._analyze_sentiment(text),
                'impact': self._assess_impact(tweet, weight),
                'url': tweet.url,
                'likes': tweet.likeCount or 0,
                'retweets': tweet.retweetCount or 0,
                'is_breaking': self._detect_breaking_news(text),
                'source_weight': weight,
            }
            produced += 1
            if produced >= limit:
                return

    # ------------------------------------------------------------------
    # Keyword matching
    # ------------------------------------------------------------------

    @classmethod
    def _keyword_patterns(cls, keywords) -> list:
        """Return cached, case-insensitive regexes, one per keyword.

        A keyword must start at a word boundary (no ASCII letter right
        before it). Keywords of at most ``_SHORT_KEYWORD_LEN`` characters
        must also end the word, apart from an optional plural 's', so that
        'Dow' does not match "down" and 'war' does not match "warning".
        Longer keywords may be followed by more letters so that derived
        forms such as "tariffs" or "surged" still count.
        """
        key = tuple(keywords)
        patterns = cls._pattern_cache.get(key)
        if patterns is None:
            patterns = []
            for keyword in key:
                expr = r'(?<![A-Za-z])' + re.escape(keyword)
                if len(keyword) <= cls._SHORT_KEYWORD_LEN:
                    expr += r's?(?![A-Za-z])'
                patterns.append(re.compile(expr, re.IGNORECASE))
            cls._pattern_cache[key] = patterns
        return patterns

    @classmethod
    def _count_keyword_hits(cls, text: str, keywords) -> int:
        """Count how many distinct ``keywords`` occur in ``text``."""
        if not text:
            return 0
        return sum(
            1 for pattern in cls._keyword_patterns(keywords)
            if pattern.search(text)
        )

    def _categorize_tweet(self, text: str,
                          source_specialization: List[str]) -> str:
        """Pick the best-scoring category for ``text``.

        Each matched keyword adds 2 points to its category; categories the
        source specialises in are multiplied by 1.5. Ties resolve in the
        order macro, geopolitical, markets.

        Returns:
            'macro', 'geopolitical' or 'markets', or 'general' when no
            keyword matched at all.
        """
        keyword_sets = (
            ('macro', self.MACRO_KEYWORDS),
            ('geopolitical', self.GEO_KEYWORDS),
            ('markets', self.MARKET_KEYWORDS),
        )
        scores = {}
        for category, keywords in keyword_sets:
            score = self._KEYWORD_WEIGHT * self._count_keyword_hits(
                text, keywords)
            if category in source_specialization:
                score *= self._SPECIALIZATION_BOOST
            scores[category] = score

        best = max(scores, key=scores.get)
        return best if scores[best] > 0 else 'general'

    def _analyze_sentiment(self, text: str) -> str:
        """Return 'positive', 'negative' or 'neutral' from keyword counts.

        The label follows whichever word list has strictly more distinct
        matches; equal counts (including zero) are 'neutral'.
        """
        positive_hits = self._count_keyword_hits(text, self.POSITIVE_WORDS)
        negative_hits = self._count_keyword_hits(text, self.NEGATIVE_WORDS)
        if positive_hits > negative_hits:
            return 'positive'
        if negative_hits > positive_hits:
            return 'negative'
        return 'neutral'

    def _assess_impact(self, tweet, source_weight: float) -> str:
        """Rate a tweet 'high', 'medium' or 'low'.

        Breaking tweets and sources weighted 2.0 or more are always 'high'.
        Otherwise the rating depends on likes + 2 * retweets, scaled by
        ``source_weight``: above 1500 is 'high', above 300 is 'medium'.
        """
        if self._detect_breaking_news(tweet.content):
            return 'high'

        engagement = (tweet.likeCount or 0) + (tweet.retweetCount or 0) * 2
        weighted_engagement = engagement * source_weight

        if weighted_engagement > 1500 or source_weight >= 2.0:
            return 'high'
        if weighted_engagement > 300:
            return 'medium'
        return 'low'

    def _detect_breaking_news(self, text: str) -> bool:
        """Return True when ``text`` contains any breaking-news keyword."""
        if not text:
            return False
        return any(
            pattern.search(text)
            for pattern in self._keyword_patterns(self.BREAKING_KEYWORDS)
        )

    def _extract_summary(self, text: str, max_length: int = 200) -> str:
        """Strip URLs and surrounding whitespace, then truncate.

        Text longer than ``max_length`` is cut at ``max_length`` characters
        and '...' is appended, so the result can be 3 characters longer.
        """
        cleaned = self._URL_RE.sub('', text).strip()
        if len(cleaned) <= max_length:
            return cleaned
        return cleaned[:max_length] + '...'

    # ------------------------------------------------------------------
    # Mock data
    # ------------------------------------------------------------------

    def _get_mock_news(self) -> List[Dict]:
        """Return ten fixed demo items with timestamps relative to now.

        The items have the same keys as scraped items. Their timestamps are
        naive local datetimes.
        """
        now = datetime.now()
        # Columns: title, summary, source, category, age, sentiment, impact,
        # account handle, likes, retweets, is_breaking, source_weight
        rows = [
            ('BREAKING: Federal Reserve announces emergency rate cut of 50bps - Powell cites economic uncertainty',
             'BREAKING: Fed emergency rate cut 50bps',
             'Federal Reserve', 'macro', timedelta(minutes=5),
             'negative', 'high', 'federalreserve', 5000, 2000, True, 2.0),
            ('*FIRST SQUAWK: S&P 500 FUTURES DROP 2% AFTER FED ANNOUNCEMENT',
             '*FIRST SQUAWK: S&P 500 futures drop 2%',
             'First Squawk', 'markets', timedelta(minutes=10),
             'negative', 'high', 'FirstSquawk', 1500, 600, False, 1.1),
            ('Apple reports earnings beat with $123B revenue, raises dividend by 4% - Stock up 3% after hours',
             'Apple beats earnings, raises dividend 4%',
             'Bloomberg', 'markets', timedelta(minutes=25),
             'positive', 'high', 'business', 2800, 900, False, 1.5),
            ('ECB President Lagarde: Inflation remains above target, rates to stay higher for longer',
             'Lagarde: rates to stay higher for longer',
             'Lagarde', 'macro', timedelta(minutes=45),
             'neutral', 'high', 'Lagarde', 1200, 400, False, 1.9),
            ('Ukraine conflict: New peace talks scheduled as tensions ease in Eastern Europe',
             'Ukraine: New peace talks scheduled',
             'BBC World', 'geopolitical', timedelta(hours=1),
             'positive', 'medium', 'BBCWorld', 3500, 1200, False, 1.4),
            ('US GDP growth revised up to 2.8% in Q4, beating economists expectations of 2.5%',
             'US GDP growth revised up to 2.8% in Q4',
             'Reuters', 'macro', timedelta(hours=2),
             'positive', 'medium', 'Reuters', 1800, 600, False, 1.5),
            ('*LIVE SQUAWK: Oil prices surge 5% on Middle East supply concerns, Brent crude at $92/barrel',
             '*LIVE SQUAWK: Oil surges 5% on supply fears',
             'Live Squawk', 'markets', timedelta(hours=3),
             'neutral', 'medium', 'LiveSquawk', 900, 350, False, 1.1),
            ('IMF upgrades global growth forecast to 3.2% for 2024, warns of recession risks in Europe',
             'IMF upgrades global growth to 3.2%',
             'IMF', 'macro', timedelta(hours=4),
             'neutral', 'medium', 'IMFNews', 800, 300, False, 1.7),
            ('US-China trade talks resume in Washington, focus on technology transfer and tariffs',
             'US-China trade talks resume',
             'Politico', 'geopolitical', timedelta(hours=5),
             'neutral', 'low', 'politico', 600, 200, False, 1.2),
            ('Bank of America cuts recession probability to 20%, cites resilient consumer spending',
             'BofA cuts recession probability to 20%',
             'FT', 'markets', timedelta(hours=6),
             'positive', 'low', 'FT', 700, 250, False, 1.4),
        ]
        return [
            {
                'id': item_id,
                'title': title,
                'summary': summary,
                'source': source,
                'category': category,
                'timestamp': now - age,
                'sentiment': sentiment,
                'impact': impact,
                'url': f'https://twitter.com/{handle}',
                'likes': likes,
                'retweets': retweets,
                'is_breaking': is_breaking,
                'source_weight': weight,
            }
            for item_id, (title, summary, source, category, age, sentiment,
                          impact, handle, likes, retweets, is_breaking,
                          weight) in enumerate(rows, start=1)
        ]

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def _clear_scrape_cache(self) -> None:
        """Drop Streamlit's memoised scrape result, if it can be cleared."""
        clear = getattr(type(self).scrape_twitter_news, 'clear', None)
        if callable(clear):
            clear()

    def get_news(self, category: str = 'all', sentiment: str = 'all',
                 impact: str = 'all', refresh: bool = False) -> pd.DataFrame:
        """Return cached news as a DataFrame, optionally filtered.

        The instance cache is refilled when ``refresh`` is set, when nothing
        has been fetched yet, or when the last fetch is older than
        ``cache_ttl`` seconds.

        Args:
            category: 'all', 'macro', 'geopolitical', 'markets'
            sentiment: 'all', 'positive', 'negative', 'neutral'
            impact: 'all', 'high', 'medium', 'low'
            refresh: Force a new scrape, bypassing both cache layers.

        Returns:
            One row per item with 'timestamp' converted by
            ``pd.to_datetime``; an empty DataFrame when nothing matches.
        """
        # total_seconds() rather than .seconds: the latter is only the
        # sub-day component and would treat a day-old cache as fresh.
        expired = (
            self.last_fetch is None
            or (datetime.now() - self.last_fetch).total_seconds()
            > self.cache_ttl
        )
        if refresh:
            # Without this the Streamlit cache would hand back the same
            # memoised result and the forced refresh would be a no-op.
            self._clear_scrape_cache()
        if refresh or expired:
            self.news_cache = self.scrape_twitter_news(max_tweets=100)
            self.last_fetch = datetime.now()

        requested = {
            'category': category,
            'sentiment': sentiment,
            'impact': impact,
        }
        active = {
            field: value for field, value in requested.items()
            if value != 'all'
        }
        news = [
            item for item in self.news_cache
            if all(item[field] == value for field, value in active.items())
        ]

        df = pd.DataFrame(news)
        if not df.empty:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df

    def get_breaking_news(self) -> pd.DataFrame:
        """Return at most the first 10 items flagged as breaking."""
        df = self.get_news()
        if df.empty:
            return df
        return df[df['is_breaking'].astype(bool)].head(10)

    def get_statistics(self) -> Dict:
        """Summarise the instance cache.

        Returns:
            A dict with 'total', 'high_impact', 'breaking', 'last_update'
            ('Never' while the cache is empty) and 'by_category'. The same
            keys are present whether or not the cache is empty.
        """
        items = self.news_cache
        if items and self.last_fetch:
            last_update = self.last_fetch.strftime('%H:%M:%S')
        else:
            last_update = 'Never'

        return {
            'total': len(items),
            'high_impact': sum(1 for n in items if n['impact'] == 'high'),
            'breaking': sum(1 for n in items if n['is_breaking']),
            'last_update': last_update,
            'by_category': {
                name: sum(1 for n in items if n['category'] == name)
                for name in ('macro', 'geopolitical', 'markets')
            },
        }
|
|
|