# Premchan369 / alphaforge-quant-system
+"""News Intelligence v1.0 — Real-Time News Sentiment + Event Detection
+FinBERT-based sentiment scoring with event classification.
+Falls back to regex-based analysis if FinBERT unavailable.
+"""
import json
import logging
import os
import re
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

import numpy as np
import requests
# ── Event detection keywords ─────────────────────────────────
# Maps an event type to the keywords that signal it. Each entry is applied as
# a regular-expression fragment against lower-cased article text (note
# 'q[1-4]'), so any regex metacharacter added here is interpreted, not
# matched literally. Dict order matters: ties between event types are
# resolved in favour of the type listed first.
EVENT_PATTERNS = {
    'earnings':      ['earnings', 'quarterly', 'revenue', 'eps', 'profit', 'q[1-4]', 'fiscal'],
    'fed':           ['federal reserve', 'fed', 'fomc', 'interest rate', 'rate hike', 'rate cut', 'powell'],
    'cpi':           ['cpi', 'inflation', 'consumer price', 'core pce'],
    'jobs':          ['jobs report', 'unemployment', 'nfp', 'nonfarm payroll', 'labor'],
    'lawsuit':       ['lawsuit', 'sec', 'doj', 'investigation', 'antitrust', 'fine', 'settlement'],
    'merger':        ['merger', 'acquisition', 'acquire', 'buyout', 'merging', 'takeover'],
    'dividend':      ['dividend', 'buyback', 'share repurchase', 'dividend yield'],
    'split':         ['stock split', 'split', 'reverse split'],
    'upgrade':       ['upgrade', 'upgraded', 'overweight', 'buy rating', 'price target raised'],
    'downgrade':     ['downgrade', 'downgraded', 'underweight', 'sell rating', 'price target cut'],
    'product':       ['product launch', 'new product', 'iphone', 'ai model', 'release date'],
    'supply_chain':  ['supply chain', 'shortage', 'inventory', 'chip shortage', 'factory'],
    'macro':         ['gdp', 'recession', 'economic growth', 'fiscal policy', 'stimulus'],
    'geopolitical':  ['war', 'sanctions', 'tension', 'china', 'trade war', 'tariff'],
    'analyst':       ['analyst', 'wall street', 'target price', 'consensus'],
}

# Lower-case words counted as positive evidence by the rule-based sentiment
# fallback.
BULLISH_WORDS = [
    'beat', 'strong', 'growth', 'surge', 'rally', 'bullish', 'outperform',
    'exceed', 'record', 'milestone', 'breakthrough', 'partnership', 'launch',
    'innovation', 'momentum', 'premium', 'dominant', 'leader', 'expansion'
]

# Lower-case words counted as negative evidence by the rule-based sentiment
# fallback.
BEARISH_WORDS = [
    'miss', 'weak', 'decline', 'drop', 'crash', 'bearish', 'underperform',
    'loss', 'concern', 'warning', 'risk', 'lawsuit', 'investigation',
    'fraud', 'default', 'bankruptcy', 'layoff', 'cut', 'slash', 'downturn',
    'recession', 'contagion', 'crisis', 'collapse'
]
class NewsIntelligence:
    """Multi-source news sentiment with FinBERT + rule-based fallback.

    Articles are pulled from yfinance and/or NewsAPI, classified into an
    event type using the module-level ``EVENT_PATTERNS`` table, scored on a
    0-100 sentiment scale (50 = neutral) and aggregated per ticker.

    When no real source yields data the fetchers return canned mock
    headlines; every such article carries ``'is_mock': True`` so callers can
    tell synthetic data from real data.
    """

    _log = logging.getLogger(__name__)

    # Additive offsets applied to the raw sentiment score once an article has
    # been classified as the given event type (result is clamped to 0-100).
    _EVENT_SCORE_OFFSETS = {
        'earnings': 0,
        'fed': -10,
        'lawsuit': -25,
        'upgrade': +20,
        'downgrade': -20,
        'merger': +15,
        'dividend': +10,
        'product': +15,
    }

    def __init__(self, finbert_available: Optional[bool] = None, cache_dir: str = ".cache/news"):
        """Create the analyzer and, unless disabled, load FinBERT.

        Args:
            finbert_available: ``False`` skips FinBERT entirely. ``None``
                (auto-detect) and ``True`` both try to load the
                ``ProsusAI/finbert`` model; if loading fails the instance
                falls back to rule-based sentiment instead of raising.
            cache_dir: Directory created (if missing) at construction time.
        """
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)
        self._finbert = None
        self._tokenizer = None
        self._sentiment_cache = {}  # ticker -> {date: score}
        self.use_finbert = False
        # An explicit True must also load the model; previously only None did,
        # which left use_finbert=True with no model behind it.
        if finbert_available is None or finbert_available:
            try:
                from transformers import AutoTokenizer, AutoModelForSequenceClassification
                self._tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
                self._finbert = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
                self._finbert.eval()
                self.use_finbert = True
            except Exception as exc:  # missing package, no network, bad weights...
                self._tokenizer = None
                self._finbert = None
                # Only shout when the caller explicitly asked for FinBERT.
                level = logging.WARNING if finbert_available else logging.INFO
                self._log.log(level, "FinBERT unavailable (%s); using rule-based sentiment", exc)

    @staticmethod
    def _count_event_hits(pattern: str, text: str) -> int:
        """Count whole-word matches of an event regex fragment in *text*.

        A small set of inflection suffixes is tolerated so that 'acquire'
        still matches 'acquired' while 'war' no longer matches 'software'
        and 'sec' no longer matches 'second'.
        """
        return len(re.findall(rf'\b(?:{pattern})(?:s|es|d|ed|ing)?\b', text))

    @staticmethod
    def _count_word_hits(word: str, text: str) -> int:
        """Count occurrences of a sentiment *word* that start at a word boundary.

        Only the leading boundary is enforced so inflections keep matching
        ('beat' -> 'beats', 'drop' -> 'dropped') while mid-word hits such as
        'cut' inside 'execute' are ignored.
        """
        return len(re.findall(r'\b' + re.escape(word), text))

    @staticmethod
    def _label_probs(probs, id2label) -> Tuple[float, float, float]:
        """Return ``(negative, neutral, positive)`` from raw class probabilities.

        The class order is taken from the model's ``id2label`` mapping rather
        than assumed, because sentiment checkpoints do not share one order.
        """
        by_label = {}
        for idx, label in (id2label or {}).items():
            idx = int(idx)
            if 0 <= idx < len(probs):
                by_label[str(label).lower()] = float(probs[idx])
        if all(name in by_label for name in ('negative', 'neutral', 'positive')):
            return by_label['negative'], by_label['neutral'], by_label['positive']
        # NOTE(review): fallback assumes the positive/negative/neutral order of
        # the ProsusAI/finbert config — confirm if a different checkpoint is used.
        pos, neg, neu = (float(p) for p in probs)
        return neg, neu, pos

    def classify_event(self, headline: str, summary: str = "") -> Tuple[str, float]:
        """Classify article into event type and severity (0-1).

        Returns ``('general', 0.1)`` when no keyword matches. Otherwise the
        event type with the most keyword hits wins (ties go to the type
        listed first in ``EVENT_PATTERNS``) and severity is ``0.5`` per hit,
        capped at ``1.0``. ``None`` inputs are treated as empty strings.
        """
        text = f"{headline or ''} {summary or ''}".lower()
        scores = {}
        for event_type, patterns in EVENT_PATTERNS.items():
            hits = sum(self._count_event_hits(pat, text) for pat in patterns)
            if hits:
                scores[event_type] = hits
        if not scores:
            return 'general', 0.1
        best = max(scores, key=scores.get)
        return best, min(1.0, scores[best] * 0.5)

    def rule_sentiment(self, headline: str, summary: str = "") -> Dict:
        """Rule-based sentiment as fallback when FinBERT unavailable.

        Returns a dict with ``score`` (0-100, 50 = neutral), ``confidence``
        (0.1 per keyword hit, capped at 1.0; exactly 0.0 when nothing
        matched) and ``method='rule'``.
        """
        text = f"{headline or ''} {summary or ''}".lower()
        bull = sum(self._count_word_hits(w, text) for w in BULLISH_WORDS)
        bear = sum(self._count_word_hits(w, text) for w in BEARISH_WORDS)
        hits = bull + bear
        if hits == 0:
            # No evidence either way: neutral score with zero weight.
            score, confidence = 50.0, 0.0
        else:
            # Map the bull/bear balance (-1..1) onto the 0-100 scale.
            score = 50 + (bull - bear) / hits * 50
            confidence = min(1.0, hits * 0.1)
        return {
            'score': max(0.0, min(100.0, score)),
            'confidence': confidence,
            'method': 'rule'
        }

    def finbert_sentiment(self, headline: str, summary: str = "") -> Dict:
        """FinBERT inference. Returns score 0-100.

        Delegates to :meth:`rule_sentiment` when FinBERT is disabled or was
        not loaded. All numbers in the result are plain Python floats.
        """
        headline = headline or ""
        summary = summary or ""
        if not self.use_finbert or self._finbert is None or self._tokenizer is None:
            return self.rule_sentiment(headline, summary)
        import torch
        text = headline
        if summary:
            text += ". " + summary[:500]
        inputs = self._tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            outputs = self._finbert(**inputs)
            probs = torch.softmax(outputs.logits, dim=1)[0].tolist()
        id2label = getattr(getattr(self._finbert, 'config', None), 'id2label', None)
        neg, neu, pos = self._label_probs(probs, id2label)
        # Map to 0-100
        score = 50 + (pos - neg) * 50
        confidence = 1 - neu  # Higher confidence when less neutral
        return {
            'score': max(0.0, min(100.0, score)),
            'confidence': float(confidence),
            'probs': {'negative': float(neg), 'neutral': float(neu), 'positive': float(pos)},
            'method': 'finbert'
        }

    def analyze_article(self, headline: str, summary: str = "",
                        timestamp: str = None) -> Dict:
        """Full article analysis: sentiment + event classification.

        ``None`` for *headline* or *summary* is treated as an empty string
        (news APIs return null for missing fields). The sentiment dict gains
        an ``adjusted_score``: the raw score plus the event-type offset,
        clamped to 0-100. *timestamp* defaults to the current local time.
        """
        headline = headline or ""
        summary = summary or ""
        event_type, event_severity = self.classify_event(headline, summary)
        sentiment = self.finbert_sentiment(headline, summary)
        # Adjust sentiment for event context
        offset = self._EVENT_SCORE_OFFSETS.get(event_type, 0)
        sentiment['adjusted_score'] = max(0.0, min(100.0, sentiment['score'] + offset))
        return {
            'headline': headline,
            'summary': summary[:200] if summary else "",
            'timestamp': timestamp or datetime.now().isoformat(),
            'sentiment': sentiment,
            'event': {
                'type': event_type,
                'severity': event_severity,
            }
        }

    def fetch_newsapi(self, query: str, api_key: str = None, days: int = 7) -> List[Dict]:
        """Fetch news from NewsAPI. Returns list of article analyses.

        The key is taken from *api_key* or the ``NEWSAPI_KEY`` environment
        variable. Without a key, or on any request/parsing error, mock
        articles are returned instead. At most 10 articles are analyzed.
        """
        api_key = api_key or os.environ.get('NEWSAPI_KEY')
        if not api_key:
            return self._mock_news(query)
        from_date = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')
        try:
            # params= URL-encodes the query; the key goes in a header so it
            # never shows up in URLs embedded in error messages.
            response = requests.get(
                "https://newsapi.org/v2/everything",
                params={
                    'q': query,
                    'from': from_date,
                    'sortBy': 'publishedAt',
                    'language': 'en',
                },
                headers={'X-Api-Key': api_key},
                timeout=15,
            )
            response.raise_for_status()
            articles = response.json().get('articles') or []
            return [
                self.analyze_article(
                    art.get('title'),
                    art.get('description'),
                    art.get('publishedAt'),
                )
                for art in articles[:10]
            ]
        except Exception as exc:  # deliberate best-effort boundary
            self._log.warning("NewsAPI error: %s", exc)
            return self._mock_news(query)

    def fetch_yfinance_news(self, ticker: str) -> List[Dict]:
        """Fetch news from yfinance.

        Reads ``title``/``summary`` either from the item itself or from its
        nested ``content`` dict. Returns mock articles if yfinance is not
        installed or the lookup fails; returns ``[]`` when the ticker simply
        has no news.
        """
        try:
            import yfinance as yf
            news = yf.Ticker(ticker).news or []
            results = []
            for item in news[:10]:
                content = item.get('content') or {}
                title = item.get('title') or content.get('title') or ''
                summary = item.get('summary') or content.get('summary') or ''
                results.append(self.analyze_article(title, summary))
            return results
        except Exception as exc:  # deliberate best-effort boundary
            self._log.warning("yfinance news error: %s", exc)
            return self._mock_news(ticker)

    def aggregate_sentiment(self, articles: List[Dict]) -> Dict:
        """Aggregate sentiment across articles, weighting each by its confidence.

        Each article contributes its ``adjusted_score`` (or ``score`` when
        absent). If every confidence is zero the plain mean is used, so a
        set of neutral articles aggregates to 50 rather than collapsing
        toward 0. The returned dict always has the same keys, including for
        an empty input.
        """
        if not articles:
            return {
                'score': 50,
                'confidence': 0,
                'volume': 0,
                'trend': 'neutral',
                'bullish_count': 0,
                'bearish_count': 0,
                'neutral_count': 0,
                'dominant_event': 'general',
                'event_counts': {},
            }
        scores = []
        for art in articles:
            adj = art['sentiment'].get('adjusted_score', art['sentiment']['score'])
            conf = art['sentiment'].get('confidence', 0.5)
            scores.append((adj, conf))
        volume = len(scores)
        # Weighted average by confidence
        total_weight = sum(conf for _, conf in scores)
        if total_weight > 0:
            weighted_score = sum(s * c for s, c in scores) / total_weight
        else:
            weighted_score = sum(s for s, _ in scores) / volume
        # Count by sentiment
        bullish = sum(1 for s, _ in scores if s > 55)
        bearish = sum(1 for s, _ in scores if s < 45)
        neutral = sum(1 for s, _ in scores if 45 <= s <= 55)
        if bullish > bearish * 2:
            trend = 'strong_bullish'
        elif bullish > bearish:
            trend = 'bullish'
        elif bearish > bullish * 2:
            trend = 'strong_bearish'
        elif bearish > bullish:
            trend = 'bearish'
        else:
            trend = 'mixed'
        # Dominant event
        event_counts = {}
        for art in articles:
            event = art['event']['type']
            event_counts[event] = event_counts.get(event, 0) + 1
        dominant_event = max(event_counts, key=event_counts.get) if event_counts else 'general'
        return {
            'score': round(weighted_score, 1),
            'confidence': round(total_weight / volume, 2),
            'volume': volume,
            'trend': trend,
            'bullish_count': bullish,
            'bearish_count': bearish,
            'neutral_count': neutral,
            'dominant_event': dominant_event,
            'event_counts': event_counts,
        }

    def _mock_news(self, query: str) -> List[Dict]:
        """Mock news for testing without API keys.

        Every returned analysis is tagged ``'is_mock': True``.
        """
        mock = [
            f"{query} beats earnings expectations, revenue surges 15%",
            f"{query} announces new AI product partnership",
            f"Analysts upgrade {query} to overweight, target raised to $500",
            f"{query} faces supply chain headwinds in Q3",
            f"{query} maintains guidance despite macro uncertainty",
        ]
        analyses = []
        for headline in mock:
            analysis = self.analyze_article(headline)
            analysis['is_mock'] = True
            analyses.append(analysis)
        return analyses

    def get_full_analysis(self, ticker: str, market: str = 'US', period_days: int = 7) -> Dict:
        """Full news intelligence pipeline for a ticker.

        yfinance is tried first; NewsAPI is consulted when fewer than three
        real (non-mock) articles were obtained. Mock articles are used only
        when no real article is available at all. The result is the
        aggregate from :meth:`aggregate_sentiment` plus ``articles`` (the
        first five unique ones), ``ticker``, ``market``, ``timestamp`` and
        ``is_mock`` (True when the result is built from mock data).
        """
        # Try yfinance first
        articles = self.fetch_yfinance_news(ticker)
        real = [a for a in articles if not a.get('is_mock')]
        # If insufficient, try NewsAPI. Mock fallbacks do not count, so a
        # yfinance failure no longer prevents the real secondary source
        # from being queried.
        if len(real) < 3:
            articles = articles + self.fetch_newsapi(ticker, days=period_days)
            real = [a for a in articles if not a.get('is_mock')]
        if real:
            articles = real
        # Deduplicate by headline
        seen = set()
        unique = []
        for a in articles:
            key = a['headline'][:50].lower()
            if key not in seen:
                seen.add(key)
                unique.append(a)
        sentiment = self.aggregate_sentiment(unique)
        sentiment['articles'] = unique[:5]  # First 5 unique articles
        sentiment['ticker'] = ticker
        sentiment['market'] = market
        sentiment['timestamp'] = datetime.now().isoformat()
        sentiment['is_mock'] = any(a.get('is_mock') for a in unique)
        return sentiment
if __name__ == '__main__':
    # Demo: run the full pipeline for one ticker and print a short summary.
    ni = NewsIntelligence()
    result = ni.get_full_analysis('AAPL')
    print(f"Sentiment Score: {result['score']}/100")
    print(f"Trend: {result['trend']}")
    # The aggregate built for an empty article list may not carry the event
    # keys, so read this one defensively instead of raising KeyError.
    print(f"Dominant Event: {result.get('dominant_event', 'general')}")
    print(f"Article Count: {result['volume']}")
    for art in result['articles'][:3]:
        print(f"\n  📰 {art['headline']}")
        print(f"     Score: {art['sentiment']['adjusted_score']:.1f} | Event: {art['event']['type']}")