""" Alert Classifier — Categorizes news into three concern tiers. Uses keyword rules first (zero tokens), LLM fallback for ambiguous items. Tiers: 🔴 high — Black swan, major earnings miss, CEO departure, legal action ⚠️ medium — Analyst downgrade, rate changes, sector regulatory, >5% price move 🟢 low — Standard disclosures, minor analyst changes, general commentary """ import logging import re logger = logging.getLogger(__name__) # ── Rule-Based Classification (Zero Token Cost) ── HIGH_KEYWORDS = [ r"\bfraud\b", r"\bscam\b", r"\bbankrupt", r"\bdefault\b", r"\bceo\s+(resign|depart|step|fired|ousted|quit)", r"\bcfo\s+(resign|depart|step|fired|ousted|quit)", r"\blegal\s+action", r"\blawsuit\b", r"\bsec\s+investigat", r"\bearnings\s+miss", r"\bprofit\s+warning", r"\bcrash\b", r"\bplunge\b", r"\btumble\b", r"\bhalt\s+trad", r"\bsuspend\b", r"\bblack\s*swan\b", r"\bwar\b", r"\bsanction\b", r"\brecall\b", r"\bdata\s+breach\b", r"\bcollaps\b", r"\bliquidat\b", r"\b(drop|fall|crash|sink|plummet|tank)\w*\s+\d{2,}%", # >10% drop ] MEDIUM_KEYWORDS = [ r"\bdowngrade\b", r"\bupgrade\b", r"\brate\s+(cut|hike|change|decision)", r"\binterest\s+rate", r"\brbi\b", r"\bfed\b.*\brate\b", r"\bregulat\w+\s+(inquiry|probe|action|scrutiny)", r"\btariff\b", r"\btrade\s+war\b", r"\bsector\s+(rotation|shift|sell)", r"\banalyst\b.*\b(target|cut|raise|lower)", r"\bmerger\b", r"\bacquisition\b", r"\btakeover\b", r"\bipo\b", r"\bdelisting\b", r"\b(rise|gain|surge|jump|rally)\w*\s+[5-9]%", # 5-9% move r"\b(drop|fall|decline)\w*\s+[5-9]%", r"\binflation\b", r"\bgdp\b", r"\bearnings\b.*\b(beat|miss|surprise)", r"\bdividend\s+(cut|slash|suspend)", r"\brestructur", r"\blayoff\b", r"\bjob\s+cut", ] # Compile patterns _HIGH_PATTERNS = [re.compile(p, re.IGNORECASE) for p in HIGH_KEYWORDS] _MEDIUM_PATTERNS = [re.compile(p, re.IGNORECASE) for p in MEDIUM_KEYWORDS] def classify_concern_level( title: str, summary: str = "", sentiment: str = "neutral", confidence: float = 0.5, tickers_mentioned: list[str] | None = None, ) -> str: """ Classify a news item into concern level: 'high', 'medium', or 'low'. Uses rule-based keyword matching first (zero token cost). """ text = f"{title} {summary}".lower() # ── High Priority Check ── for pattern in _HIGH_PATTERNS: if pattern.search(text): logger.debug(f"HIGH match: {pattern.pattern} in '{title[:60]}'") return "high" # ── Medium Priority Check ── for pattern in _MEDIUM_PATTERNS: if pattern.search(text): logger.debug(f"MEDIUM match: {pattern.pattern} in '{title[:60]}'") return "medium" # ── Sentiment-Based Boost ── # Strong negative sentiment on portfolio holdings → upgrade to medium if (sentiment == "negative" and confidence >= 0.7 and tickers_mentioned and len(tickers_mentioned) > 0): return "medium" # ── Default ── return "low" def get_concern_label(level: str) -> dict: """Get display label and emoji for a concern level.""" labels = { "high": {"emoji": "🔴", "label": "IMMEDIATE", "color": "#f43f5e"}, "medium": {"emoji": "⚠️", "label": "MEDIUM CONCERN", "color": "#f59e0b"}, "low": {"emoji": "🟢", "label": "REGULAR", "color": "#34d399"}, } return labels.get(level, labels["low"])