"""
Alert Classifier — Categorizes news into three concern tiers.
Uses keyword rules first (zero tokens), LLM fallback for ambiguous items.

Tiers:
    🔴 high   — Black swan, major earnings miss, CEO departure, legal action
    ⚠️ medium — Analyst downgrade, rate changes, sector regulatory action, >5% price move
    🟢 low    — Standard disclosures, minor analyst changes, general commentary
"""
import logging
import re

# Module-level logger; classification decisions are reported at DEBUG level.
logger = logging.getLogger(__name__)


# Regex fragments that put a news item in the HIGH tier. They are matched
# case-insensitively. Entries without a trailing \b are stems, left open on
# purpose so inflected forms ("bankruptcy", "collapsed", "liquidation") match.
HIGH_KEYWORDS = [
    r"\bfraud\b", r"\bscam\b", r"\bbankrupt", r"\bdefault\b",
    r"\bceo\s+(resign|depart|step|fired|ousted|quit)",
    r"\bcfo\s+(resign|depart|step|fired|ousted|quit)",
    r"\blegal\s+action", r"\blawsuit\b", r"\bsec\s+investigat",
    r"\bearnings\s+miss", r"\bprofit\s+warning",
    r"\bcrash\b", r"\bplunge\b", r"\btumble\b",
    r"\bhalt\s+trad", r"\bsuspend\b",
    # "trade war" belongs to the MEDIUM tier; the lookbehind stops the bare
    # "war" rule from claiming it first and making that MEDIUM rule unreachable.
    r"\bblack\s*swan\b", r"(?<!trade\s)\bwar\b", r"\bsanction\b",
    r"\brecall\b", r"\bdata\s+breach\b",
    # Stems: a trailing \b here would only ever match the non-words
    # "collaps" / "liquidat", so it is intentionally omitted.
    r"\bcollaps", r"\bliquidat",
    # Double-digit (or larger) percentage drop.
    r"\b(drop|fall|crash|sink|plummet|tank)\w*\s+\d{2,}%",
]


# Regex fragments that put a news item in the MEDIUM tier. They are matched
# case-insensitively and only consulted when no HIGH pattern has fired.
MEDIUM_KEYWORDS = [
    r"\bdowngrade\b", r"\bupgrade\b",
    r"\brate\s+(cut|hike|change|decision)",
    r"\binterest\s+rate", r"\brbi\b", r"\bfed\b.*\brate\b",
    r"\bregulat\w+\s+(inquiry|probe|action|scrutiny)",
    r"\btariff\b", r"\btrade\s+war\b",
    r"\bsector\s+(rotation|shift|sell)",
    r"\banalyst\b.*\b(target|cut|raise|lower)",
    r"\bmerger\b", r"\bacquisition\b", r"\btakeover\b",
    r"\bipo\b", r"\bdelisting\b",
    # Price moves of 5% or more. Accepting two or more digits as well as a
    # single 5-9 keeps moves such as "surges 15%" or "declines 12%" from
    # falling through to the low tier.
    r"\b(rise|gain|surge|jump|rally)\w*\s+(?:[5-9]|\d{2,})%",
    r"\b(drop|fall|decline)\w*\s+(?:[5-9]|\d{2,})%",
    r"\binflation\b", r"\bgdp\b",
    r"\bearnings\b.*\b(beat|miss|surprise)",
    r"\bdividend\s+(cut|slash|suspend)",
    r"\brestructur", r"\blayoff\b", r"\bjob\s+cut",
]


# Compile every keyword once at import time so classification calls only
# run searches. IGNORECASE makes matching independent of headline casing.
_HIGH_PATTERNS = [re.compile(p, re.IGNORECASE) for p in HIGH_KEYWORDS]
_MEDIUM_PATTERNS = [re.compile(p, re.IGNORECASE) for p in MEDIUM_KEYWORDS]


def classify_concern_level(
    title: str,
    summary: str = "",
    sentiment: str = "neutral",
    confidence: float = 0.5,
    tickers_mentioned: list[str] | None = None,
) -> str:
    """Classify a news item as ``'high'``, ``'medium'`` or ``'low'`` concern.

    Purely rule-based (zero token cost). Rules are tried in order and the
    first one that fires decides the tier: HIGH keyword patterns, then MEDIUM
    keyword patterns, then a negative-sentiment heuristic.

    Args:
        title: Headline of the news item. ``None`` is treated as empty.
        summary: Optional summary text, searched together with the title.
            ``None`` is treated as empty.
        sentiment: Sentiment label; only ``"negative"`` influences the result.
        confidence: Confidence attached to ``sentiment``; the heuristic
            requires at least ``0.7``.
        tickers_mentioned: Tickers referenced by the item, if any.

    Returns:
        ``"high"``, ``"medium"`` or ``"low"``.
    """
    # Tolerate missing fields so a None title cannot crash the debug slice
    # below or leak the literal text "None" into the searched string.
    title = title or ""
    summary = summary or ""
    text = f"{title} {summary}".lower()

    # Tier 1: any HIGH keyword wins immediately.
    for pattern in _HIGH_PATTERNS:
        if pattern.search(text):
            logger.debug("HIGH match: %s in '%s'", pattern.pattern, title[:60])
            return "high"

    # Tier 2: MEDIUM keywords are only consulted when nothing HIGH matched.
    for pattern in _MEDIUM_PATTERNS:
        if pattern.search(text):
            logger.debug("MEDIUM match: %s in '%s'", pattern.pattern, title[:60])
            return "medium"

    # Tier 2 fallback: confidently negative news that names at least one
    # ticker is escalated even without a keyword hit.
    if sentiment == "negative" and confidence >= 0.7 and tickers_mentioned:
        return "medium"

    return "low"


def get_concern_label(level: str) -> dict[str, str]:
    """Return the display emoji, label and colour for a concern level.

    Args:
        level: Concern level, expected to be ``"high"``, ``"medium"`` or
            ``"low"``.

    Returns:
        A dict with the keys ``"emoji"``, ``"label"`` and ``"color"``. Any
        unrecognised level falls back to the ``"low"`` entry.
    """
    # Built per call so callers may mutate the returned dict freely.
    labels = {
        "high": {"emoji": "🔴", "label": "IMMEDIATE", "color": "#f43f5e"},
        "medium": {"emoji": "⚠️", "label": "MEDIUM CONCERN", "color": "#f59e0b"},
        "low": {"emoji": "🟢", "label": "REGULAR", "color": "#34d399"},
    }
    return labels.get(level, labels["low"])