Swing_Quant_Engine / backend /intelligence /alert_classifier.py
SiddharthVenba's picture
Initial commit for HF Space
75d9b3c
Raw
History Blame Contribute Delete
3.54 kB
"""
Alert Classifier β€” Categorizes news into three concern tiers.
Uses keyword rules first (zero tokens), LLM fallback for ambiguous items.
Tiers:
πŸ”΄ high β€” Black swan, major earnings miss, CEO departure, legal action
⚠️ medium β€” Analyst downgrade, rate changes, sector regulatory, >5% price move
🟒 low β€” Standard disclosures, minor analyst changes, general commentary
"""
import logging
import re
logger = logging.getLogger(__name__)
# ── Rule-Based Classification (Zero Token Cost) ──
# Regex sources that mark a news item as HIGH concern.
# Entries written as stems ("bankrupt", "collaps", "liquidat", "halt trad")
# intentionally carry no trailing \b so that inflected forms such as
# "collapsed" or "liquidation" match; a trailing \b on a stem can never match
# a real word.
HIGH_KEYWORDS = [
    r"\bfraud\b", r"\bscam\b", r"\bbankrupt", r"\bdefault\b",
    r"\bceo\s+(resign|depart|step|fired|ousted|quit)",
    r"\bcfo\s+(resign|depart|step|fired|ousted|quit)",
    r"\blegal\s+action", r"\blawsuit\b", r"\bsec\s+investigat",
    r"\bearnings\s+miss", r"\bprofit\s+warning",
    r"\bcrash\b", r"\bplunge\b", r"\btumble\b",
    r"\bhalt\s+trad", r"\bsuspend\b",
    r"\bblack\s*swan\b", r"\bwar\b", r"\bsanction\b",
    r"\brecall\b", r"\bdata\s+breach\b",
    r"\bcollaps", r"\bliquidat",
    # Drop of 10% or more; the optional fraction lets "12.5%" match too.
    r"\b(drop|fall|crash|sink|plummet|tank)\w*\s+\d{2,}(?:\.\d+)?%",
]
# Regex sources that mark a news item as MEDIUM concern.
# The classifier consults these only after the HIGH rules, so a double-digit
# "drop"/"fall" that also matches a HIGH rule is still reported as high.
MEDIUM_KEYWORDS = [
    r"\bdowngrade\b", r"\bupgrade\b",
    r"\brate\s+(cut|hike|change|decision)",
    r"\binterest\s+rate", r"\brbi\b", r"\bfed\b.*\brate\b",
    r"\bregulat\w+\s+(inquiry|probe|action|scrutiny)",
    r"\btariff\b", r"\btrade\s+war\b",
    r"\bsector\s+(rotation|shift|sell)",
    r"\banalyst\b.*\b(target|cut|raise|lower)",
    r"\bmerger\b", r"\bacquisition\b", r"\btakeover\b",
    r"\bipo\b", r"\bdelisting\b",
    # Price move of 5% or more: a single digit 5-9 or any multi-digit
    # number, with an optional decimal fraction ("7.5%", "12%").
    r"\b(rise|gain|surge|jump|rally)\w*\s+(?:[5-9]|\d{2,})(?:\.\d+)?%",
    r"\b(drop|fall|decline)\w*\s+(?:[5-9]|\d{2,})(?:\.\d+)?%",
    r"\binflation\b", r"\bgdp\b",
    r"\bearnings\b.*\b(beat|miss|surprise)",
    r"\bdividend\s+(cut|slash|suspend)",
    r"\brestructur", r"\blayoff\b", r"\bjob\s+cut",
]
# Pre-compile both rule sets once at import time, case-insensitively, so that
# classification only has to run searches.
_HIGH_PATTERNS, _MEDIUM_PATTERNS = (
    [re.compile(rule, flags=re.IGNORECASE) for rule in rules]
    for rules in (HIGH_KEYWORDS, MEDIUM_KEYWORDS)
)
def classify_concern_level(
    title: str,
    summary: str = "",
    sentiment: str = "neutral",
    confidence: float = 0.5,
    tickers_mentioned: list[str] | None = None,
) -> str:
    """Classify a news item into a concern level using keyword rules.

    The title and summary are joined, lower-cased and searched against the
    pre-compiled HIGH patterns first, then the MEDIUM patterns; the first
    pattern that matches decides the result.  If no pattern matches, the
    item is still raised to ``'medium'`` when the sentiment is exactly
    ``"negative"``, ``confidence`` is at least 0.7 and ``tickers_mentioned``
    is non-empty.

    Args:
        title: Headline of the news item.  ``None`` is treated as empty.
        summary: Optional body/summary text.  ``None`` is treated as empty.
        sentiment: Sentiment label supplied by the caller.
        confidence: Confidence attached to ``sentiment``.
        tickers_mentioned: Tickers the item refers to, if any.

    Returns:
        ``'high'``, ``'medium'`` or ``'low'``.
    """
    # Tolerate None so a missing field neither raises on the slice used in
    # the debug message nor injects the literal text "None" into the match.
    headline = title or ""
    body = summary or ""
    text = f"{headline} {body}".lower()

    # HIGH rules take precedence over MEDIUM rules.
    for level, patterns in (("high", _HIGH_PATTERNS), ("medium", _MEDIUM_PATTERNS)):
        for pattern in patterns:
            if pattern.search(text):
                # Lazy %-args: the message is only built if DEBUG is enabled.
                logger.debug(
                    "%s match: %s in '%s'",
                    level.upper(), pattern.pattern, headline[:60],
                )
                return level

    # Sentiment-based boost: confident negative sentiment about at least one
    # mentioned ticker is escalated to medium even without a keyword hit.
    if sentiment == "negative" and confidence >= 0.7 and tickers_mentioned:
        return "medium"

    return "low"
def get_concern_label(level: str) -> dict:
    """Return the display metadata for a concern level.

    Args:
        level: ``'high'``, ``'medium'`` or ``'low'``.

    Returns:
        A dict with ``"emoji"``, ``"label"`` and ``"color"`` (hex string)
        keys.  Any unrecognised ``level`` falls back to the ``'low'`` entry.
    """
    labels = {
        "high": {"emoji": "🔴", "label": "IMMEDIATE", "color": "#f43f5e"},
        "medium": {"emoji": "⚠️", "label": "MEDIUM CONCERN", "color": "#f59e0b"},
        "low": {"emoji": "🟢", "label": "REGULAR", "color": "#34d399"},
    }
    return labels.get(level, labels["low"])