Spaces:

SiddharthVenba
/

Swing_Quant_Engine

Running

App Files Files Community

Swing_Quant_Engine / backend /intelligence /alert_classifier.py

SiddharthVenba

Initial commit for HF Space

75d9b3c about 2 months ago

Raw

History Blame Contribute Delete

3.54 kB

	"""
	Alert Classifier — Categorizes news into three concern tiers.
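	Uses keyword rules first (zero tokens), then a sentiment-based boost. This module makes no LLM calls itself; an LLM fallback for ambiguous items, if used, has to be applied by the caller.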

	Tiers:
	🔴 high — Black swan, major earnings miss, CEO departure, legal action
	⚠️ medium — Analyst downgrade, rate changes, sector regulatory, >5% price move
	🟢 low — Standard disclosures, minor analyst changes, general commentary
	"""
	import logging
	import re

	logger = logging.getLogger(__name__)

	# ── Rule-Based Classification (Zero Token Cost) ──
	#
	# Every entry is a regular-expression fragment. The fragments are compiled
	# below with re.IGNORECASE and searched against "<title> <summary>".
	# Alternation inside groups uses a plain "|"; an escaped "\|" would match a
	# literal pipe character and the group would never hit real headlines.

	# Patterns that put an item straight into the "high" tier.
	HIGH_KEYWORDS = [
	    r"\bfraud\b", r"\bscam\b", r"\bbankrupt", r"\bdefault\b",
	    r"\bceo\s+(resign|depart|step|fired|ousted|quit)",
	    r"\bcfo\s+(resign|depart|step|fired|ousted|quit)",
	    r"\blegal\s+action", r"\blawsuit\b", r"\bsec\s+investigat",
	    r"\bearnings\s+miss", r"\bprofit\s+warning",
	    r"\bcrash\b", r"\bplunge\b", r"\btumble\b",
	    r"\bhalt\s+trad", r"\bsuspend\b",
	    r"\bblack\s*swan\b",
	    # "trade war" is listed in MEDIUM_KEYWORDS; the lookbehind keeps this
	    # high-tier entry from shadowing it (high patterns are checked first).
	    r"(?<!trade\s)\bwar\b",
	    r"\bsanction\b",
	    r"\brecall\b", r"\bdata\s+breach\b",
	    # Stems: a trailing \b would require the word to end right after the
	    # stem, so "collapse"/"liquidation" could never match. Use \w* instead.
	    r"\bcollaps\w*", r"\bliquidat\w*",
	    # Drop of 10% or more (two or more integer digits, optional decimals).
	    r"\b(drop|fall|crash|sink|plummet|tank)\w*\s+\d{2,}(?:\.\d+)?%",
	]

	# Patterns that put an item into the "medium" tier when no high pattern hit.
	MEDIUM_KEYWORDS = [
	    r"\bdowngrade\b", r"\bupgrade\b",
	    r"\brate\s+(cut|hike|change|decision)",
	    r"\binterest\s+rate", r"\brbi\b", r"\bfed\b.*\brate\b",
	    r"\bregulat\w+\s+(inquiry|probe|action|scrutiny)",
	    r"\btariff\b", r"\btrade\s+war\b",
	    r"\bsector\s+(rotation|shift|sell)",
	    r"\banalyst\b.*\b(target|cut|raise|lower)",
	    r"\bmerger\b", r"\bacquisition\b", r"\btakeover\b",
	    r"\bipo\b", r"\bdelisting\b",
	    # 5-9% move in either direction (single leading digit, optional decimals).
	    r"\b(rise|gain|surge|jump|rally)\w*\s+[5-9](?:\.\d+)?%",
	    r"\b(drop|fall|decline)\w*\s+[5-9](?:\.\d+)?%",
	    r"\binflation\b", r"\bgdp\b",
	    r"\bearnings\b.*\b(beat|miss|surprise)",
	    r"\bdividend\s+(cut|slash|suspend)",
	    r"\brestructur", r"\blayoff\b", r"\bjob\s+cut",
	]

	# Compile once at import time so classification only performs searches.
	_HIGH_PATTERNS = [re.compile(p, re.IGNORECASE) for p in HIGH_KEYWORDS]
	_MEDIUM_PATTERNS = [re.compile(p, re.IGNORECASE) for p in MEDIUM_KEYWORDS]


	def classify_concern_level(
	    title: str,
	    summary: str = "",
	    sentiment: str = "neutral",
	    confidence: float = 0.5,
	    tickers_mentioned: list[str] | None = None,
	) -> str:
	    """
	    Classify a news item into a concern level: 'high', 'medium', or 'low'.

	    Checks run in order and the first hit wins:
	      1. any compiled high-tier pattern found in "<title> <summary>" -> 'high'
	      2. any compiled medium-tier pattern found there -> 'medium'
	      3. sentiment is exactly "negative", confidence >= 0.7 and at least
	         one ticker is mentioned -> 'medium'
	      4. otherwise -> 'low'

	    Args:
	        title: Headline text. None is treated as an empty string.
	        summary: Optional body/summary text. None is treated as empty.
	        sentiment: Sentiment label; only the value "negative" is acted on.
	        confidence: Confidence of the sentiment label.
	        tickers_mentioned: Tickers referenced by the item, if any.

	    Returns:
	        One of 'high', 'medium', 'low'.
	    """
	    # Tolerate missing text so the slice in the debug log cannot fail and
	    # the literal word "None" never ends up in the searched text.
	    title = title or ""
	    summary = summary or ""
	    text = f"{title} {summary}".lower()

	    # ── High Priority Check ──
	    for pattern in _HIGH_PATTERNS:
	        if pattern.search(text):
	            logger.debug("HIGH match: %s in '%s'", pattern.pattern, title[:60])
	            return "high"

	    # ── Medium Priority Check ──
	    for pattern in _MEDIUM_PATTERNS:
	        if pattern.search(text):
	            logger.debug("MEDIUM match: %s in '%s'", pattern.pattern, title[:60])
	            return "medium"

	    # ── Sentiment-Based Boost ──
	    # Strong negative sentiment on an item that names at least one ticker
	    # is raised to medium even without a keyword hit.
	    if sentiment == "negative" and confidence >= 0.7 and tickers_mentioned:
	        return "medium"

	    # ── Default ──
	    return "low"


	def get_concern_label(level: str) -> dict[str, str]:
	    """
	    Return the display metadata for a concern level.

	    Args:
	        level: 'high', 'medium' or 'low'. Matching ignores case and
	            surrounding whitespace.

	    Returns:
	        A dict with the keys "emoji", "label" and "color". Unknown or
	        non-string levels get the 'low' entry. The mapping is rebuilt on
	        every call, so callers may mutate the returned dict safely.
	    """
	    labels = {
	        "high": {"emoji": "🔴", "label": "IMMEDIATE", "color": "#f43f5e"},
	        "medium": {"emoji": "⚠️", "label": "MEDIUM CONCERN", "color": "#f59e0b"},
	        "low": {"emoji": "🟢", "label": "REGULAR", "color": "#34d399"},
	    }
	    # Anything that is not a string (None, numbers, unhashable values)
	    # cannot name a tier, so it falls back to the default entry.
	    if not isinstance(level, str):
	        return labels["low"]
	    return labels.get(level.strip().lower(), labels["low"])