"""
Alert Classifier — Categorizes news into three concern tiers.
Uses keyword rules first (zero tokens), LLM fallback for ambiguous items.

Tiers:
  🔴 high     — Black swan, major earnings miss, CEO departure, legal action
  ⚠️ medium   — Analyst downgrade, rate changes, sector regulatory, >5% price move
  🟢 low      — Standard disclosures, minor analyst changes, general commentary
"""
import logging
import re

# Module-level logger named after this module; the classifier emits a DEBUG
# record for each keyword rule that decides a tier.
logger = logging.getLogger(__name__)

# ── Rule-Based Classification (Zero Token Cost) ──

# Regex fragments that put an item in the "high" tier. They are compiled
# case-insensitively further down in this module and applied with `search`,
# so each fragment may match anywhere in the title/summary text.
#
# Conventions used here:
#   * A stem WITHOUT a trailing \b ("bankrupt", "collaps", "liquidat")
#     intentionally matches every inflection (bankruptcy, collapsed, ...).
#     A stem must never be followed by \b, or it can only match the bare
#     stem, which is not a real word.
#   * Whole words list their common headline inflections explicitly
#     ("plunges", "crashed", "sanctions") so unrelated longer words are not
#     picked up.
HIGH_KEYWORDS = [
    r"\bfraud\b", r"\bscam\b", r"\bbankrupt", r"\bdefault\b",
    r"\bceo\s+(resign|depart|step|fired|ousted|quit)",
    r"\bcfo\s+(resign|depart|step|fired|ousted|quit)",
    r"\blegal\s+action", r"\blawsuits?\b", r"\bsec\s+investigat",
    r"\bearnings\s+miss", r"\bprofit\s+warning",
    r"\bcrash(?:es|ed|ing)?\b",
    r"\bplung(?:e|es|ed|ing)\b",
    r"\btumbl(?:e|es|ed|ing)\b",
    r"\bhalt\w*\s+trad", r"\bsuspend(?:s|ed|ing)?\b",
    r"\bblack\s*swan\b", r"\bwar\b", r"\bsanction(?:s|ed)?\b",
    r"\brecall\b", r"\bdata\s+breach\b",
    r"\bcollaps", r"\bliquidat",
    # Double-digit (>=10%) fall, with an optional decimal part ("12.5%").
    r"\b(drop|fall|crash|sink|plummet|tank)\w*\s+\d{2,}(?:\.\d+)?%",
]

# Regex fragments that put an item in the "medium" tier. They are only
# consulted when no high-tier fragment matched, so overlapping ranges (for
# example a double-digit fall) are safe: the high tier gets first refusal.
#
# Price-move fragments accept any move of 5% or more, with an optional
# decimal part. Double-digit gains, and double-digit falls phrased with a
# verb the high tier does not list (e.g. "declines 12%"), would otherwise
# drop through to "low".
MEDIUM_KEYWORDS = [
    r"\bdowngrad(?:e|es|ed|ing)\b", r"\bupgrad(?:e|es|ed|ing)\b",
    r"\brate\s+(cut|hike|change|decision)",
    r"\binterest\s+rate", r"\brbi\b", r"\bfed\b.*\brate\b",
    r"\bregulat\w+\s+(inquiry|probe|action|scrutiny)",
    r"\btariffs?\b", r"\btrade\s+war\b",
    r"\bsector\s+(rotation|shift|sell)",
    r"\banalyst\b.*\b(target|cut|raise|lower)",
    r"\bmerger\b", r"\bacquisition\b", r"\btakeover\b",
    r"\bipo\b", r"\bdelisting\b",
    r"\b(rise|gain|surge|jump|rally)\w*\s+(?:[5-9]|\d{2,})(?:\.\d+)?%",  # >=5% up
    r"\b(drop|fall|decline)\w*\s+(?:[5-9]|\d{2,})(?:\.\d+)?%",  # >=5% down
    r"\binflation\b", r"\bgdp\b",
    r"\bearnings\b.*\b(beat|miss|surprise)",
    r"\bdividend\s+(cut|slash|suspend)",
    r"\brestructur", r"\blayoffs?\b", r"\bjob\s+cut",
]

# Pre-compile both keyword lists once at import time (case-insensitive) so
# that classifying an item only has to run already-built pattern objects.
_HIGH_PATTERNS, _MEDIUM_PATTERNS = (
    [re.compile(fragment, re.IGNORECASE) for fragment in keyword_list]
    for keyword_list in (HIGH_KEYWORDS, MEDIUM_KEYWORDS)
)


def classify_concern_level(
    title: str,
    summary: str = "",
    sentiment: str = "neutral",
    confidence: float = 0.5,
    tickers_mentioned: list[str] | None = None,
) -> str:
    """
    Assign a news item to a concern tier using keyword rules only.

    The title and summary are joined into one lower-cased string and scanned
    against the high-tier patterns, then the medium-tier patterns; the first
    pattern that matches decides the tier. If nothing matches, an item is
    still raised to "medium" when its sentiment is exactly "negative", the
    confidence is at least 0.7, and at least one ticker is mentioned.

    Args:
        title: Headline of the item; its first 60 characters appear in the
            debug log line when a rule matches.
        summary: Optional body text scanned together with the title.
        sentiment: Sentiment label; only the value "negative" has an effect.
        confidence: Confidence attached to ``sentiment``.
        tickers_mentioned: Tickers referenced by the item, if any.

    Returns:
        One of "high", "medium", or "low".
    """
    haystack = f"{title} {summary}".lower()

    # High tier is evaluated first so it always wins over a medium match.
    high_hit = next((rx for rx in _HIGH_PATTERNS if rx.search(haystack)), None)
    if high_hit is not None:
        logger.debug(f"HIGH match: {high_hit.pattern} in '{title[:60]}'")
        return "high"

    medium_hit = next((rx for rx in _MEDIUM_PATTERNS if rx.search(haystack)), None)
    if medium_hit is not None:
        logger.debug(f"MEDIUM match: {medium_hit.pattern} in '{title[:60]}'")
        return "medium"

    # No keyword fired: fall back to the sentiment signal, which only counts
    # when it is confidently negative and tied to at least one ticker.
    strongly_negative = sentiment == "negative" and confidence >= 0.7
    if strongly_negative and tickers_mentioned and len(tickers_mentioned) > 0:
        return "medium"

    return "low"


def get_concern_label(level: str) -> dict:
    """Return the emoji, display label and hex color for a concern level.

    Any level other than "high", "medium" or "low" is presented with the
    "low" styling.
    """
    presentation = dict(
        high=dict(emoji="🔴", label="IMMEDIATE", color="#f43f5e"),
        medium=dict(emoji="⚠️", label="MEDIUM CONCERN", color="#f59e0b"),
        low=dict(emoji="🟢", label="REGULAR", color="#34d399"),
    )
    try:
        return presentation[level]
    except KeyError:
        # Unknown tier names fall back to the least alarming presentation.
        return presentation["low"]