"""
Emotion/register classifier module.
Classifies text emotional register (neutral, passionate, cautious, etc.).
Used as one dimension of the style fingerprint.
"""

import re
from typing import Dict


class EmotionClassifier:
    """Classifies emotional register of text using keyword-based analysis.

    Each register owns a set of lowercase indicator words. A text is scored
    by how many *distinct* indicator words of each register it contains
    (repeats of the same word are not counted twice), plus small bonuses for
    ``!`` and ``?`` characters, and the scores are normalised into a
    distribution.
    """

    # NOTE: a few words deliberately or accidentally appear in more than one
    # register ("indicates": neutral + cautious, "analysis": neutral +
    # analytical); such a word contributes one point to each register.
    REGISTER_KEYWORDS = {
        "neutral": {
            "states", "indicates", "shows", "reports", "notes",
            "describes", "observed", "found", "results", "data",
            "information", "according", "based", "study", "analysis",
        },
        "passionate": {
            "amazing", "incredible", "extraordinary", "remarkable",
            "outstanding", "excellent", "wonderful", "brilliant",
            "terrible", "devastating", "critical", "urgent",
            "essential", "vital", "crucial", "imperative",
        },
        "cautious": {
            "perhaps", "possibly", "might", "may", "could",
            "seem", "appears", "suggests", "indicates", "tend",
            "potentially", "arguably", "presumably", "conceivably",
            "tentatively", "provisionally",
        },
        "analytical": {
            "therefore", "consequently", "thus", "hence", "because",
            "analysis", "examine", "investigate", "evaluate", "assess",
            "compare", "contrast", "correlate", "determine", "evidence",
            "hypothesis", "methodology", "framework",
        },
        "confident": {
            "clearly", "obviously", "certainly", "definitely",
            "undoubtedly", "indeed", "absolutely", "demonstrate",
            "prove", "establish", "confirm", "guarantee",
            "unquestionably", "invariably",
        },
    }

    # Weight added to "passionate" per "!" and to "cautious" per "?".
    _EXCLAMATION_WEIGHT = 0.5
    _QUESTION_WEIGHT = 0.3

    # Matches alphabetic words, keeping contractions ("could've") as a single
    # token. Applied to lowercased text, so only a-z needs to be covered; all
    # keywords above are plain ASCII.
    _WORD_RE = re.compile(r"[a-z]+(?:'[a-z]+)?")

    def __init__(self):
        """Create a classifier; it holds no per-instance state."""
        pass

    def _uniform(self) -> Dict[str, float]:
        """Return an equal-probability distribution over all registers."""
        registers = self.REGISTER_KEYWORDS
        return {name: 1.0 / len(registers) for name in registers}

    def classify(self, text: str) -> Dict[str, float]:
        """Return emotion distribution over register categories.

        Args:
            text: The text to classify. Matching is case-insensitive and
                ignores punctuation attached to words.

        Returns:
            A dict with keys: neutral, passionate, cautious, analytical,
            confident. Values are probabilities that sum to ~1.0. When the
            text is empty/whitespace-only or contains no signal at all, a
            uniform distribution is returned.
        """
        if not text or not text.strip():
            return self._uniform()

        # Tokenise with a regex rather than str.split(): splitting on
        # whitespace leaves punctuation glued to words ("amazing!", "data,"),
        # which silently prevented those words from matching any keyword.
        words = set(self._WORD_RE.findall(text.lower()))

        scores = {
            register: len(words & keywords)
            for register, keywords in self.REGISTER_KEYWORDS.items()
        }

        # Add punctuation-based signals. These count every occurrence in the
        # raw text, unlike keywords which are counted once per distinct word.
        scores["passionate"] = (
            scores.get("passionate", 0) + text.count("!") * self._EXCLAMATION_WEIGHT
        )
        scores["cautious"] = (
            scores.get("cautious", 0) + text.count("?") * self._QUESTION_WEIGHT
        )

        # Normalise to probability distribution
        total = sum(scores.values())
        if total == 0:
            return self._uniform()

        return {register: score / total for register, score in scores.items()}