"""
Emotion/register classifier module.

Classifies text emotional register (neutral, passionate, cautious, etc.).
Used as one dimension of the style fingerprint.
"""

import re
from typing import Dict


class EmotionClassifier:
    """Classifies emotional register of text using keyword-based analysis."""

    # Keyword sets per register. Note that a few keywords are shared between
    # registers ("indicates": neutral + cautious, "analysis": neutral +
    # analytical); such a word contributes one point to each of them.
    REGISTER_KEYWORDS = {
        "neutral": {
            "states", "indicates", "shows", "reports", "notes", "describes",
            "observed", "found", "results", "data", "information",
            "according", "based", "study", "analysis",
        },
        "passionate": {
            "amazing", "incredible", "extraordinary", "remarkable",
            "outstanding", "excellent", "wonderful", "brilliant",
            "terrible", "devastating", "critical", "urgent",
            "essential", "vital", "crucial", "imperative",
        },
        "cautious": {
            "perhaps", "possibly", "might", "may", "could", "seem",
            "appears", "suggests", "indicates", "tend", "potentially",
            "arguably", "presumably", "conceivably", "tentatively",
            "provisionally",
        },
        "analytical": {
            "therefore", "consequently", "thus", "hence", "because",
            "analysis", "examine", "investigate", "evaluate", "assess",
            "compare", "contrast", "correlate", "determine", "evidence",
            "hypothesis", "methodology", "framework",
        },
        "confident": {
            "clearly", "obviously", "certainly", "definitely",
            "undoubtedly", "indeed", "absolutely", "demonstrate", "prove",
            "establish", "confirm", "guarantee", "unquestionably",
            "invariably",
        },
    }

    # A token is a run of word characters; apostrophes and hyphens are kept
    # only when they join two such runs ("don't", "well-known"), so leading
    # and trailing punctuation ("amazing!", "Perhaps,") is dropped.
    _TOKEN_PATTERN = re.compile(r"\w+(?:['\u2019-]\w+)*")

    def __init__(self):
        """Create a classifier; it holds no per-instance state."""
        pass

    def _uniform_distribution(self) -> Dict[str, float]:
        """Return an equal-weight distribution over all registers."""
        share = 1.0 / len(self.REGISTER_KEYWORDS)
        return {register: share for register in self.REGISTER_KEYWORDS}

    def classify(self, text: str) -> Dict[str, float]:
        """Return emotion distribution over register categories.

        Scoring is keyword based: each register earns one point per distinct
        keyword of that register found in ``text`` (case-insensitive,
        repeated occurrences of the same word count once). In addition,
        every ``!`` adds 0.5 to "passionate" and every ``?`` adds 0.3 to
        "cautious". Scores are then divided by their sum.

        Args:
            text: The text to classify.

        Returns:
            A dict with keys: neutral, passionate, cautious, analytical,
            confident. Values are probabilities that sum to ~1.0. A uniform
            distribution is returned when ``text`` is empty or
            whitespace-only, or when no signal at all is found.
        """
        if not text or not text.strip():
            return self._uniform_distribution()

        # Tokenise with a regex rather than str.split() so that punctuation
        # attached to a word does not prevent it from matching a keyword.
        words = set(self._TOKEN_PATTERN.findall(text.lower()))

        scores = {
            register: len(words & keywords)
            for register, keywords in self.REGISTER_KEYWORDS.items()
        }

        # Add punctuation-based signals
        scores["passionate"] = scores.get("passionate", 0) + text.count("!") * 0.5
        scores["cautious"] = scores.get("cautious", 0) + text.count("?") * 0.3

        # Normalise to probability distribution
        total = sum(scores.values())
        if total == 0:
            return self._uniform_distribution()

        return {register: score / total for register, score in scores.items()}