| """ |
| Emotion/register classifier module. |
| Classifies text emotional register (neutral, passionate, cautious, etc.). |
| Used as one dimension of the style fingerprint. |
| """ |
|
|
| import re |
| from typing import Dict |
|
|
|
|
class EmotionClassifier:
    """Classify the emotional register of text using keyword matching.

    Each register owns a set of indicator words. A text is scored by how
    many *distinct* indicator words of each register it contains (a word
    repeated in the text still counts once), with small bonuses for
    exclamation and question marks. Scores are then normalised into a
    distribution over the registers.
    """

    # Indicator words per register. All entries are lowercase ASCII words,
    # which is what the tokenizer below produces. Note that "indicates" is
    # listed under both "neutral" and "cautious", so it contributes to both.
    REGISTER_KEYWORDS = {
        "neutral": {
            "states", "indicates", "shows", "reports", "notes",
            "describes", "observed", "found", "results", "data",
            "information", "according", "based", "study", "analysis",
        },
        "passionate": {
            "amazing", "incredible", "extraordinary", "remarkable",
            "outstanding", "excellent", "wonderful", "brilliant",
            "terrible", "devastating", "critical", "urgent",
            "essential", "vital", "crucial", "imperative",
        },
        "cautious": {
            "perhaps", "possibly", "might", "may", "could",
            "seem", "appears", "suggests", "indicates", "tend",
            "potentially", "arguably", "presumably", "conceivably",
            "tentatively", "provisionally",
        },
        "analytical": {
            "therefore", "consequently", "thus", "hence", "because",
            "analysis", "examine", "investigate", "evaluate", "assess",
            "compare", "contrast", "correlate", "determine", "evidence",
            "hypothesis", "methodology", "framework",
        },
        "confident": {
            "clearly", "obviously", "certainly", "definitely",
            "undoubtedly", "indeed", "absolutely", "demonstrate",
            "prove", "establish", "confirm", "guarantee",
            "unquestionably", "invariably",
        },
    }

    # Matches runs of lowercase letters. Used on lowercased text so that
    # punctuation attached to a word ("amazing!", "perhaps,") does not
    # prevent it from matching a keyword.
    _WORD_RE = re.compile(r"[a-z]+")

    def __init__(self) -> None:
        """Create a classifier; it holds no per-instance state."""

    def classify(self, text: str) -> Dict[str, float]:
        """Return the distribution of *text* over the register categories.

        Args:
            text: The text to classify.

        Returns:
            A dict keyed by register name (neutral, passionate, cautious,
            analytical, confident) whose values sum to ~1.0. When the text
            is empty, whitespace-only, or contains no indicator at all, a
            uniform distribution (0.2 per register) is returned.
        """
        if not text or not text.strip():
            return {k: 0.2 for k in self.REGISTER_KEYWORDS}

        # Tokenize with a regex rather than str.split() so that words
        # adjacent to punctuation are still recognised as keywords.
        words = set(self._WORD_RE.findall(text.lower()))

        # Base score: number of distinct indicator words present.
        scores = {
            register: len(words & keywords)
            for register, keywords in self.REGISTER_KEYWORDS.items()
        }

        # Punctuation bonuses: every "!" nudges towards passionate, every
        # "?" nudges (more weakly) towards cautious.
        scores["passionate"] = scores.get("passionate", 0) + text.count("!") * 0.5
        scores["cautious"] = scores.get("cautious", 0) + text.count("?") * 0.3

        total = sum(scores.values())
        if total == 0:
            # No evidence for any register: fall back to uniform.
            return {k: 0.2 for k in self.REGISTER_KEYWORDS}

        return {k: v / total for k, v in scores.items()}
|
|