# Source path: rewrite/src/style/emotion_classifier.py
# Page header: morpheuslord's picture
# Commit 12fd5f2 (verified): "Add files using upload-large-folder tool"
"""
Emotion/register classifier module.
Classifies text emotional register (neutral, passionate, cautious, etc.).
Used as one dimension of the style fingerprint.
"""
import re
from typing import Dict
class EmotionClassifier:
    """Classify the emotional register of text using keyword-based analysis.

    Each register in ``REGISTER_KEYWORDS`` is scored by how many of its
    keywords occur in the text (each distinct keyword counts once, however
    often it is repeated). Exclamation and question marks add a small bonus
    to the "passionate" and "cautious" registers respectively. The scores
    are then normalised into a distribution.
    """

    # Register name -> keywords that signal it. Matching is done on
    # lower-cased word tokens. Note that "indicates" appears under both
    # "neutral" and "cautious", and "analysis" under both "neutral" and
    # "analytical"; such a word contributes to both registers.
    REGISTER_KEYWORDS = {
        "neutral": {
            "states", "indicates", "shows", "reports", "notes",
            "describes", "observed", "found", "results", "data",
            "information", "according", "based", "study", "analysis",
        },
        "passionate": {
            "amazing", "incredible", "extraordinary", "remarkable",
            "outstanding", "excellent", "wonderful", "brilliant",
            "terrible", "devastating", "critical", "urgent",
            "essential", "vital", "crucial", "imperative",
        },
        "cautious": {
            "perhaps", "possibly", "might", "may", "could",
            "seem", "appears", "suggests", "indicates", "tend",
            "potentially", "arguably", "presumably", "conceivably",
            "tentatively", "provisionally",
        },
        "analytical": {
            "therefore", "consequently", "thus", "hence", "because",
            "analysis", "examine", "investigate", "evaluate", "assess",
            "compare", "contrast", "correlate", "determine", "evidence",
            "hypothesis", "methodology", "framework",
        },
        "confident": {
            "clearly", "obviously", "certainly", "definitely",
            "undoubtedly", "indeed", "absolutely", "demonstrate",
            "prove", "establish", "confirm", "guarantee",
            "unquestionably", "invariably",
        },
    }

    # Word tokenizer applied to lower-cased text. Hyphenated words and
    # contractions stay in one piece ("may-fly" does not yield "may"), while
    # surrounding punctuation is dropped so "amazing!" matches "amazing".
    _WORD_RE = re.compile(r"[a-z]+(?:['-][a-z]+)*")

    def __init__(self):
        """Create a classifier; it holds no per-instance state."""

    def _uniform(self) -> Dict[str, float]:
        """Return an equal share for every register (empty dict if none)."""
        registers = list(self.REGISTER_KEYWORDS)
        if not registers:
            return {}
        share = 1.0 / len(registers)
        return {register: share for register in registers}

    def classify(self, text: str) -> Dict[str, float]:
        """Return the distribution of *text* over the register categories.

        Args:
            text: The text to classify. ``None``, empty, or whitespace-only
                input yields a uniform distribution.

        Returns:
            A dict keyed by register name (neutral, passionate, cautious,
            analytical, confident) whose values sum to ~1.0. When no keyword
            or punctuation signal is found, the distribution is uniform.
        """
        if not text or not text.strip():
            return self._uniform()

        # Tokenize with the regex rather than str.split() so punctuation
        # attached to a word ("clearly,", "urgent!") does not hide a keyword.
        words = set(self._WORD_RE.findall(text.lower()))

        scores: Dict[str, float] = {
            register: float(len(words & keywords))
            for register, keywords in self.REGISTER_KEYWORDS.items()
        }

        # Punctuation-based signals: "!" reads as passionate, "?" as cautious.
        scores["passionate"] = scores.get("passionate", 0) + text.count("!") * 0.5
        scores["cautious"] = scores.get("cautious", 0) + text.count("?") * 0.3

        # Normalise to a probability distribution.
        total = sum(scores.values())
        if total == 0:
            return self._uniform()
        return {register: score / total for register, score in scores.items()}