# tillu-daemon / app / transformers / classifiers.py
# tillu-AI's picture
# upload app/transformers/classifiers.py
# 7e59074 verified
"""
Classification models for intent, emotion, and stress
Uses Hugging Face Inference API
"""
import re
from typing import Dict, Any, Optional

import httpx

from app.config import settings
from app.utils.logging import get_logger
# Module-level logger for this file; each classifier class below also creates
# its own named logger in ``__init__``.
logger = get_logger("classifiers")
class IntentClassifier:
    """
    Intent classification via the Hugging Face Inference API.

    The model name, API base URL and token are read from ``settings``
    (``hf_classifier_model``, ``hf_inference_api_url``, ``hf_token``).
    When no token is configured, the request fails, or the response cannot
    be parsed, a keyword heuristic is used instead.

    Model: distilbert-base-uncased (fine-tuned)
    Output: Intent class from 14 categories
    """

    INTENT_CLASSES = [
        "small_talk",
        "general_query",
        "follow_up",
        "research_request",
        "deep_analysis",
        "action_required",
        "real_world_query",
        "multi_step_task",
        "pattern_query",
        "data_analysis",
        "structured_request",
        "distress",
        "sadness",
        "high_stress",
    ]

    # Ordered heuristic rules: (pattern, intent_class, confidence).
    # Patterns are anchored at the start of a word (``\b``) so that keywords
    # embedded in unrelated words ("facebook", "border", "show") no longer
    # match, while inflected forms ("booking", "ordered") still do.
    _HEURISTIC_RULES = (
        (
            re.compile(r"\b(?:research|analyze|study|investigate|deep\s+dive)"),
            "research_request",
            0.7,
        ),
        (
            re.compile(r"\b(?:book|schedule|set\s+up|create|buy|order)"),
            "action_required",
            0.7,
        ),
        (
            re.compile(r"\b(?:stressed|worried|anxious|overwhelmed|help)"),
            "distress",
            0.6,
        ),
    )
    _QUESTION_RE = re.compile(r"\b(?:what|how|why|when|where)")

    def __init__(self):
        self.model = settings.hf_classifier_model
        self.api_url = settings.hf_inference_api_url
        self.token = settings.hf_token
        self.logger = get_logger("intent_classifier")

    async def classify(self, text: str) -> Dict[str, Any]:
        """
        Classify user intent.

        Args:
            text: User input text

        Returns:
            Dict with intent_class, confidence, all_scores. Empty or
            whitespace-only input yields ``general_query`` with confidence 1.0.
        """
        if not text or not text.strip():
            return {
                "intent_class": "general_query",
                "confidence": 1.0,
                "all_scores": {},
            }

        try:
            # Use HF Inference API
            result = await self._classify_hf_api(text)
        except Exception as e:
            # Best-effort: any API failure degrades to the heuristic below.
            self.logger.error(f"Intent classification error: {e}")
        else:
            if result:
                return result

        # Fallback: simple keyword-based classification
        return self._classify_heuristic(text)

    async def _classify_hf_api(self, text: str) -> Optional[Dict[str, Any]]:
        """
        Classify via HF Inference API.

        Returns:
            The parsed result, or None when no token is configured, the API
            answers with a non-200 status, or the payload cannot be parsed.
        """
        if not self.token:
            return None

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.api_url}/models/{self.model}",
                headers={"Authorization": f"Bearer {self.token}"},
                json={"inputs": text},
                timeout=10.0,
            )

        if response.status_code != 200:
            # Make the failure visible instead of silently falling back.
            self.logger.warning(
                f"Intent classification API returned status {response.status_code}"
            )
            return None

        return self._parse_predictions(response.json())

    @staticmethod
    def _parse_predictions(data: Any) -> Optional[Dict[str, Any]]:
        """
        Turn a decoded classification response into the result dict.

        Accepts both a nested ``[[{label, score}, ...]]`` payload and a flat
        ``[{label, score}, ...]`` list. Returns None when no usable
        prediction is present (including an empty prediction list).
        """
        if not isinstance(data, list) or not data:
            return None

        candidates = data[0] if isinstance(data[0], list) else data
        predictions = [p for p in candidates if isinstance(p, dict)]
        if not predictions:
            return None

        def normalize(label: str) -> str:
            return label.lower().replace(' ', '_')

        # Get top prediction
        top = max(predictions, key=lambda p: p.get('score', 0))
        return {
            "intent_class": normalize(top.get('label', 'general_query')),
            "confidence": top.get('score', 0.5),
            "all_scores": {
                normalize(p.get('label', '')): p.get('score', 0)
                for p in predictions
            },
        }

    def _classify_heuristic(self, text: str) -> Dict[str, Any]:
        """
        Fallback heuristic classification.

        Rules are checked in order (research, action, distress), then
        question indicators; anything else is ``small_talk``.
        """
        text_lower = text.lower()

        for pattern, intent, confidence in self._HEURISTIC_RULES:
            if pattern.search(text_lower):
                return {
                    "intent_class": intent,
                    "confidence": confidence,
                    "all_scores": {},
                }

        # Question indicators
        if '?' in text or self._QUESTION_RE.search(text_lower):
            return {"intent_class": "general_query", "confidence": 0.8, "all_scores": {}}

        # Default
        return {"intent_class": "small_talk", "confidence": 0.6, "all_scores": {}}
class EmotionDetector:
    """
    Emotion detection via the Hugging Face Inference API.

    The model name, API base URL and token are read from ``settings``
    (``hf_emotion_model``, ``hf_inference_api_url``, ``hf_token``). When the
    API cannot be used or yields nothing usable, a neutral result is returned.

    Model: j-hartmann/emotion-english-distilroberta-base
    Output: {joy, sadness, anger, fear, surprise, disgust, neutral} scores
    """

    EMOTIONS = ["joy", "sadness", "anger", "fear", "surprise", "disgust", "neutral"]

    def __init__(self):
        self.model = settings.hf_emotion_model
        self.api_url = settings.hf_inference_api_url
        self.token = settings.hf_token
        self.logger = get_logger("emotion_detector")

    @classmethod
    def _neutral_result(cls) -> Dict[str, Any]:
        """Build the neutral result used for empty input and as the fallback."""
        return {
            "dominant_emotion": "neutral",
            "scores": {e: 0.0 for e in cls.EMOTIONS},
            "emotion_intensity": 0.0,
        }

    async def detect(self, text: str) -> Dict[str, Any]:
        """
        Detect emotions in text.

        Args:
            text: Input text

        Returns:
            Dict with dominant_emotion, scores and emotion_intensity. Empty
            input and any API failure yield the neutral result.
        """
        if not text or not text.strip():
            return self._neutral_result()

        try:
            result = await self._detect_hf_api(text)
        except Exception as e:
            # Best-effort: any API failure degrades to the neutral result.
            self.logger.error(f"Emotion detection error: {e}")
        else:
            if result:
                return result

        # Fallback: neutral
        return self._neutral_result()

    async def _detect_hf_api(self, text: str) -> Optional[Dict[str, Any]]:
        """
        Detect emotions via HF Inference API.

        Returns:
            The parsed result, or None when no token is configured, the API
            answers with a non-200 status, or the payload cannot be parsed.
        """
        if not self.token:
            return None

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.api_url}/models/{self.model}",
                headers={"Authorization": f"Bearer {self.token}"},
                json={"inputs": text},
                timeout=10.0,
            )

        if response.status_code != 200:
            # Make the failure visible instead of silently falling back.
            self.logger.warning(
                f"Emotion detection API returned status {response.status_code}"
            )
            return None

        return self._parse_predictions(response.json())

    @classmethod
    def _parse_predictions(cls, data: Any) -> Optional[Dict[str, Any]]:
        """
        Turn a decoded classification response into the result dict.

        Accepts both a nested ``[[{label, score}, ...]]`` payload and a flat
        ``[{label, score}, ...]`` list. Returns None when no usable
        prediction is present, so that an empty response is not reported as
        a zero-intensity "joy" (the first key after zero-filling).
        """
        if not isinstance(data, list) or not data:
            return None

        candidates = data[0] if isinstance(data[0], list) else data

        # Build scores dict
        scores = {
            pred.get('label', '').lower(): pred.get('score', 0.0)
            for pred in candidates
            if isinstance(pred, dict)
        }
        if not scores:
            return None

        # Fill missing emotions with 0
        for emotion in cls.EMOTIONS:
            scores.setdefault(emotion, 0.0)

        # Determine dominant
        dominant = max(scores, key=scores.get)
        return {
            "dominant_emotion": dominant,
            "scores": scores,
            "emotion_intensity": scores[dominant],
        }
class StressDetector:
    """
    Stress/Toxicity detection via the Hugging Face Inference API.

    The score of the model's "toxic" label is mapped onto stress levels;
    when the API cannot be used or yields nothing usable, a keyword
    heuristic is applied instead.
    NOTE(review): a toxicity score is used here as a proxy for stress —
    confirm this is the intended signal.

    Model: martin-ha/toxic-comment-model
    Output: Stress/distress probability score
    """

    # Labels whose score is read as the toxic score (compared lower-cased).
    _TOXIC_LABELS = ("toxic", "label_1")

    # Stress keywords for the heuristic, anchored at the start of a word so
    # that e.g. "hispanic" (contains "panic") or "insurgent" (contains
    # "urgent") are not counted.
    _STRESS_WORD_RE = re.compile(
        r"\b(?:stressed|overwhelmed|anxious|worried|panic|"
        r"urgent|emergency|help|desperate|exhausted)"
    )

    def __init__(self):
        self.model = "martin-ha/toxic-comment-model"
        self.api_url = settings.hf_inference_api_url
        self.token = settings.hf_token
        self.logger = get_logger("stress_detector")

    @staticmethod
    def _build_result(score: float, high_threshold: float) -> Dict[str, Any]:
        """Map a score to the result dict; "medium" starts above 0.3."""
        if score > high_threshold:
            level = "high"
        elif score > 0.3:
            level = "medium"
        else:
            level = "low"
        return {
            "stress_level": level,
            "score": score,
            "is_stressed": score > 0.5,
        }

    async def detect(self, text: str) -> Dict[str, Any]:
        """
        Detect stress level.

        Args:
            text: Input text

        Returns:
            Dict with stress_level, score, is_stressed. Empty input yields
            a "low" result with score 0.0.
        """
        if not text or not text.strip():
            return {
                "stress_level": "low",
                "score": 0.0,
                "is_stressed": False,
            }

        try:
            result = await self._detect_hf_api(text)
        except Exception as e:
            # Best-effort: any API failure degrades to the heuristic below.
            self.logger.error(f"Stress detection error: {e}")
        else:
            if result:
                return result

        # Fallback: heuristic
        return self._detect_heuristic(text)

    async def _detect_hf_api(self, text: str) -> Optional[Dict[str, Any]]:
        """
        Detect stress via HF Inference API.

        Returns:
            The parsed result, or None when no token is configured, the API
            answers with a non-200 status, or the payload cannot be parsed.
        """
        if not self.token:
            return None

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.api_url}/models/{self.model}",
                headers={"Authorization": f"Bearer {self.token}"},
                json={"inputs": text},
                timeout=10.0,
            )

        if response.status_code != 200:
            # Make the failure visible instead of silently falling back.
            self.logger.warning(
                f"Stress detection API returned status {response.status_code}"
            )
            return None

        return self._parse_predictions(response.json())

    @classmethod
    def _parse_predictions(cls, data: Any) -> Optional[Dict[str, Any]]:
        """
        Turn a decoded classification response into the result dict.

        Accepts both a nested ``[[{label, score}, ...]]`` payload and a flat
        ``[{label, score}, ...]`` list. Returns None when no usable
        prediction is present, so the caller falls back to the heuristic
        instead of reporting a confident "low". If predictions exist but
        none carries a toxic label, the score stays 0.0.
        """
        if not isinstance(data, list) or not data:
            return None

        candidates = data[0] if isinstance(data[0], list) else data
        predictions = [p for p in candidates if isinstance(p, dict)]
        if not predictions:
            return None

        # Calculate toxic score (the last matching label wins)
        toxic_score = 0.0
        for pred in predictions:
            if str(pred.get('label', '')).lower() in cls._TOXIC_LABELS:
                toxic_score = pred.get('score', 0.0)

        # Map to stress levels: "high" above 0.7 for model scores
        return cls._build_result(toxic_score, 0.7)

    def _detect_heuristic(self, text: str) -> Dict[str, Any]:
        """
        Heuristic stress detection.

        Counts how many distinct stress keywords occur in the text and
        scales the count to an intensity (three or more keywords give 1.0).
        """
        text_lower = text.lower()
        matched = {m.group(0) for m in self._STRESS_WORD_RE.finditer(text_lower)}
        intensity = min(len(matched) / 3, 1.0)  # Cap at 1.0

        # "high" starts above 0.6 for the heuristic
        return self._build_result(intensity, 0.6)
# Global instances
# Created once when this module is imported; each constructor reads its
# API URL and token from ``settings`` at that moment.
intent_classifier = IntentClassifier()
emotion_detector = EmotionDetector()
stress_detector = StressDetector()