Spaces:

tillu-AI
/

tillu-daemon

Sleeping

App Files Files Community

tillu-daemon / app /transformers /classifiers.py

tillu-AI

upload app/transformers/classifiers.py

7e59074 verified 24 days ago

raw

history blame contribute delete

11 kB

	"""
	Classification models for intent, emotion, and stress
	Uses Hugging Face Inference API
	"""
	import httpx
	from typing import Dict, Any, Optional
	from app.config import settings
	from app.utils.logging import get_logger

	logger = get_logger("classifiers")


	class IntentClassifier:
	"""
	Intent Classification
	Model: distilbert-base-uncased (fine-tuned)
	Output: Intent class from 14 categories
	"""

	INTENT_CLASSES = [
	"small_talk",
	"general_query",
	"follow_up",
	"research_request",
	"deep_analysis",
	"action_required",
	"real_world_query",
	"multi_step_task",
	"pattern_query",
	"data_analysis",
	"structured_request",
	"distress",
	"sadness",
	"high_stress",
	]

	def __init__(self):
	self.model = settings.hf_classifier_model
	self.api_url = settings.hf_inference_api_url
	self.token = settings.hf_token
	self.logger = get_logger("intent_classifier")

	async def classify(self, text: str) -> Dict[str, Any]:
	"""
	Classify user intent

	Args:
	text: User input text

	Returns:
	Dict with intent_class, confidence, all_scores
	"""
	if not text or not text.strip():
	return {
	"intent_class": "general_query",
	"confidence": 1.0,
	"all_scores": {}
	}

	try:
	# Use HF Inference API
	result = await self._classify_hf_api(text)
	if result:
	return result
	except Exception as e:
	self.logger.error(f"Intent classification error: {e}")

	# Fallback: Simple keyword-based classification
	return self._classify_heuristic(text)

	async def _classify_hf_api(self, text: str) -> Optional[Dict[str, Any]]:
	"""Classify via HF Inference API"""
	if not self.token:
	return None

	async with httpx.AsyncClient() as client:
	response = await client.post(
	f"{self.api_url}/models/{self.model}",
	headers={"Authorization": f"Bearer {self.token}"},
	json={"inputs": text},
	timeout=10.0
	)

	if response.status_code == 200:
	data = response.json()

	# Parse HF zero-shot or classification output
	if isinstance(data, list) and len(data) > 0:
	predictions = data[0]

	# Get top prediction
	if isinstance(predictions, list):
	top = max(predictions, key=lambda x: x.get('score', 0))
	return {
	"intent_class": top.get('label', 'general_query').lower().replace(' ', '_'),
	"confidence": top.get('score', 0.5),
	"all_scores": {p.get('label', '').lower().replace(' ', '_'): p.get('score', 0) for p in predictions}
	}

	return None

	def _classify_heuristic(self, text: str) -> Dict[str, Any]:
	"""Fallback heuristic classification"""
	text_lower = text.lower()

	# Research indicators
	if any(word in text_lower for word in ['research', 'analyze', 'study', 'investigate', 'deep dive']):
	return {"intent_class": "research_request", "confidence": 0.7, "all_scores": {}}

	# Action indicators
	if any(word in text_lower for word in ['book', 'schedule', 'set up', 'create', 'buy', 'order']):
	return {"intent_class": "action_required", "confidence": 0.7, "all_scores": {}}

	# Distress indicators
	if any(word in text_lower for word in ['stressed', 'worried', 'anxious', 'overwhelmed', 'help']):
	return {"intent_class": "distress", "confidence": 0.6, "all_scores": {}}

	# Question indicators
	if '?' in text or any(word in text_lower for word in ['what', 'how', 'why', 'when', 'where']):
	return {"intent_class": "general_query", "confidence": 0.8, "all_scores": {}}

	# Default
	return {"intent_class": "small_talk", "confidence": 0.6, "all_scores": {}}


	class EmotionDetector:
	"""
	Emotion Detection
	Model: j-hartmann/emotion-english-distilroberta-base
	Output: {joy, sadness, anger, fear, surprise, disgust, neutral} scores
	"""

	EMOTIONS = ["joy", "sadness", "anger", "fear", "surprise", "disgust", "neutral"]

	def __init__(self):
	self.model = settings.hf_emotion_model
	self.api_url = settings.hf_inference_api_url
	self.token = settings.hf_token
	self.logger = get_logger("emotion_detector")

	async def detect(self, text: str) -> Dict[str, Any]:
	"""
	Detect emotions in text

	Args:
	text: Input text

	Returns:
	Dict with emotion scores and dominant emotion
	"""
	if not text or not text.strip():
	return {
	"dominant_emotion": "neutral",
	"scores": {e: 0.0 for e in self.EMOTIONS},
	"emotion_intensity": 0.0
	}

	try:
	result = await self._detect_hf_api(text)
	if result:
	return result
	except Exception as e:
	self.logger.error(f"Emotion detection error: {e}")

	# Fallback: neutral
	return {
	"dominant_emotion": "neutral",
	"scores": {e: 0.0 for e in self.EMOTIONS},
	"emotion_intensity": 0.0
	}

	async def _detect_hf_api(self, text: str) -> Optional[Dict[str, Any]]:
	"""Detect emotions via HF Inference API"""
	if not self.token:
	return None

	async with httpx.AsyncClient() as client:
	response = await client.post(
	f"{self.api_url}/models/{self.model}",
	headers={"Authorization": f"Bearer {self.token}"},
	json={"inputs": text},
	timeout=10.0
	)

	if response.status_code == 200:
	data = response.json()

	if isinstance(data, list) and len(data) > 0:
	predictions = data[0]

	# Build scores dict
	scores = {}
	for pred in predictions:
	label = pred.get('label', '').lower()
	score = pred.get('score', 0.0)
	scores[label] = score

	# Fill missing emotions with 0
	for emotion in self.EMOTIONS:
	if emotion not in scores:
	scores[emotion] = 0.0

	# Determine dominant
	dominant = max(scores, key=scores.get)
	intensity = scores[dominant]

	return {
	"dominant_emotion": dominant,
	"scores": scores,
	"emotion_intensity": intensity
	}

	return None


	class StressDetector:
	"""
	Stress/Toxicity Detection
	Model: martin-ha/toxic-comment-model
	Output: Stress/distress probability score
	"""

	def __init__(self):
	self.model = "martin-ha/toxic-comment-model"
	self.api_url = settings.hf_inference_api_url
	self.token = settings.hf_token
	self.logger = get_logger("stress_detector")

	async def detect(self, text: str) -> Dict[str, Any]:
	"""
	Detect stress level

	Args:
	text: Input text

	Returns:
	Dict with stress_level, score, is_stressed
	"""
	if not text or not text.strip():
	return {
	"stress_level": "low",
	"score": 0.0,
	"is_stressed": False
	}

	try:
	result = await self._detect_hf_api(text)
	if result:
	return result
	except Exception as e:
	self.logger.error(f"Stress detection error: {e}")

	# Fallback: Heuristic
	return self._detect_heuristic(text)

	async def _detect_hf_api(self, text: str) -> Optional[Dict[str, Any]]:
	"""Detect stress via HF Inference API"""
	if not self.token:
	return None

	async with httpx.AsyncClient() as client:
	response = await client.post(
	f"{self.api_url}/models/{self.model}",
	headers={"Authorization": f"Bearer {self.token}"},
	json={"inputs": text},
	timeout=10.0
	)

	if response.status_code == 200:
	data = response.json()

	if isinstance(data, list) and len(data) > 0:
	predictions = data[0]

	# Calculate toxic score
	toxic_score = 0.0
	for pred in predictions:
	if pred.get('label') == 'toxic' or pred.get('label') == 'LABEL_1':
	toxic_score = pred.get('score', 0.0)

	# Map to stress levels
	if toxic_score > 0.7:
	level = "high"
	elif toxic_score > 0.3:
	level = "medium"
	else:
	level = "low"

	return {
	"stress_level": level,
	"score": toxic_score,
	"is_stressed": toxic_score > 0.5
	}

	return None

	def _detect_heuristic(self, text: str) -> Dict[str, Any]:
	"""Heuristic stress detection"""
	text_lower = text.lower()

	stress_words = [
	'stressed', 'overwhelmed', 'anxious', 'worried', 'panic',
	'urgent', 'emergency', 'help', 'desperate', 'exhausted'
	]

	count = sum(1 for word in stress_words if word in text_lower)
	intensity = min(count / 3, 1.0) # Cap at 1.0

	if intensity > 0.6:
	level = "high"
	elif intensity > 0.3:
	level = "medium"
	else:
	level = "low"

	return {
	"stress_level": level,
	"score": intensity,
	"is_stressed": intensity > 0.5
	}


	# Global instances
	intent_classifier = IntentClassifier()
	emotion_detector = EmotionDetector()
	stress_detector = StressDetector()