# busy-module-xgboost / scoring_engine.py
"""
Scoring Engine
Advanced heuristic and logic layer for combining ML predictions with expert rules.
Implements a "Signal Detection" approach using Evidence Accumulation.
"""
import numpy as np
from typing import Dict, Tuple
class ScoringEngine:
    """
    Bio-inspired Signal Detection System.

    Accumulates weighted log-odds evidence for "Busy" vs "Not Busy" from
    text intent, audio context, emotion cues, and an ML model baseline,
    then converts the total evidence back to a 0-1 probability with a
    sigmoid.
    """

    def __init__(self):
        # Evidence weights (log-odds contributions).
        # Positive = evidence for BUSY, negative = evidence for NOT BUSY.
        self.WEIGHTS = {
            'explicit_busy': 6.0,     # Strongest signal
            'explicit_free': -4.0,    # Strong negative signal (if we had it)
            'traffic_noise': 3.0,     # Strong context
            'office_noise': 1.0,      # Weak context
            'rushed_speech': 1.5,     # Medium context
            'short_answers': 1.2,     # Medium context
            'deflection': 2.0,        # Medium-Strong context
            'latency': 0.5,           # Weak context
            'ml_model_factor': 0.5,   # Multiplier for ML log-odds (reduced)
            'emotion_stress': 2.5,    # Paper 1: Strong indicator of busy state
            'emotion_energy': 0.8,    # Medium indicator
        }

    def _sigmoid(self, x: float) -> float:
        """Convert log-odds to a probability in (0, 1).

        The argument is clamped to +/-500 before exponentiation so that
        np.exp never overflows (float64 overflows near exp(710)); within
        that range the result is unchanged to float precision.
        """
        return float(1.0 / (1.0 + np.exp(-np.clip(x, -500.0, 500.0))))

    def _logit(self, p: float) -> float:
        """Convert a probability to log-odds (kept finite by clipping)."""
        p = np.clip(p, 0.01, 0.99)  # Avoid +/-inf at p=0 or p=1
        return float(np.log(p / (1.0 - p)))

    def calculate_score(
        self,
        audio_features: Dict[str, float],
        text_features: Dict[str, float],
        ml_probability: float
    ) -> Tuple[float, Dict]:
        """
        Calculate the Busy Score using evidence accumulation.

        Args:
            audio_features: Acoustic features read here: 'v2_noise_traffic',
                'v3_speech_rate', 'v5_pitch_std', 'v11_emotion_stress',
                'v12_emotion_energy'. Missing keys default to 0.0.
            text_features: Intent features read here: 't0_explicit_free',
                't1_explicit_busy', 't3_short_ratio', 't5_time_pressure',
                't6_deflection'. Missing keys default to 0.0.
            ml_probability: Busy probability from the ML model (0-1).

        Returns:
            Tuple of (final_score, breakdown): final_score is a 0-1
            probability; breakdown holds 'total_evidence',
            'positive_evidence', 'negative_evidence', 'ml_contribution',
            and human-readable 'details' strings.
        """
        evidence = 0.0
        positive_evidence = 0.0
        negative_evidence = 0.0
        details = []

        def add_evidence(points: float, label: str) -> None:
            # Fold points into the running totals and record a detail line.
            nonlocal evidence, positive_evidence, negative_evidence
            evidence += points
            if points >= 0:
                positive_evidence += points
            else:
                negative_evidence += points
            details.append(label)

        # Check if user explicitly invited conversation (intent overrides context)
        explicit_free = text_features.get('t0_explicit_free', 0.0)
        intent_overrides_context = explicit_free > 0.5

        # --- 1. Text Evidence (Intent) ---
        # Explicit Busy
        explicit = text_features.get('t1_explicit_busy', 0.0)
        if explicit > 0.5:
            points = self.WEIGHTS['explicit_busy'] * explicit
            add_evidence(points, f"Explicit Intent (+{points:.1f})")

        # Explicit Free (negative evidence)
        if intent_overrides_context:
            points = self.WEIGHTS['explicit_free'] * explicit_free
            add_evidence(points, f"Explicit Free ({points:.1f})")

        # Deflection is read once: it gates brevity below and scores on its own.
        deflection = text_features.get('t6_deflection', 0.0)

        # Short Answers (Brevity) - only counts when there's other busy evidence
        short_ratio = text_features.get('t3_short_ratio', 0.0)
        if short_ratio > 0.3:
            time_pressure = text_features.get('t5_time_pressure', 0.0)
            busy_context = (explicit > 0.5) or (deflection > 0.1) or (time_pressure > 0.1)
            if intent_overrides_context:
                # User invited conversation: brevity still counts, but at 40%.
                points = self.WEIGHTS['short_answers'] * short_ratio * 0.4
                add_evidence(points, f"Brief Responses (+{points:.1f}, reduced - user invited talk)")
            elif busy_context:
                points = self.WEIGHTS['short_answers'] * short_ratio
                add_evidence(points, f"Brief Responses (+{points:.1f})")
            else:
                details.append("Brief Responses (ignored - no busy evidence)")

        # Deflection / Time Pressure
        if deflection > 0.1:
            points = self.WEIGHTS['deflection'] * deflection
            add_evidence(points, f"Deflection (+{points:.1f})")

        # --- 2. Audio Evidence (Context) ---
        # Traffic Noise (reduced when user explicitly invites talk)
        traffic = audio_features.get('v2_noise_traffic', 0.0)
        if traffic > 0.5:
            points = self.WEIGHTS['traffic_noise'] * traffic
            if intent_overrides_context:
                points *= 0.3  # Strong availability signal overrides traffic context
                add_evidence(points, f"Traffic Context (+{points:.1f}, reduced - user invited talk)")
            else:
                add_evidence(points, f"Traffic Context (+{points:.1f})")

        # Speech Rate: fast speech reads as rushed; slow speech is neutral
        # for now (might be distraction, but we don't score it).
        rate = audio_features.get('v3_speech_rate', 0.0)
        if rate > 3.5:
            points = self.WEIGHTS['rushed_speech']
            add_evidence(points, f"Rushed Speech (+{points:.1f})")

        # Energy/Pitch (Stress)
        pitch_std = audio_features.get('v5_pitch_std', 0.0)
        if pitch_std > 80.0:  # High pitch variation -> fixed small bump
            add_evidence(0.5, "Voice Stress (+0.5)")

        # --- 2b. Emotion Evidence (if present) ---
        emotion_stress = audio_features.get('v11_emotion_stress', 0.0)
        if emotion_stress > 0.6:
            points = self.WEIGHTS['emotion_stress'] * emotion_stress
            add_evidence(points, f"Emotional Stress (+{points:.1f})")

        emotion_energy = audio_features.get('v12_emotion_energy', 0.0)
        if emotion_energy > 0.7:
            points = self.WEIGHTS['emotion_energy'] * emotion_energy
            add_evidence(points, f"High Energy (+{points:.1f})")

        # --- 3. Machine Learning Evidence (Baseline) ---
        # Convert the model probability to log-odds and down-weight it
        # (ml_model_factor) so the hand-crafted voice/emotion evidence
        # above carries more weight than the raw model output.
        ml_evidence = self._logit(ml_probability)
        weighted_ml_evidence = ml_evidence * self.WEIGHTS['ml_model_factor']
        add_evidence(weighted_ml_evidence, f"ML Baseline ({weighted_ml_evidence:+.1f})")

        # --- 4. Final Calculation ---
        # Sigmoid converts total evidence back to 0-1 probability
        final_score = self._sigmoid(evidence)

        breakdown = {
            'total_evidence': evidence,
            'positive_evidence': positive_evidence,
            'negative_evidence': negative_evidence,
            'details': details,
            'ml_contribution': weighted_ml_evidence
        }
        return final_score, breakdown

    def get_confidence(self, score: float, breakdown: Dict) -> float:
        """
        Calculate confidence based on EVIDENCE MAGNITUDE.

        Strong evidence (positive or negative) -> high confidence;
        zero evidence -> low confidence. Evidence pulling in both
        directions (conflict) cancels in the score, so it scales
        confidence down proportionally.

        Args:
            score: Final busy score. Unused here (confidence is derived
                from evidence, not the score); kept for interface
                compatibility with existing callers.
            breakdown: Breakdown dict produced by calculate_score().

        Returns:
            Confidence in [0, 1).
        """
        positive_evidence = breakdown.get('positive_evidence', 0.0)
        negative_evidence = abs(breakdown.get('negative_evidence', 0.0))
        total_strength = positive_evidence + negative_evidence

        # Conflict ratio: 0 when all evidence is one-sided, up to 0.5
        # when perfectly balanced. (Epsilon guards division by zero.)
        conflict = min(positive_evidence, negative_evidence)
        conflict_ratio = conflict / (total_strength + 1e-6)

        # tanh maps evidence magnitude onto [0, 1): tanh(0)=0 (total
        # guess), tanh(3)~0.995 (very strong). The /2 scaling means
        # ~6 points of total evidence is near-certain.
        base_confidence = np.tanh(total_strength / 2.0)
        confidence = base_confidence * (1.0 - conflict_ratio)
        return float(confidence)