""" Multi-Modal Confusion Detection Module for ContextFlow Combines audio, biometric, and behavioral signals for comprehensive confusion detection. Addresses: Multi-modal Confusion Detection requirement """ import numpy as np from typing import Dict, List, Optional, Tuple, Any from dataclasses import dataclass, field from collections import deque import threading import time @dataclass class AudioFeatures: """Audio features extracted from voice/speech""" speech_rate: float = 0.0 # Words per minute pause_frequency: float = 0.0 # Pauses per minute pause_duration: float = 0.0 # Average pause duration (ms) pitch_variation: float = 0.0 # Pitch standard deviation volume_level: float = 0.0 # Average volume (0-1) hesitations: int = 0 # Count of "uh", "um", etc. question_markers: int = 0 # Rising intonation count def to_vector(self) -> np.ndarray: """Convert to 7-dim feature vector""" return np.array([ self.speech_rate / 200, # Normalize to ~0-1 self.pause_frequency / 10, self.pause_duration / 5000, self.pitch_variation / 50, self.volume_level, self.hesitations / 20, self.question_markers / 10 ]) @dataclass class BiometricFeatures: """Biometric features for confusion detection""" heart_rate: float = 0.0 # BPM heart_rate_variability: float = 0.0 # HRV metric skin_conductance: float = 0.0 # GSR (microsiemens) skin_temperature: float = 0.0 # Celsius eye_blink_rate: float = 0.0 # Blinks per minute eye_open_duration: float = 0.0 # Average eye open (ms) def to_vector(self) -> np.ndarray: """Convert to 6-dim feature vector""" return np.array([ (self.heart_rate - 60) / 60, # Centered at resting HR self.heart_rate_variability / 50, self.skin_conductance / 20, (self.skin_temperature - 36) / 2, # Centered at 36C (self.eye_blink_rate - 15) / 15, # Centered at normal self.eye_open_duration / 500 ]) @dataclass class BehavioralFeatures: """Behavioral features (existing confusion signals)""" mouse_hesitation: float = 0.0 scroll_reversals: float = 0.0 time_on_page: float = 0.0 click_frequency: float = 0.0 back_button: float = 0.0 tab_switches: float = 0.0 copy_attempts: float = 0.0 search_usage: float = 0.0 def to_vector(self) -> np.ndarray: """Convert to 8-dim feature vector""" return np.array([ self.mouse_hesitation / 5, self.scroll_reversals / 10, self.time_on_page / 300, self.click_frequency / 20, self.back_button / 5, self.tab_switches / 10, self.copy_attempts / 5, self.search_usage / 5 ]) class MultiModalFusion: """ Fuses multiple signal modalities into unified confusion assessment. Supported modalities: - Audio: Speech patterns, hesitations - Biometric: Heart rate, GSR, eye tracking - Behavioral: Mouse, keyboard, scrolling patterns """ def __init__( self, audio_weight: float = 0.2, biometric_weight: float = 0.3, behavioral_weight: float = 0.5 ): self.audio_weight = audio_weight self.biometric_weight = biometric_weight self.behavioral_weight = behavioral_weight # Modality-specific thresholds self.audio_threshold = 0.6 self.biometric_threshold = 0.7 self.behavioral_threshold = 0.5 # History buffers self.audio_history = deque(maxlen=30) # Last 30 seconds self.biometric_history = deque(maxlen=60) # Last 60 seconds self.behavioral_history = deque(maxlen=100) # Last 100 events # Thread safety self.lock = threading.Lock() def update_audio(self, features: AudioFeatures): """Update audio feature buffer""" with self.lock: self.audio_history.append(features) def update_biometric(self, features: BiometricFeatures): """Update biometric feature buffer""" with self.lock: self.biometric_history.append(features) def update_behavioral(self, features: BehavioralFeatures): """Update behavioral feature buffer""" with self.lock: self.behavioral_history.append(features) def get_audio_confusion(self) -> Tuple[float, str]: """Get confusion score from audio signals""" with self.lock: if not self.audio_history: return 0.0, "no_audio" recent = list(self.audio_history)[-10:] # Last 10 samples # Compute weighted features speech_rate = np.mean([f.speech_rate for f in recent]) hesitations = np.mean([f.hesitations for f in recent]) pause_freq = np.mean([f.pause_frequency for f in recent]) # Confusion indicators: # - Slower speech rate # - More hesitations # - More pauses confusion = 0.0 if speech_rate < 120: # Slow speech confusion += 0.3 if hesitations > 5: # Many hesitations confusion += 0.4 if pause_freq > 3: # Frequent pauses confusion += 0.3 confusion = min(confusion, 1.0) return confusion, self._get_audio_reason(hesitations, speech_rate, pause_freq) def _get_audio_reason(self, hesitations: float, speech_rate: float, pause_freq: float) -> str: """Generate human-readable audio confusion reason""" reasons = [] if hesitations > 5: reasons.append("frequent_hesitations") if speech_rate < 120: reasons.append("slow_speech") if pause_freq > 3: reasons.append("frequent_pauses") return ",".join(reasons) if reasons else "normal" def get_biometric_confusion(self) -> Tuple[float, str]: """Get confusion score from biometric signals""" with self.lock: if not self.biometric_history: return 0.0, "no_biometric" recent = list(self.biometric_history)[-20:] # Last 20 samples hr = np.mean([f.heart_rate for f in recent]) hrv = np.mean([f.heart_rate_variability for f in recent]) gsr = np.mean([f.skin_conductance for f in recent]) # Confusion indicators: # - Elevated heart rate # - Lower HRV (stress indicator) # - Higher GSR (arousal) confusion = 0.0 if hr > 85: # Elevated HR confusion += 0.3 if hrv < 30: # Low HRV confusion += 0.3 if gsr > 10: # Elevated GSR confusion += 0.4 confusion = min(confusion, 1.0) return confusion, self._get_biometric_reason(hr, hrv, gsr) def _get_biometric_reason(self, hr: float, hrv: float, gsr: float) -> str: """Generate human-readable biometric confusion reason""" reasons = [] if hr > 85: reasons.append("elevated_heart_rate") if hrv < 30: reasons.append("low_hrv") if gsr > 10: reasons.append("high_arousal") return ",".join(reasons) if reasons else "normal" def get_behavioral_confusion(self) -> Tuple[float, str]: """Get confusion score from behavioral signals""" with self.lock: if not self.behavioral_history: return 0.0, "no_behavioral" recent = list(self.behavioral_history)[-20:] # Last 20 events mouse_h = np.mean([f.mouse_hesitation for f in recent]) scrolls = np.mean([f.scroll_reversals for f in recent]) back_btn = np.mean([f.back_button for f in recent]) confusion = 0.0 if mouse_h > 3: confusion += 0.3 if scrolls > 5: confusion += 0.3 if back_btn > 3: confusion += 0.2 confusion = min(confusion, 1.0) return confusion, self._get_behavioral_reason(mouse_h, scrolls, back_btn) def _get_behavioral_reason(self, mouse_h: float, scrolls: float, back_btn: float) -> str: """Generate human-readable behavioral confusion reason""" reasons = [] if mouse_h > 3: reasons.append("mouse_hesitation") if scrolls > 5: reasons.append("scroll_reversals") if back_btn > 3: reasons.append("back_button_usage") return ",".join(reasons) if reasons else "normal" def get_fused_confusion(self) -> Dict[str, Any]: """ Get fused multi-modal confusion assessment. Returns: Dict with confusion scores, reasons, and confidence """ audio_score, audio_reason = self.get_audio_confusion() biometric_score, biometric_reason = self.get_biometric_confusion() behavioral_score, behavioral_reason = self.get_behavioral_confusion() # Weighted fusion fused_score = ( audio_score * self.audio_weight + biometric_score * self.biometric_weight + behavioral_score * self.behavioral_weight ) # Confidence based on signal availability n_signals = sum([ len(self.audio_history) > 0, len(self.biometric_history) > 0, len(self.behavioral_history) > 0 ]) confidence = min(n_signals / 3.0, 1.0) # Primary indicator (highest weighted contribution) contributions = { 'audio': audio_score * self.audio_weight, 'biometric': biometric_score * self.biometric_weight, 'behavioral': behavioral_score * self.behavioral_weight } primary_indicator = max(contributions, key=contributions.get) return { 'confusion_score': fused_score, 'confidence': confidence, 'primary_indicator': primary_indicator, 'audio_score': audio_score, 'biometric_score': biometric_score, 'behavioral_score': behavioral_score, 'audio_reason': audio_reason, 'biometric_reason': biometric_reason, 'behavioral_reason': behavioral_reason, 'suggested_action': self._get_suggested_action(fused_score, primary_indicator), 'available_modalities': { 'audio': len(self.audio_history) > 0, 'biometric': len(self.biometric_history) > 0, 'behavioral': len(self.behavioral_history) > 0 } } def _get_suggested_action(self, score: float, primary: str) -> str: """Get suggested intervention based on confusion level""" if score < 0.3: return "continue_learning" elif score < 0.5: return "offer_hint" elif score < 0.7: return "trigger_ai_explanation" else: return "pause_and_assess" def reset(self): """Reset all buffers""" with self.lock: self.audio_history.clear() self.biometric_history.clear() self.behavioral_history.clear() class AudioAnalyzer: """ Real-time audio analysis for confusion detection. Requires: microphone input (simulated for now) """ def __init__(self): self.sample_buffer = deque(maxlen=1000) self.is_recording = False self.sample_rate = 16000 def start_recording(self): """Start audio capture""" self.is_recording = True self.sample_buffer.clear() def stop_recording(self): """Stop audio capture""" self.is_recording = False def add_audio_sample(self, amplitude: float): """Add audio amplitude sample""" if self.is_recording: self.sample_buffer.append({ 'amplitude': amplitude, 'timestamp': time.time() }) def analyze(self) -> AudioFeatures: """Analyze audio buffer and extract features""" if len(self.sample_buffer) < 100: return AudioFeatures() amplitudes = [s['amplitude'] for s in self.sample_buffer] # Simple feature extraction features = AudioFeatures() # Detect pauses (low amplitude segments) threshold = np.mean(amplitudes) * 0.3 is_pause = amplitudes < threshold pause_durations = [] current_pause = 0 for p in is_pause: if p: current_pause += 1 else: if current_pause > 0: pause_durations.append(current_pause) current_pause = 0 features.pause_frequency = len(pause_durations) / (len(amplitudes) / self.sample_rate) * 60 features.pause_duration = np.mean(pause_durations) * 1000 / self.sample_rate if pause_durations else 0 # Volume level features.volume_level = np.mean(amplitudes) return features class BiometricProcessor: """ Processes biometric data for confusion detection. Supports: heart rate monitors, GSR sensors, eye trackers """ def __init__(self): self.data_buffer = deque(maxlen=60) def add_reading( self, heart_rate: Optional[float] = None, hrv: Optional[float] = None, gsr: Optional[float] = None, skin_temp: Optional[float] = None, blink_rate: Optional[float] = None, eye_open: Optional[float] = None ): """Add biometric reading""" self.data_buffer.append({ 'heart_rate': heart_rate, 'hrv': hrv, 'gsr': gsr, 'skin_temp': skin_temp, 'blink_rate': blink_rate, 'eye_open': eye_open, 'timestamp': time.time() }) def analyze(self) -> BiometricFeatures: """Analyze biometric buffer and extract features""" if len(self.data_buffer) < 5: return BiometricFeatures() features = BiometricFeatures() hr_values = [d['heart_rate'] for d in self.data_buffer if d['heart_rate']] hrv_values = [d['hrv'] for d in self.data_buffer if d['hrv']] gsr_values = [d['gsr'] for d in self.data_buffer if d['gsr']] if hr_values: features.heart_rate = np.mean(hr_values) if hrv_values: features.heart_rate_variability = np.mean(hrv_values) if gsr_values: features.skin_conductance = np.mean(gsr_values) return features # API integration class MultiModalAPI: """REST API for multi-modal confusion detection""" def __init__(self, fusion: MultiModalFusion): self.fusion = fusion self.audio_analyzer = AudioAnalyzer() self.biometric_processor = BiometricProcessor() def process_audio(self, amplitude: float): """Process audio sample""" self.audio_analyzer.add_audio_sample(amplitude) features = self.audio_analyzer.analyze() self.fusion.update_audio(features) return features def process_biometric( self, heart_rate: Optional[float] = None, hrv: Optional[float] = None, gsr: Optional[float] = None ): """Process biometric data""" self.biometric_processor.add_reading( heart_rate=heart_rate, hrv=hrv, gsr=gsr ) features = self.biometric_processor.analyze() self.fusion.update_biometric(features) return features def process_behavioral( self, mouse_hesitation: float = 0, scroll_reversals: float = 0, time_on_page: float = 0 ): """Process behavioral data""" features = BehavioralFeatures( mouse_hesitation=mouse_hesitation, scroll_reversals=scroll_reversals, time_on_page=time_on_page ) self.fusion.update_behavioral(features) return features def get_confusion_assessment(self) -> Dict: """Get multi-modal confusion assessment""" return self.fusion.get_fused_confusion() # Demo if __name__ == "__main__": fusion = MultiModalFusion() api = MultiModalAPI(fusion) print("Multi-Modal Confusion Detection Demo") print("=" * 40) # Simulate data collection for i in range(20): # Audio: increasing hesitation api.process_audio(amplitude=0.3 if i < 10 else 0.1) # Biometric: elevated stress api.process_biometric( heart_rate=75 + i * 0.5, hrv=40 - i * 0.3, gsr=8 + i * 0.2 ) # Behavioral: more reversals api.process_behavioral( mouse_hesitation=2 + i * 0.2, scroll_reversals=3 + i * 0.3, time_on_page=60 + i * 3 ) # Get assessment result = api.get_confusion_assessment() print(f"Confusion Score: {result['confusion_score']:.2f}") print(f"Confidence: {result['confidence']:.2f}") print(f"Primary Indicator: {result['primary_indicator']}") print(f"Biometric Score: {result['biometric_score']:.2f}") print(f"Behavioral Score: {result['behavioral_score']:.2f}") print(f"Suggested Action: {result['suggested_action']}")