File size: 18,084 Bytes

82e4a98

"""
Multi-Modal Confusion Detection Module for ContextFlow

Combines audio, biometric, and behavioral signals for comprehensive confusion detection.
Addresses: Multi-modal Confusion Detection requirement
"""

import numpy as np
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass, field
from collections import deque
import threading
import time


@dataclass
class AudioFeatures:
    """Speech-derived measurements used as confusion signals.

    Every field defaults to zero, so an instance can be created empty and
    filled in incrementally.
    """
    speech_rate: float = 0.0          # words per minute
    pause_frequency: float = 0.0       # pauses per minute
    pause_duration: float = 0.0        # mean pause length, in milliseconds
    pitch_variation: float = 0.0       # standard deviation of pitch
    volume_level: float = 0.0         # mean volume, expected in 0-1
    hesitations: int = 0               # number of fillers ("uh", "um", ...)
    question_markers: int = 0         # number of rising intonations

    def to_vector(self) -> np.ndarray:
        """Return the fields, in declaration order, as a scaled 7-element array.

        Each field is divided by a fixed constant chosen to bring typical
        values to roughly 0-1; ``volume_level`` is passed through unscaled.
        No clipping is applied, so out-of-range inputs give values outside 0-1.
        """
        rate = self.speech_rate / 200
        pauses_per_min = self.pause_frequency / 10
        pause_length = self.pause_duration / 5000
        pitch_spread = self.pitch_variation / 50
        filler_count = self.hesitations / 20
        rising_count = self.question_markers / 10

        components = [
            rate,
            pauses_per_min,
            pause_length,
            pitch_spread,
            self.volume_level,  # already on a 0-1 scale
            filler_count,
            rising_count,
        ]
        return np.array(components)


@dataclass
class BiometricFeatures:
    """Physiological measurements used as confusion signals.

    Every field defaults to zero.
    """
    heart_rate: float = 0.0           # beats per minute
    heart_rate_variability: float = 0.0 # HRV metric
    skin_conductance: float = 0.0      # GSR, in microsiemens
    skin_temperature: float = 0.0     # degrees Celsius
    eye_blink_rate: float = 0.0       # blinks per minute
    eye_open_duration: float = 0.0    # mean eye-open time, in milliseconds

    def to_vector(self) -> np.ndarray:
        """Return the fields, in declaration order, as a scaled 6-element array.

        Heart rate, skin temperature and blink rate are first shifted by a
        baseline (60 BPM, 36 C, 15 blinks/min) and then divided by a fixed
        constant; the remaining fields are only divided. Because of the
        shifts, an all-default instance does not map to the zero vector.
        """
        hr_offset = (self.heart_rate - 60) / 60
        hrv_scaled = self.heart_rate_variability / 50
        gsr_scaled = self.skin_conductance / 20
        temp_offset = (self.skin_temperature - 36) / 2
        blink_offset = (self.eye_blink_rate - 15) / 15
        eye_open_scaled = self.eye_open_duration / 500

        components = [
            hr_offset,
            hrv_scaled,
            gsr_scaled,
            temp_offset,
            blink_offset,
            eye_open_scaled,
        ]
        return np.array(components)


@dataclass
class BehavioralFeatures:
    """Interaction-derived confusion signals (mouse, scrolling, navigation).

    Every field defaults to zero. Units are not stated in this module.
    """
    mouse_hesitation: float = 0.0
    scroll_reversals: float = 0.0
    time_on_page: float = 0.0
    click_frequency: float = 0.0
    back_button: float = 0.0
    tab_switches: float = 0.0
    copy_attempts: float = 0.0
    search_usage: float = 0.0

    def to_vector(self) -> np.ndarray:
        """Return the fields, in declaration order, as a scaled 8-element array.

        Each field is divided by a fixed constant; no clipping is applied.
        """
        hesitation = self.mouse_hesitation / 5
        reversals = self.scroll_reversals / 10
        dwell = self.time_on_page / 300
        clicks = self.click_frequency / 20
        backs = self.back_button / 5
        tabs = self.tab_switches / 10
        copies = self.copy_attempts / 5
        searches = self.search_usage / 5

        components = [
            hesitation,
            reversals,
            dwell,
            clicks,
            backs,
            tabs,
            copies,
            searches,
        ]
        return np.array(components)


class MultiModalFusion:
    """
    Fuses multiple signal modalities into a unified confusion assessment.

    Supported modalities:
    - Audio: speech rate, hesitations, pauses
    - Biometric: heart rate, HRV, GSR
    - Behavioral: mouse hesitation, scroll reversals, back-button usage

    Each modality keeps a bounded history of feature objects. A modality's
    score is the capped sum of the weights of the rules that fire on the mean
    of its most recent samples; the fused score is the weighted sum of the
    three modality scores.

    All access to the history buffers goes through ``self.lock``. The lock is
    a plain (non-reentrant) ``threading.Lock``, so scoring is done on
    snapshots taken under the lock rather than while holding it.
    """

    def __init__(
        self,
        audio_weight: float = 0.2,
        biometric_weight: float = 0.3,
        behavioral_weight: float = 0.5
    ):
        """Create a fusion engine.

        Args:
            audio_weight: Weight of the audio score in the fused score.
            biometric_weight: Weight of the biometric score in the fused score.
            behavioral_weight: Weight of the behavioral score in the fused score.

        The weights are used as given; they are not normalized.
        """
        self.audio_weight = audio_weight
        self.biometric_weight = biometric_weight
        self.behavioral_weight = behavioral_weight

        # Modality-specific thresholds.
        # NOTE(review): no method of this class reads these; they are kept
        # because external code may.
        self.audio_threshold = 0.6
        self.biometric_threshold = 0.7
        self.behavioral_threshold = 0.5

        # Bounded history buffers: the oldest sample is dropped once full.
        self.audio_history = deque(maxlen=30)
        self.biometric_history = deque(maxlen=60)
        self.behavioral_history = deque(maxlen=100)

        # Guards every read and write of the three buffers above.
        self.lock = threading.Lock()

    def update_audio(self, features: AudioFeatures):
        """Append an audio sample to the audio history."""
        with self.lock:
            self.audio_history.append(features)

    def update_biometric(self, features: BiometricFeatures):
        """Append a biometric sample to the biometric history."""
        with self.lock:
            self.biometric_history.append(features)

    def update_behavioral(self, features: BehavioralFeatures):
        """Append a behavioral sample to the behavioral history."""
        with self.lock:
            self.behavioral_history.append(features)

    # ------------------------------------------------------------------
    # Shared scoring helpers (operate on plain values / snapshots, no locking)
    # ------------------------------------------------------------------

    @staticmethod
    def _mean_measured(values: List[float]) -> float:
        """Return the mean of the strictly positive values, or 0.0 if none.

        The feature dataclasses default every field to 0.0, so a zero in a
        "lower is worse" field (speech rate, HRV) means "not measured" rather
        than "extremely low". Averaging such zeros in would make the
        corresponding rule fire on missing data.
        """
        measured = [v for v in values if v > 0]
        return float(np.mean(measured)) if measured else 0.0

    @staticmethod
    def _summarize(indicators: List[Tuple[str, float]]) -> Tuple[float, str]:
        """Turn fired ``(reason, weight)`` rules into ``(score, reason_string)``.

        The score is the sum of the weights capped at 1.0; the reason string
        is the comma-joined rule names, or ``"normal"`` when nothing fired.
        """
        score = min(sum((weight for _, weight in indicators), 0.0), 1.0)
        reason = ",".join(name for name, _ in indicators) if indicators else "normal"
        return score, reason

    @staticmethod
    def _audio_indicators(
        hesitations: float, speech_rate: float, pause_freq: float
    ) -> List[Tuple[str, float]]:
        """Return the audio rules that fire for the given averaged values."""
        indicators = []
        if hesitations > 5:  # Many hesitations
            indicators.append(("frequent_hesitations", 0.4))
        # speech_rate <= 0 means no measured speech rate, not slow speech.
        if 0 < speech_rate < 120:  # Slow speech
            indicators.append(("slow_speech", 0.3))
        if pause_freq > 3:  # Frequent pauses
            indicators.append(("frequent_pauses", 0.3))
        return indicators

    @staticmethod
    def _biometric_indicators(
        hr: float, hrv: float, gsr: float
    ) -> List[Tuple[str, float]]:
        """Return the biometric rules that fire for the given averaged values."""
        indicators = []
        if hr > 85:  # Elevated heart rate
            indicators.append(("elevated_heart_rate", 0.3))
        # hrv <= 0 means no measured HRV, not low HRV.
        if 0 < hrv < 30:  # Low HRV (stress indicator)
            indicators.append(("low_hrv", 0.3))
        if gsr > 10:  # Elevated GSR (arousal)
            indicators.append(("high_arousal", 0.4))
        return indicators

    @staticmethod
    def _behavioral_indicators(
        mouse_h: float, scrolls: float, back_btn: float
    ) -> List[Tuple[str, float]]:
        """Return the behavioral rules that fire for the given averaged values.

        The weights sum to 0.8, so the behavioral score never reaches 1.0.
        """
        indicators = []
        if mouse_h > 3:
            indicators.append(("mouse_hesitation", 0.3))
        if scrolls > 5:
            indicators.append(("scroll_reversals", 0.3))
        if back_btn > 3:
            indicators.append(("back_button_usage", 0.2))
        return indicators

    def _score_audio(self, samples: List[AudioFeatures]) -> Tuple[float, str]:
        """Score a snapshot of the audio history (uses the last 10 samples)."""
        if not samples:
            return 0.0, "no_audio"
        recent = samples[-10:]
        hesitations = np.mean([f.hesitations for f in recent])
        pause_freq = np.mean([f.pause_frequency for f in recent])
        speech_rate = self._mean_measured([f.speech_rate for f in recent])
        return self._summarize(
            self._audio_indicators(hesitations, speech_rate, pause_freq)
        )

    def _score_biometric(self, samples: List[BiometricFeatures]) -> Tuple[float, str]:
        """Score a snapshot of the biometric history (uses the last 20 samples)."""
        if not samples:
            return 0.0, "no_biometric"
        recent = samples[-20:]
        hr = np.mean([f.heart_rate for f in recent])
        gsr = np.mean([f.skin_conductance for f in recent])
        hrv = self._mean_measured([f.heart_rate_variability for f in recent])
        return self._summarize(self._biometric_indicators(hr, hrv, gsr))

    def _score_behavioral(self, samples: List[BehavioralFeatures]) -> Tuple[float, str]:
        """Score a snapshot of the behavioral history (uses the last 20 samples)."""
        if not samples:
            return 0.0, "no_behavioral"
        recent = samples[-20:]
        mouse_h = np.mean([f.mouse_hesitation for f in recent])
        scrolls = np.mean([f.scroll_reversals for f in recent])
        back_btn = np.mean([f.back_button for f in recent])
        return self._summarize(
            self._behavioral_indicators(mouse_h, scrolls, back_btn)
        )

    # ------------------------------------------------------------------
    # Per-modality public API
    # ------------------------------------------------------------------

    def get_audio_confusion(self) -> Tuple[float, str]:
        """Get confusion score from audio signals.

        Returns:
            ``(score, reason)`` where score is in [0, 1]. With an empty
            history the result is ``(0.0, "no_audio")``.
        """
        with self.lock:
            samples = list(self.audio_history)
        return self._score_audio(samples)

    def _get_audio_reason(self, hesitations: float, speech_rate: float, pause_freq: float) -> str:
        """Generate human-readable audio confusion reason"""
        return self._summarize(
            self._audio_indicators(hesitations, speech_rate, pause_freq)
        )[1]

    def get_biometric_confusion(self) -> Tuple[float, str]:
        """Get confusion score from biometric signals.

        Returns:
            ``(score, reason)`` where score is in [0, 1]. With an empty
            history the result is ``(0.0, "no_biometric")``.
        """
        with self.lock:
            samples = list(self.biometric_history)
        return self._score_biometric(samples)

    def _get_biometric_reason(self, hr: float, hrv: float, gsr: float) -> str:
        """Generate human-readable biometric confusion reason"""
        return self._summarize(self._biometric_indicators(hr, hrv, gsr))[1]

    def get_behavioral_confusion(self) -> Tuple[float, str]:
        """Get confusion score from behavioral signals.

        Returns:
            ``(score, reason)`` where score is in [0, 0.8]. With an empty
            history the result is ``(0.0, "no_behavioral")``.
        """
        with self.lock:
            samples = list(self.behavioral_history)
        return self._score_behavioral(samples)

    def _get_behavioral_reason(self, mouse_h: float, scrolls: float, back_btn: float) -> str:
        """Generate human-readable behavioral confusion reason"""
        return self._summarize(
            self._behavioral_indicators(mouse_h, scrolls, back_btn)
        )[1]

    # ------------------------------------------------------------------
    # Fusion
    # ------------------------------------------------------------------

    def get_fused_confusion(self) -> Dict[str, Any]:
        """
        Get fused multi-modal confusion assessment.

        All three histories are copied under a single lock acquisition so the
        scores, the availability flags and the confidence describe the same
        instant even if other threads are appending concurrently.

        A modality with no samples contributes a score of 0; the weights are
        not redistributed, so the fused score is lower when modalities are
        missing. ``confidence`` (fraction of modalities with data) reports
        that.

        Returns:
            Dict with confusion scores, reasons, and confidence
        """
        with self.lock:
            audio_samples = list(self.audio_history)
            biometric_samples = list(self.biometric_history)
            behavioral_samples = list(self.behavioral_history)

        audio_score, audio_reason = self._score_audio(audio_samples)
        biometric_score, biometric_reason = self._score_biometric(biometric_samples)
        behavioral_score, behavioral_reason = self._score_behavioral(behavioral_samples)

        # Weighted contribution of each modality to the fused score.
        contributions = {
            'audio': audio_score * self.audio_weight,
            'biometric': biometric_score * self.biometric_weight,
            'behavioral': behavioral_score * self.behavioral_weight
        }
        fused_score = (
            contributions['audio'] +
            contributions['biometric'] +
            contributions['behavioral']
        )

        available = {
            'audio': bool(audio_samples),
            'biometric': bool(biometric_samples),
            'behavioral': bool(behavioral_samples)
        }
        confidence = min(sum(available.values()) / 3.0, 1.0)

        # Largest weighted contribution; ties resolve to the first key above.
        primary_indicator = max(contributions, key=contributions.get)

        return {
            'confusion_score': fused_score,
            'confidence': confidence,
            'primary_indicator': primary_indicator,
            'audio_score': audio_score,
            'biometric_score': biometric_score,
            'behavioral_score': behavioral_score,
            'audio_reason': audio_reason,
            'biometric_reason': biometric_reason,
            'behavioral_reason': behavioral_reason,
            'suggested_action': self._get_suggested_action(fused_score, primary_indicator),
            'available_modalities': available
        }

    def _get_suggested_action(self, score: float, primary: str) -> str:
        """Get suggested intervention based on confusion level.

        Args:
            score: Fused confusion score.
            primary: Name of the primary modality. Accepted for interface
                compatibility; the current mapping depends on ``score`` only.
        """
        if score < 0.3:
            return "continue_learning"
        elif score < 0.5:
            return "offer_hint"
        elif score < 0.7:
            return "trigger_ai_explanation"
        else:
            return "pause_and_assess"

    def reset(self):
        """Reset all buffers"""
        with self.lock:
            self.audio_history.clear()
            self.biometric_history.clear()
            self.behavioral_history.clear()


class AudioAnalyzer:
    """
    Real-time audio analysis for confusion detection.

    Requires: microphone input (simulated for now)

    Amplitude samples are collected into a bounded buffer while recording is
    active; ``analyze`` derives pause statistics and mean volume from
    whatever the buffer currently holds.
    """

    def __init__(self):
        # Most recent 1000 samples; older samples are dropped automatically.
        self.sample_buffer = deque(maxlen=1000)
        self.is_recording = False
        # Samples per second assumed when converting sample counts to time.
        self.sample_rate = 16000

    def start_recording(self):
        """Start audio capture and discard any previously buffered samples."""
        self.is_recording = True
        self.sample_buffer.clear()

    def stop_recording(self):
        """Stop audio capture; buffered samples are kept."""
        self.is_recording = False

    def add_audio_sample(self, amplitude: float):
        """Add an audio amplitude sample.

        The sample is ignored unless recording has been started.
        """
        if self.is_recording:
            self.sample_buffer.append({
                'amplitude': amplitude,
                'timestamp': time.time()
            })

    def analyze(self) -> AudioFeatures:
        """Analyze audio buffer and extract features.

        Returns:
            ``AudioFeatures`` with ``pause_frequency`` (pauses per minute),
            ``pause_duration`` (mean pause length in ms) and ``volume_level``
            (mean amplitude) filled in. All other fields keep their defaults.
            With fewer than 100 buffered samples an all-default instance is
            returned.
        """
        if len(self.sample_buffer) < 100:
            return AudioFeatures()

        # Explicit array so the comparison below is element-wise by
        # construction rather than via a reflected list/scalar comparison.
        amplitudes = np.array(
            [s['amplitude'] for s in self.sample_buffer], dtype=float
        )
        mean_amplitude = float(amplitudes.mean())

        features = AudioFeatures()

        # A sample counts as "pause" when it is below 30% of the mean level.
        threshold = mean_amplitude * 0.3
        is_pause = amplitudes < threshold

        # Lengths (in samples) of each run of consecutive pause samples.
        pause_durations = []
        current_pause = 0
        for silent in is_pause:
            if silent:
                current_pause += 1
            elif current_pause:
                pause_durations.append(current_pause)
                current_pause = 0
        # A pause still in progress at the end of the buffer has no closing
        # loud sample; count it here so trailing silence is not lost.
        if current_pause:
            pause_durations.append(current_pause)

        # Sample counts are converted to time assuming one buffered entry per
        # 1/sample_rate seconds; the recorded timestamps are not used.
        buffer_seconds = len(amplitudes) / self.sample_rate
        features.pause_frequency = len(pause_durations) / buffer_seconds * 60
        features.pause_duration = (
            float(np.mean(pause_durations)) * 1000 / self.sample_rate
            if pause_durations else 0
        )

        # Volume level
        features.volume_level = mean_amplitude

        return features


class BiometricProcessor:
    """
    Processes biometric data for confusion detection.

    Supports: heart rate monitors, GSR sensors, eye trackers

    Readings are stored in a bounded buffer; ``analyze`` averages each
    channel over the readings in which that channel was supplied.
    """

    # Maps the keys stored by ``add_reading`` to ``BiometricFeatures`` fields.
    _FIELD_FOR_KEY = (
        ('heart_rate', 'heart_rate'),
        ('hrv', 'heart_rate_variability'),
        ('gsr', 'skin_conductance'),
        ('skin_temp', 'skin_temperature'),
        ('blink_rate', 'eye_blink_rate'),
        ('eye_open', 'eye_open_duration'),
    )

    def __init__(self):
        # Most recent 60 readings; older readings are dropped automatically.
        self.data_buffer = deque(maxlen=60)

    def add_reading(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None,
        skin_temp: Optional[float] = None,
        blink_rate: Optional[float] = None,
        eye_open: Optional[float] = None
    ):
        """Add biometric reading.

        Every channel is optional; pass ``None`` (the default) for a channel
        that was not measured in this reading. The reading is timestamped
        with the current wall-clock time.
        """
        self.data_buffer.append({
            'heart_rate': heart_rate,
            'hrv': hrv,
            'gsr': gsr,
            'skin_temp': skin_temp,
            'blink_rate': blink_rate,
            'eye_open': eye_open,
            'timestamp': time.time()
        })

    def _mean_reading(self, key: str) -> Optional[float]:
        """Mean of channel ``key`` over readings that supplied it, else None."""
        # ``is not None`` rather than truthiness: a reading of 0.0 is a
        # value, only None means "not measured".
        values = [d[key] for d in self.data_buffer if d[key] is not None]
        return float(np.mean(values)) if values else None

    def analyze(self) -> BiometricFeatures:
        """Analyze biometric buffer and extract features.

        Returns:
            ``BiometricFeatures`` whose fields hold the per-channel means of
            the buffered readings. A channel that was never supplied keeps
            its default. With fewer than 5 buffered readings an all-default
            instance is returned.
        """
        if len(self.data_buffer) < 5:
            return BiometricFeatures()

        features = BiometricFeatures()
        for key, field_name in self._FIELD_FOR_KEY:
            mean_value = self._mean_reading(key)
            if mean_value is not None:
                setattr(features, field_name, mean_value)

        return features


# API integration
class MultiModalAPI:
    """Entry points for multi-modal confusion detection.

    Wraps a ``MultiModalFusion`` together with an ``AudioAnalyzer`` and a
    ``BiometricProcessor``: each ``process_*`` call turns raw input into a
    feature object, appends it to the fusion history and returns it.
    """

    def __init__(self, fusion: MultiModalFusion):
        """Create the API around an existing fusion engine.

        Args:
            fusion: Fusion engine that receives the extracted features.
        """
        self.fusion = fusion
        self.audio_analyzer = AudioAnalyzer()
        self.biometric_processor = BiometricProcessor()
        # The analyzer ignores samples until recording is started; without
        # this every sample passed to process_audio would be dropped.
        self.audio_analyzer.start_recording()

    def process_audio(self, amplitude: float):
        """Process audio sample.

        Buffers the amplitude, re-analyzes the audio buffer and appends the
        resulting features to the fusion history.

        Returns:
            The ``AudioFeatures`` produced by the analyzer.
        """
        self.audio_analyzer.add_audio_sample(amplitude)
        features = self.audio_analyzer.analyze()
        self.fusion.update_audio(features)
        return features

    def process_biometric(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None,
        skin_temp: Optional[float] = None,
        blink_rate: Optional[float] = None,
        eye_open: Optional[float] = None
    ):
        """Process biometric data.

        Every channel is optional; ``None`` means "not measured". The reading
        is buffered, the buffer is re-analyzed and the resulting features are
        appended to the fusion history.

        Returns:
            The ``BiometricFeatures`` produced by the processor.
        """
        self.biometric_processor.add_reading(
            heart_rate=heart_rate,
            hrv=hrv,
            gsr=gsr,
            skin_temp=skin_temp,
            blink_rate=blink_rate,
            eye_open=eye_open
        )
        features = self.biometric_processor.analyze()
        self.fusion.update_biometric(features)
        return features

    def process_behavioral(
        self,
        mouse_hesitation: float = 0,
        scroll_reversals: float = 0,
        time_on_page: float = 0,
        click_frequency: float = 0,
        back_button: float = 0,
        tab_switches: float = 0,
        copy_attempts: float = 0,
        search_usage: float = 0
    ):
        """Process behavioral data.

        Accepts every ``BehavioralFeatures`` field (all default to 0), builds
        the feature object and appends it to the fusion history.

        Returns:
            The ``BehavioralFeatures`` that was recorded.
        """
        features = BehavioralFeatures(
            mouse_hesitation=mouse_hesitation,
            scroll_reversals=scroll_reversals,
            time_on_page=time_on_page,
            click_frequency=click_frequency,
            back_button=back_button,
            tab_switches=tab_switches,
            copy_attempts=copy_attempts,
            search_usage=search_usage
        )
        self.fusion.update_behavioral(features)
        return features

    def get_confusion_assessment(self) -> Dict:
        """Get multi-modal confusion assessment from the fusion engine."""
        return self.fusion.get_fused_confusion()


# Demo
if __name__ == "__main__":
    def _run_demo() -> None:
        """Feed 20 synthetic steps through the API and print the assessment."""
        engine = MultiModalFusion()
        demo_api = MultiModalAPI(engine)

        print("Multi-Modal Confusion Detection Demo")
        print("=" * 40)

        # Each step submits one audio, one biometric and one behavioral
        # input, with values that drift as the step index grows.
        for step in range(20):
            # Audio: louder for the first ten steps, quieter afterwards
            level = 0.3 if step < 10 else 0.1
            demo_api.process_audio(amplitude=level)

            # Biometric: heart rate and GSR rise, HRV falls
            demo_api.process_biometric(
                heart_rate=75 + step * 0.5,
                hrv=40 - step * 0.3,
                gsr=8 + step * 0.2
            )

            # Behavioral: hesitation, reversals and dwell time all grow
            demo_api.process_behavioral(
                mouse_hesitation=2 + step * 0.2,
                scroll_reversals=3 + step * 0.3,
                time_on_page=60 + step * 3
            )

        # Fused assessment over everything submitted above
        result = demo_api.get_confusion_assessment()

        print(f"Confusion Score: {result['confusion_score']:.2f}")
        print(f"Confidence: {result['confidence']:.2f}")
        print(f"Primary Indicator: {result['primary_indicator']}")
        print(f"Biometric Score: {result['biometric_score']:.2f}")
        print(f"Behavioral Score: {result['behavioral_score']:.2f}")
        print(f"Suggested Action: {result['suggested_action']}")

    _run_demo()