"""
Reading Pattern Analyzer

Detects if someone is reading prepared answers vs speaking naturally.

Key indicators of reading:
- Consistent speech rate (no natural variation)
- Lack of filler words ("um", "uh", "like", "you know")
- Regular pause patterns
- Monotonic rhythm
"""

import re
from dataclasses import dataclass, field
from typing import List, Optional

import numpy as np

# Common filler words in English
FILLER_WORDS = [
    'um', 'uh', 'uhm', 'umm', 'er', 'ah', 'like', 'you know',
    'basically', 'actually', 'so', 'well', 'i mean', 'kind of',
    'sort of', 'right', 'okay'
]

# Phrases that typically accompany a spoken self-correction.
_CORRECTION_MARKERS = (
    'i mean', 'sorry', 'no wait', 'actually', 'let me',
    'what i meant', 'no no', 'sorry i', 'wait',
)

# One word-bounded alternation so that e.g. 'wait' does not fire on "waiter".
_CORRECTION_PATTERN = re.compile(
    r'\b(?:' + '|'.join(re.escape(m) for m in _CORRECTION_MARKERS) + r')\b'
)


@dataclass
class ReadingPatternResult:
    """Result of reading pattern analysis."""
    is_reading: bool
    confidence: float  # 0.0 to 1.0
    indicators: List[str] = field(default_factory=list)
    speech_rate_cv: float = 0.0  # Coefficient of variation
    filler_word_rate: float = 0.0  # Fillers per minute
    pause_regularity: float = 0.0  # How regular pauses are


class ReadingPatternAnalyzer:
    """
    Analyzes speech patterns to detect if someone is reading.

    Uses transcription with timestamps to analyze:
    - Speech rate variation
    - Filler word frequency
    - Pause patterns
    - Presence of self-corrections
    """

    def __init__(self,
                 min_speech_rate_cv: float = 0.15,
                 min_filler_rate: float = 2.0,
                 reading_threshold: float = 0.6,
                 max_pause_regularity: float = 0.7):
        """
        Args:
            min_speech_rate_cv: Minimum coefficient of variation for natural speech
            min_filler_rate: Minimum filler words per minute for natural speech
            reading_threshold: Confidence threshold to flag as reading
            max_pause_regularity: Pause regularity (0..1) above which pauses
                are considered suspiciously regular
        """
        self.min_speech_rate_cv = min_speech_rate_cv
        self.min_filler_rate = min_filler_rate
        self.reading_threshold = reading_threshold
        self.max_pause_regularity = max_pause_regularity

    def analyze(self,
                transcription: str,
                word_timestamps: List[dict],
                duration_seconds: float) -> ReadingPatternResult:
        """
        Analyze transcription for reading patterns.

        Four heuristics each contribute a score; the confidence is their
        mean, and the sample is flagged when it reaches ``reading_threshold``.

        Args:
            transcription: Full transcription text
            word_timestamps: List of {'word': str, 'start': float, 'end': float}
            duration_seconds: Total audio duration

        Returns:
            ReadingPatternResult with analysis. With fewer than 10 timestamped
            words the result is ``is_reading=False`` with confidence 0.0.
        """
        if not word_timestamps or len(word_timestamps) < 10:
            return ReadingPatternResult(
                is_reading=False,
                confidence=0.0,
                indicators=["Insufficient data for analysis"]
            )

        indicators = []
        scores = []

        # 1. Analyze speech rate variation
        speech_rate_cv = self._analyze_speech_rate(word_timestamps)
        if speech_rate_cv < self.min_speech_rate_cv:
            indicators.append(f"Constant speech rate (CV={speech_rate_cv:.2f})")
            scores.append(0.8)
        else:
            scores.append(0.2)

        # 2. Analyze filler word frequency
        filler_rate = self._analyze_filler_words(transcription, duration_seconds)
        if filler_rate < self.min_filler_rate:
            indicators.append(f"Few filler words ({filler_rate:.1f}/min)")
            scores.append(0.7)
        else:
            scores.append(0.2)

        # 3. Analyze pause patterns
        pause_regularity = self._analyze_pause_patterns(word_timestamps)
        if pause_regularity > self.max_pause_regularity:
            indicators.append(f"Regular pause pattern ({pause_regularity:.0%})")
            scores.append(0.6)
        else:
            scores.append(0.2)

        # 4. Check for natural speech markers
        if not self._has_self_corrections(transcription):
            indicators.append("No self-corrections detected")
            scores.append(0.5)
        else:
            scores.append(0.1)

        # Convert numpy scalars to built-in types so the result matches its
        # annotations and stays JSON-serializable.
        confidence = float(np.mean(scores))
        is_reading = bool(confidence >= self.reading_threshold)

        return ReadingPatternResult(
            is_reading=is_reading,
            confidence=round(confidence, 2),
            indicators=indicators,
            speech_rate_cv=round(float(speech_rate_cv), 3),
            filler_word_rate=round(float(filler_rate), 2),
            pause_regularity=round(float(pause_regularity), 2)
        )

    def _analyze_speech_rate(self, word_timestamps: List[dict]) -> float:
        """
        Calculate coefficient of variation of speech rate.

        Natural speech has variable rate, reading is more constant.

        The rate is measured as words per second in 3-second windows that
        advance by 1 second. Only words lying entirely inside a window are
        counted, and windows containing no words are skipped.

        Returns:
            std/mean of the per-window rates, or 0.0 when there are fewer
            than 5 words or fewer than 3 non-empty windows.
        """
        if len(word_timestamps) < 5:
            return 0.0

        window_size = 3.0  # seconds
        hop = 1.0  # seconds

        starts = np.array([w.get('start', 0) for w in word_timestamps], dtype=float)
        ends = np.array([w.get('end', 0) for w in word_timestamps], dtype=float)
        max_time = word_timestamps[-1].get('end', 0)

        rates = []
        for window_start in np.arange(0, max_time - window_size, hop):
            window_end = window_start + window_size
            inside = (starts >= window_start) & (ends <= window_end)
            count = int(np.count_nonzero(inside))
            if count:
                rates.append(count / window_size)

        if len(rates) < 3:
            return 0.0

        # Coefficient of variation (std / mean)
        mean_rate = np.mean(rates)
        if mean_rate == 0:
            return 0.0

        return float(np.std(rates) / mean_rate)

    def _analyze_filler_words(self, transcription: str, duration_seconds: float) -> float:
        """
        Count filler words per minute.

        Natural speech has more fillers, reading has fewer.

        Each entry of FILLER_WORDS is matched case-insensitively on word
        boundaries.

        Returns:
            Fillers per minute, or 0.0 when the audio is shorter than
            0.1 minutes (6 seconds).
        """
        minutes = duration_seconds / 60.0
        if minutes < 0.1:
            return 0.0

        text_lower = transcription.lower()
        filler_count = sum(
            len(re.findall(r'\b' + re.escape(filler) + r'\b', text_lower))
            for filler in FILLER_WORDS
        )
        return filler_count / minutes

    def _analyze_pause_patterns(self, word_timestamps: List[dict]) -> float:
        """
        Analyze regularity of pauses between words.

        Reading tends to have more regular pauses.

        Returns:
            1 / (1 + CV) of the inter-word gaps longer than 50 ms, so higher
            means more regular. 0.0 when there are fewer than 5 words or
            fewer than 3 such gaps.
        """
        if len(word_timestamps) < 5:
            return 0.0

        min_gap = 0.05  # Ignore very small gaps

        # Gap between the end of each word and the start of the next
        all_gaps = [
            curr.get('start', 0) - prev.get('end', 0)
            for prev, curr in zip(word_timestamps, word_timestamps[1:])
        ]
        gaps = [gap for gap in all_gaps if gap > min_gap]

        if len(gaps) < 3:
            return 0.0

        # Calculate regularity (inverse of coefficient of variation)
        mean_gap = np.mean(gaps)
        if mean_gap == 0:
            return 0.0

        cv = np.std(gaps) / mean_gap
        return float(1.0 / (1.0 + cv))  # Higher = more regular

    def _has_self_corrections(self, transcription: str) -> bool:
        """
        Check for self-corrections which indicate natural speech.

        E.g., "I went to the... I mean, I was going to the store"

        Returns True when a correction marker occurs as a whole word or
        phrase, or when a whitespace-separated token longer than two
        characters is immediately repeated.
        """
        text_lower = transcription.lower()

        if _CORRECTION_PATTERN.search(text_lower):
            return True

        # Check for repeated words (stammering/correction)
        words = text_lower.split()
        return any(
            prev == curr and len(curr) > 2
            for prev, curr in zip(words, words[1:])
        )