"""
Reading Pattern Analyzer

Detects if someone is reading prepared answers vs speaking naturally.

Key indicators of reading:
- Consistent speech rate (no natural variation)
- Lack of filler words ("um", "uh", "like", "you know")
- Regular pause patterns
- Monotonic rhythm
"""
import re
from dataclasses import dataclass, field
from typing import List, Optional

import numpy as np
# Hesitation markers and discourse fillers typical of spontaneous
# (unscripted) English speech; their absence suggests reading aloud.
FILLER_WORDS = [
    'um', 'uh', 'uhm', 'umm', 'er', 'ah',
    'like', 'you know', 'basically', 'actually',
    'so', 'well', 'i mean', 'kind of',
    'sort of', 'right', 'okay',
]
@dataclass
class ReadingPatternResult:
    """Result of reading pattern analysis.

    Aggregate verdict plus the individual metrics computed by
    ReadingPatternAnalyzer.
    """
    # BUG FIX: the @dataclass decorator was missing. Without it,
    # ReadingPatternResult(is_reading=..., confidence=..., ...) raises
    # TypeError (object.__init__ accepts no kwargs) and `indicators`
    # is a bare dataclasses.Field object instead of a fresh list.
    is_reading: bool                # final verdict: True if flagged as reading
    confidence: float               # aggregate score, 0.0 to 1.0
    indicators: List[str] = field(default_factory=list)  # human-readable reasons
    speech_rate_cv: float = 0.0     # coefficient of variation of speech rate
    filler_word_rate: float = 0.0   # filler words per minute
    pause_regularity: float = 0.0   # 0..1; higher means more regular pauses
| class ReadingPatternAnalyzer: | |
| """ | |
| Analyzes speech patterns to detect if someone is reading. | |
| Uses transcription with timestamps to analyze: | |
| - Speech rate variation | |
| - Filler word frequency | |
| - Pause patterns | |
| """ | |
| def __init__(self, | |
| min_speech_rate_cv: float = 0.15, | |
| min_filler_rate: float = 2.0, | |
| reading_threshold: float = 0.6): | |
| """ | |
| Args: | |
| min_speech_rate_cv: Minimum coefficient of variation for natural speech | |
| min_filler_rate: Minimum filler words per minute for natural speech | |
| reading_threshold: Confidence threshold to flag as reading | |
| """ | |
| self.min_speech_rate_cv = min_speech_rate_cv | |
| self.min_filler_rate = min_filler_rate | |
| self.reading_threshold = reading_threshold | |
| def analyze(self, transcription: str, word_timestamps: List[dict], | |
| duration_seconds: float) -> ReadingPatternResult: | |
| """ | |
| Analyze transcription for reading patterns. | |
| Args: | |
| transcription: Full transcription text | |
| word_timestamps: List of {'word': str, 'start': float, 'end': float} | |
| duration_seconds: Total audio duration | |
| Returns: | |
| ReadingPatternResult with analysis | |
| """ | |
| if not word_timestamps or len(word_timestamps) < 10: | |
| return ReadingPatternResult( | |
| is_reading=False, | |
| confidence=0.0, | |
| indicators=["Insufficient data for analysis"] | |
| ) | |
| indicators = [] | |
| scores = [] | |
| # 1. Analyze speech rate variation | |
| speech_rate_cv = self._analyze_speech_rate(word_timestamps) | |
| if speech_rate_cv < self.min_speech_rate_cv: | |
| indicators.append(f"Constant speech rate (CV={speech_rate_cv:.2f})") | |
| scores.append(0.8) | |
| else: | |
| scores.append(0.2) | |
| # 2. Analyze filler word frequency | |
| filler_rate = self._analyze_filler_words(transcription, duration_seconds) | |
| if filler_rate < self.min_filler_rate: | |
| indicators.append(f"Few filler words ({filler_rate:.1f}/min)") | |
| scores.append(0.7) | |
| else: | |
| scores.append(0.2) | |
| # 3. Analyze pause patterns | |
| pause_regularity = self._analyze_pause_patterns(word_timestamps) | |
| if pause_regularity > 0.7: | |
| indicators.append(f"Regular pause pattern ({pause_regularity:.0%})") | |
| scores.append(0.6) | |
| else: | |
| scores.append(0.2) | |
| # 4. Check for natural speech markers | |
| has_corrections = self._has_self_corrections(transcription) | |
| if not has_corrections: | |
| indicators.append("No self-corrections detected") | |
| scores.append(0.5) | |
| else: | |
| scores.append(0.1) | |
| # Calculate overall confidence | |
| confidence = np.mean(scores) | |
| is_reading = confidence >= self.reading_threshold | |
| return ReadingPatternResult( | |
| is_reading=is_reading, | |
| confidence=round(confidence, 2), | |
| indicators=indicators, | |
| speech_rate_cv=round(speech_rate_cv, 3), | |
| filler_word_rate=round(filler_rate, 2), | |
| pause_regularity=round(pause_regularity, 2) | |
| ) | |
| def _analyze_speech_rate(self, word_timestamps: List[dict]) -> float: | |
| """ | |
| Calculate coefficient of variation of speech rate. | |
| Natural speech has variable rate, reading is more constant. | |
| """ | |
| if len(word_timestamps) < 5: | |
| return 0.0 | |
| # Calculate words per second in sliding windows | |
| window_size = 3.0 # seconds | |
| hop = 1.0 # seconds | |
| rates = [] | |
| max_time = word_timestamps[-1].get('end', 0) | |
| for start in np.arange(0, max_time - window_size, hop): | |
| end = start + window_size | |
| words_in_window = [ | |
| w for w in word_timestamps | |
| if w.get('start', 0) >= start and w.get('end', 0) <= end | |
| ] | |
| if words_in_window: | |
| rate = len(words_in_window) / window_size | |
| rates.append(rate) | |
| if len(rates) < 3: | |
| return 0.0 | |
| # Coefficient of variation (std / mean) | |
| mean_rate = np.mean(rates) | |
| if mean_rate == 0: | |
| return 0.0 | |
| cv = np.std(rates) / mean_rate | |
| return cv | |
| def _analyze_filler_words(self, transcription: str, | |
| duration_seconds: float) -> float: | |
| """ | |
| Count filler words per minute. | |
| Natural speech has more fillers, reading has fewer. | |
| """ | |
| text_lower = transcription.lower() | |
| filler_count = 0 | |
| for filler in FILLER_WORDS: | |
| # Count occurrences (word boundaries) | |
| import re | |
| pattern = r'\b' + re.escape(filler) + r'\b' | |
| matches = re.findall(pattern, text_lower) | |
| filler_count += len(matches) | |
| # Calculate per minute rate | |
| minutes = duration_seconds / 60.0 | |
| if minutes < 0.1: | |
| return 0.0 | |
| return filler_count / minutes | |
| def _analyze_pause_patterns(self, word_timestamps: List[dict]) -> float: | |
| """ | |
| Analyze regularity of pauses between words. | |
| Reading tends to have more regular pauses. | |
| """ | |
| if len(word_timestamps) < 5: | |
| return 0.0 | |
| # Calculate gaps between consecutive words | |
| gaps = [] | |
| for i in range(1, len(word_timestamps)): | |
| prev_end = word_timestamps[i-1].get('end', 0) | |
| curr_start = word_timestamps[i].get('start', 0) | |
| gap = curr_start - prev_end | |
| if gap > 0.05: # Ignore very small gaps | |
| gaps.append(gap) | |
| if len(gaps) < 3: | |
| return 0.0 | |
| # Calculate regularity (inverse of coefficient of variation) | |
| mean_gap = np.mean(gaps) | |
| if mean_gap == 0: | |
| return 0.0 | |
| cv = np.std(gaps) / mean_gap | |
| regularity = 1.0 / (1.0 + cv) # Higher = more regular | |
| return regularity | |
| def _has_self_corrections(self, transcription: str) -> bool: | |
| """ | |
| Check for self-corrections which indicate natural speech. | |
| E.g., "I went to the... I mean, I was going to the store" | |
| """ | |
| correction_markers = [ | |
| 'i mean', 'sorry', 'no wait', 'actually', 'let me', | |
| 'what i meant', 'no no', 'sorry i', 'wait' | |
| ] | |
| text_lower = transcription.lower() | |
| for marker in correction_markers: | |
| if marker in text_lower: | |
| return True | |
| # Check for repeated words (stammering/correction) | |
| words = text_lower.split() | |
| for i in range(1, len(words)): | |
| if words[i] == words[i-1] and len(words[i]) > 2: | |
| return True | |
| return False | |