Spaces:

ranamhamoud
/

Authenticity

Sleeping

App Files Files Community

Ranam Hamoud committed on Dec 2, 2025

Commit

521317f

1 Parent(s): 4c2ceb8

Update app, classifier, and speech recognizer; rename model file and update examples

Browse files

Files changed (7) hide show

app.py +72 -35
audio_classifier.py +9 -28
examples/read1.ogg +2 -2
examples/read4.wav +0 -3
examples/spontaneous1.ogg +2 -2
spectrogram_cnn_3s_window (1).pth → spectrogram_cnn_3s_window.pth +0 -0
speech_recognizer.py +150 -13

app.py CHANGED Viewed

@@ -117,14 +117,6 @@ def analyze_audio_file(audio_file):
             else:
                 speech_patterns += "Normal pacing |\n"
-            speech_patterns += f"| **Non-alpha chars/sec** | {kf['nonalpha_per_sec']:.2f} | "
-            if kf['nonalpha_per_sec'] > 2.5:
-                speech_patterns += "High (disfluent) |\n"
-            elif kf['nonalpha_per_sec'] < 1.5:
-                speech_patterns += "Low (fluent) |\n"
-            else:
-                speech_patterns += "Moderate |\n"
             speech_patterns += f"| **Filler Rate** | {kf['filler_rate']*100:.1f}% | "
             if kf['filler_rate'] > 0.05:
                 speech_patterns += "High (spontaneous) |\n"
@@ -141,11 +133,51 @@ def analyze_audio_file(audio_file):
             else:
                 speech_patterns += "Few |\n"
-            speech_patterns += f"| **Alpha Ratio** | {kf['alpha_ratio']:.2f} | "
-            if kf['alpha_ratio'] > 0.85:
-                speech_patterns += "Clean text |\n"
             else:
-                speech_patterns += "With artifacts |\n"
             speech_patterns += "\n"
@@ -217,9 +249,7 @@ def analyze_audio_file(audio_file):
         return (error_msg, "", "", "", "", "")
-def create_interface():
-    """Create and configure Gradio interface."""
     custom_css = """
     @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@300;400;500;600;700&display=swap');
@@ -343,25 +373,6 @@ def create_interface():
                     size="lg"
                 )
-                # Add example audio files
-                gr.HTML("""
-                <div style='margin-top: 20px; margin-bottom: 10px;'>
-                    <h4 style='margin: 0 0 8px 0; font-size: 14px; font-weight: 600; color: #111827;'>Try these examples:</h4>
-                </div>
-                """)
-                examples_dir = os.path.join(os.path.dirname(__file__), "examples")
-                gr.Examples(
-                    examples=[
-                        [os.path.join(examples_dir, "read1.ogg")],
-                        [os.path.join(examples_dir, "spontaneous1.ogg")]
-                    ],
-                    inputs=[audio_input],
-                    label="",
-                    examples_per_page=2,
-                    cache_examples=False
-                )
                 gr.HTML("""
                 <div style='background: white; border: 1px solid #e5e7eb; padding: 20px; border-radius: 16px; margin-top: 20px;'>
                     <h4 style='margin: 0 0 12px 0; font-size: 14px; font-weight: 600; color: #111827;'>Requirements</h4>
@@ -402,7 +413,33 @@ def create_interface():
                     with gr.Tab("AI Detection"):
                         ai_output = gr.Markdown()
         def show_loading():
             loading_html = """

             else:
                 speech_patterns += "Normal pacing |\n"
             speech_patterns += f"| **Filler Rate** | {kf['filler_rate']*100:.1f}% | "
             if kf['filler_rate'] > 0.05:
                 speech_patterns += "High (spontaneous) |\n"
             else:
                 speech_patterns += "Few |\n"
+            speech_patterns += "\n---\n\n"
+            speech_patterns += "#### Reading Style Indicators\n\n"
+            speech_patterns += "| Feature | Value | Interpretation |\n"
+            speech_patterns += "|---------|-------|----------------|\n"
+            # Pause regularity
+            pause_reg = kf.get('pause_regularity', 0.5)
+            speech_patterns += f"| **Pause Regularity** | {pause_reg:.2f} | "
+            if pause_reg > 0.7:
+                speech_patterns += "Very regular (read) |\n"
+            elif pause_reg > 0.4:
+                speech_patterns += "Moderate |\n"
             else:
+                speech_patterns += "Irregular (spontaneous) |\n"
+            # Speech rate variability
+            rate_var = kf.get('speech_rate_variability', 0.0)
+            speech_patterns += f"| **Rate Variability** | {rate_var:.2f} | "
+            if rate_var > 0.6:
+                speech_patterns += "High (spontaneous) |\n"
+            elif rate_var > 0.3:
+                speech_patterns += "Moderate |\n"
+            else:
+                speech_patterns += "Steady pace (read) |\n"
+            # Sentence variance
+            sent_var = kf.get('sentence_length_variance', 0.0)
+            speech_patterns += f"| **Sentence Variance** | {sent_var:.2f} | "
+            if sent_var > 0.5:
+                speech_patterns += "Variable (spontaneous) |\n"
+            elif sent_var > 0.25:
+                speech_patterns += "Moderate |\n"
+            else:
+                speech_patterns += "Uniform (read) |\n"
+            # Self-corrections
+            corrections = kf.get('self_correction_count', 0)
+            speech_patterns += f"| **Self-Corrections** | {corrections} | "
+            if corrections > 2:
+                speech_patterns += "Multiple (spontaneous) |\n"
+            elif corrections > 0:
+                speech_patterns += "Few |\n"
+            else:
+                speech_patterns += "None (scripted) |\n"
             speech_patterns += "\n"
         return (error_msg, "", "", "", "", "")
+def create_interface():
     custom_css = """
     @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@300;400;500;600;700&display=swap');
                     size="lg"
                 )
                 gr.HTML("""
                 <div style='background: white; border: 1px solid #e5e7eb; padding: 20px; border-radius: 16px; margin-top: 20px;'>
                     <h4 style='margin: 0 0 12px 0; font-size: 14px; font-weight: 600; color: #111827;'>Requirements</h4>
                     with gr.Tab("AI Detection"):
                         ai_output = gr.Markdown()
+        # Add example audio files with caching
+        gr.HTML("""
+        <div style='margin-top: 20px; margin-bottom: 10px;'>
+            <h4 style='margin: 0 0 8px 0; font-size: 14px; font-weight: 600; color: #111827;'>Try these examples:</h4>
+        </div>
+        """)
+        examples_dir = os.path.join(os.path.dirname(__file__), "examples")
+        gr.Examples(
+            examples=[
+                [os.path.join(examples_dir, "read1.ogg")],
+                [os.path.join(examples_dir, "spontaneous1.ogg")]
+            ],
+            inputs=[audio_input],
+            outputs=[
+                overall_output,
+                acoustic_output,
+                transcription_output,
+                speech_output,
+                ai_output,
+            ],
+            fn=analyze_audio_file,
+            label="",
+            examples_per_page=2,
+            cache_examples=True
+        )
         def show_loading():
             loading_html = """

audio_classifier.py CHANGED Viewed

@@ -78,7 +78,7 @@ class SpeechStyleCNN(nn.Module):
 class AudioClassifier:
     AVAILABLE_MODELS = {
-        '3s_window': 'spectrogram_cnn_3s_window (1).pth',
         # '4s_window': 'spectrogram_cnn_4s_window.pth',
         # '4s_488x488': 'spectrogram_cnn_4s_window_488_x_488.pth'
     }
@@ -100,7 +100,7 @@ class AudioClassifier:
         if model_path is None:
             import os
-            model_path = os.path.join(os.path.dirname(__file__), 'spectrogram_cnn_3s_window (1).pth')
         try:
             print(f"Attempting to load model from: {model_path}")
@@ -120,7 +120,6 @@ class AudioClassifier:
         self.hop_length = 512
     def extract_mel_spectrogram(self, audio_path: str, window_size: float = 3.0) -> np.ndarray:
-        """Extract mel spectrogram from audio, using windowing if audio is longer than window_size."""
         audio, sr = librosa.load(audio_path, sr=self.sample_rate)
         # If audio is longer than window_size, take multiple windows and average
@@ -215,18 +214,8 @@ class AudioClassifier:
         return features
     def _compute_prosody_scores(self, features: Dict[str, float]) -> Dict:
-        """
-        Optimized prosody scoring based on feature analysis:
-        - spectral_centroid_std: 80% accuracy (threshold ~1017, read >= threshold)
-        - zcr_mean: 75% accuracy (threshold ~0.11, read >= threshold)
-        - energy_mean: 70% accuracy (threshold ~0.06, read < threshold)
-        - pitch_range: 75% accuracy (threshold ~3837, read < threshold)
-        """
         individual_scores = {}
-        # 1. Spectral centroid std - MOST discriminative (separation: 1.11)
-        # Read: 1087 avg, Spontaneous: 1017 avg
-        # Threshold: ~1050, read >= threshold
         sc_std = features['spectral_centroid_std']
         if sc_std >= 1100:
             spectral_score = 0.9  # Strongly indicates read
@@ -245,9 +234,6 @@ class AudioClassifier:
             'interpretation': 'high variability (read)' if spectral_score > 0.6 else 'low variability (spontaneous)' if spectral_score < 0.4 else 'moderate'
         }
-        # 2. ZCR mean - Second most discriminative (separation: 0.81)
-        # Read: 0.12 avg, Spontaneous: 0.10 avg
-        # Threshold: ~0.11, read >= threshold
         zcr = features['zcr_mean']
         if zcr >= 0.13:
             zcr_score = 0.9  # Strongly indicates read
@@ -303,10 +289,10 @@ class AudioClassifier:
         # Optimized weights based on feature separation scores
         weights = {
-            'spectral_variability': 0.40,  # Best discriminator (1.11 separation)
-            'zcr_mean': 0.30,              # Second best (0.81 separation)
-            'energy_level': 0.20,          # Third (0.69 separation)
-            'tempo': 0.10                  # Weakest (0.22 separation)
         }
         overall_score = (
@@ -316,7 +302,6 @@ class AudioClassifier:
             tempo_score * weights['tempo']
         )
-        # More decisive thresholds
         if overall_score > 0.60:
             classification = 'read'
             confidence = 0.5 + (overall_score - 0.5) * 0.8
@@ -324,7 +309,6 @@ class AudioClassifier:
             classification = 'spontaneous'
             confidence = 0.5 + (0.5 - overall_score) * 0.8
         else:
-            # Borderline - slight lean based on score
             classification = 'read' if overall_score >= 0.5 else 'spontaneous'
             confidence = 0.5 + abs(overall_score - 0.5) * 0.6
@@ -346,7 +330,6 @@ class AudioClassifier:
             predicted_class = torch.argmax(probabilities, dim=1).item()
             cnn_confidence = probabilities[0, predicted_class].item()
-            # Debug output - Model: Class 0=read, Class 1=spontaneous
             print(f"CNN Logits: {logits[0].cpu().numpy()}")
             print(f"CNN Probabilities: Class 0 (read)={probabilities[0, 0].item():.3f}, Class 1 (spontaneous)={probabilities[0, 1].item():.3f}")
             print(f"CNN Prediction: Class {predicted_class} ({['read', 'spontaneous'][predicted_class]}) with confidence {cnn_confidence:.3f}")
@@ -362,13 +345,11 @@ class AudioClassifier:
         print(f"CNN classification: {cnn_class_name}")
         print(f"Prosody classification: {prosody_classification} (conf={prosody_confidence:.2f})")
-        # Weighted combination: Prosody is more reliable (60% acc) than CNN (50% acc)
-        # Convert classifications to scores: read=1, spontaneous=0
         cnn_score = 1.0 if cnn_class_name == 'read' else 0.0
         prosody_score = 1.0 if prosody_classification == 'read' else 0.0
-        # Weight prosody more heavily (0.6) than CNN (0.4)
-        # Also factor in confidence
         weighted_score = (
             cnn_score * cnn_confidence * 0.4 +
             prosody_score * prosody_confidence * 0.6

 class AudioClassifier:
     AVAILABLE_MODELS = {
+        '3s_window': 'spectrogram_cnn_3s_window.pth',
         # '4s_window': 'spectrogram_cnn_4s_window.pth',
         # '4s_488x488': 'spectrogram_cnn_4s_window_488_x_488.pth'
     }
         if model_path is None:
             import os
+            model_path = os.path.join(os.path.dirname(__file__), 'spectrogram_cnn_3s_window.pth')
         try:
             print(f"Attempting to load model from: {model_path}")
         self.hop_length = 512
     def extract_mel_spectrogram(self, audio_path: str, window_size: float = 3.0) -> np.ndarray:
         audio, sr = librosa.load(audio_path, sr=self.sample_rate)
         # If audio is longer than window_size, take multiple windows and average
         return features
     def _compute_prosody_scores(self, features: Dict[str, float]) -> Dict:
         individual_scores = {}
         sc_std = features['spectral_centroid_std']
         if sc_std >= 1100:
             spectral_score = 0.9  # Strongly indicates read
             'interpretation': 'high variability (read)' if spectral_score > 0.6 else 'low variability (spontaneous)' if spectral_score < 0.4 else 'moderate'
         }
         zcr = features['zcr_mean']
         if zcr >= 0.13:
             zcr_score = 0.9  # Strongly indicates read
         # Optimized weights based on feature separation scores
         weights = {
+            'spectral_variability': 0.40,
+            'zcr_mean': 0.30,
+            'energy_level': 0.20,
+            'tempo': 0.10
         }
         overall_score = (
             tempo_score * weights['tempo']
         )
         if overall_score > 0.60:
             classification = 'read'
             confidence = 0.5 + (overall_score - 0.5) * 0.8
             classification = 'spontaneous'
             confidence = 0.5 + (0.5 - overall_score) * 0.8
         else:
             classification = 'read' if overall_score >= 0.5 else 'spontaneous'
             confidence = 0.5 + abs(overall_score - 0.5) * 0.6
             predicted_class = torch.argmax(probabilities, dim=1).item()
             cnn_confidence = probabilities[0, predicted_class].item()
             print(f"CNN Logits: {logits[0].cpu().numpy()}")
             print(f"CNN Probabilities: Class 0 (read)={probabilities[0, 0].item():.3f}, Class 1 (spontaneous)={probabilities[0, 1].item():.3f}")
             print(f"CNN Prediction: Class {predicted_class} ({['read', 'spontaneous'][predicted_class]}) with confidence {cnn_confidence:.3f}")
         print(f"CNN classification: {cnn_class_name}")
         print(f"Prosody classification: {prosody_classification} (conf={prosody_confidence:.2f})")
         cnn_score = 1.0 if cnn_class_name == 'read' else 0.0
         prosody_score = 1.0 if prosody_classification == 'read' else 0.0
         weighted_score = (
             cnn_score * cnn_confidence * 0.4 +
             prosody_score * prosody_confidence * 0.6

examples/read1.ogg CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a13e626fdda037d19f32574aa244b3c5d5d8cee9a29777bdc9aa2923ff1035d2
-size 67654

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c8e969d50e75835caf2a52f33c19accdb1cdfa1e069501bad0fc2fe470ea761
+size 157216

examples/read4.wav DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c9dbcda832552f5051a60de21bc10dd1166cfb7039077e4108d6a8e239148ec3
-size 898430

examples/spontaneous1.ogg CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bece27541be3a7e5b09132eec245c141d26b5114a44ba3124b8678914b67345
-size 102198

 version https://git-lfs.github.com/spec/v1
+oid sha256:69b8aeffd1e7a02ed90bcff98d202cd7a97cc57cd1d16a4cdbd4aac2e770b6db
+size 323869

spectrogram_cnn_3s_window (1).pth → spectrogram_cnn_3s_window.pth RENAMED Viewed

File without changes

speech_recognizer.py CHANGED Viewed

@@ -42,7 +42,9 @@ class SpeechRecognizer:
         analysis = self._analyze_transcription(transcription, segments)
         duration = analysis['duration'] if analysis['duration'] > 0 else 1.0
-        kopparapu_features = self._extract_kopparapu_features(transcription, duration)
         kopparapu_score = self._calculate_kopparapu_score(kopparapu_features)
         return {
@@ -140,10 +142,13 @@ class SpeechRecognizer:
             'pause_variability': float(np.std(pauses)) if len(pauses) > 1 else 0.0
         }
-    def _extract_kopparapu_features(self, text: str, duration_sec: float) -> Dict:
         """
-        Extract Kopparapu-like linguistic features from transcription.
-        Based on: https://arxiv.org/pdf/2306.08012
         """
         text = text.strip()
         if len(text) == 0:
@@ -153,7 +158,11 @@ class SpeechRecognizer:
                 'words_per_sec': 0.0,
                 'nonalpha_per_sec': 0.0,
                 'repetition_count': 0,
-                'filler_rate': 0.0
             }
         total_chars = len(text)
@@ -170,8 +179,10 @@ class SpeechRecognizer:
         words_per_sec = num_words / duration_sec
         nonalpha_per_sec = nonalpha_chars / duration_sec
         char_reps = len(re.findall(r'(.)\1{2,}', text))
         words_list = text.lower().split()
         word_reps = 0
         for i in range(len(words_list) - 1):
@@ -180,6 +191,7 @@ class SpeechRecognizer:
         repetition_count = char_reps + word_reps
         lower = text.lower()
         filler_patterns = [
             r'\bum\b', r'\buh\b', r'\buhm\b', r'\ber\b', r'\bah\b',
@@ -193,29 +205,154 @@ class SpeechRecognizer:
             filler_count += len(re.findall(pattern, lower))
         filler_rate = filler_count / num_words
         return {
             'alpha_ratio': float(alpha_ratio),
             'chars_per_word': float(chars_per_word),
             'words_per_sec': float(words_per_sec),
             'nonalpha_per_sec': float(nonalpha_per_sec),
             'repetition_count': int(repetition_count),
-            'filler_rate': float(filler_rate)
         }
     def _logistic(self, x: float, a: float, b: float) -> float:
         return 1.0 / (1.0 + np.exp(-(x - a) / b))
     def _calculate_kopparapu_score(self, features: Dict) -> float:
         f1 = features['chars_per_word']
-        L1 = self._logistic(f1, a=5.0, b=1.5)
         f2 = features['words_per_sec']
-        L2 = self._logistic(f2, a=2.0, b=0.7)
-        f3_raw = features['nonalpha_per_sec'] + 10.0 * features['filler_rate']
-        L3 = self._logistic(-f3_raw, a=0.0, b=1.0)
-        score = 0.4 * L1 + 0.4 * L2 + 0.2 * L3
         return float(score)

         analysis = self._analyze_transcription(transcription, segments)
         duration = analysis['duration'] if analysis['duration'] > 0 else 1.0
+        kopparapu_features = self._extract_kopparapu_features(
+            transcription, duration, segments, analysis['pause_patterns']
+        )
         kopparapu_score = self._calculate_kopparapu_score(kopparapu_features)
         return {
             'pause_variability': float(np.std(pauses)) if len(pauses) > 1 else 0.0
         }
+    def _extract_kopparapu_features(
+        self, text: str, duration_sec: float,
+        segments: List[Dict] = None, pause_patterns: Dict = None
+    ) -> Dict:
         """
+        Extract enhanced Kopparapu-like linguistic features for read speech detection.
+        Based on: https://arxiv.org/pdf/2306.08012 with extensions.
         """
         text = text.strip()
         if len(text) == 0:
                 'words_per_sec': 0.0,
                 'nonalpha_per_sec': 0.0,
                 'repetition_count': 0,
+                'filler_rate': 0.0,
+                'pause_regularity': 0.5,
+                'speech_rate_variability': 0.0,
+                'sentence_length_variance': 0.0,
+                'self_correction_count': 0
             }
         total_chars = len(text)
         words_per_sec = num_words / duration_sec
         nonalpha_per_sec = nonalpha_chars / duration_sec
+        # Character repetitions (e.g., "sooo", "ummmm")
         char_reps = len(re.findall(r'(.)\1{2,}', text))
+        # Word repetitions (e.g., "I I think", "the the")
         words_list = text.lower().split()
         word_reps = 0
         for i in range(len(words_list) - 1):
         repetition_count = char_reps + word_reps
+        # Filler words detection
         lower = text.lower()
         filler_patterns = [
             r'\bum\b', r'\buh\b', r'\buhm\b', r'\ber\b', r'\bah\b',
             filler_count += len(re.findall(pattern, lower))
         filler_rate = filler_count / num_words
+        # NEW: Pause regularity - read speech has regular pauses at punctuation
+        # Low variability = regular pauses = likely read
+        pause_regularity = 0.5  # neutral default
+        if pause_patterns and pause_patterns.get('num_pauses', 0) > 2:
+            pause_var = pause_patterns.get('pause_variability', 0.5)
+            # Normalize: low variability (< 0.2) -> high regularity (close to 1)
+            # High variability (> 0.6) -> low regularity (close to 0)
+            pause_regularity = max(0.0, min(1.0, 1.0 - (pause_var / 0.6)))
+        # NEW: Speech rate variability across segments
+        # Read speech has consistent pacing; spontaneous varies with thinking
+        speech_rate_variability = self._compute_rate_variability(segments) if segments else 0.0
+        # NEW: Sentence length variance - read text has more uniform structure
+        sentence_length_variance = self._compute_sentence_variance(text)
+        # NEW: Self-corrections and false starts (spontaneous speech markers)
+        self_correction_patterns = [
+            r'\bwait\b', r'\bsorry\b', r'\bno\s*,?\s*I\b',
+            r'\bactually\s*,?\s*no\b', r'\blet me\b', r'\bwhat I meant\b',
+            r'\bI meant\b', r'\bhold on\b', r'\bwhat was I\b', r'\bor rather\b'
+        ]
+        self_correction_count = 0
+        for pattern in self_correction_patterns:
+            self_correction_count += len(re.findall(pattern, lower))
         return {
             'alpha_ratio': float(alpha_ratio),
             'chars_per_word': float(chars_per_word),
             'words_per_sec': float(words_per_sec),
             'nonalpha_per_sec': float(nonalpha_per_sec),
             'repetition_count': int(repetition_count),
+            'filler_rate': float(filler_rate),
+            'pause_regularity': float(pause_regularity),
+            'speech_rate_variability': float(speech_rate_variability),
+            'sentence_length_variance': float(sentence_length_variance),
+            'self_correction_count': int(self_correction_count)
         }
+    def _compute_rate_variability(self, segments: List[Dict]) -> float:
+        """
+        Compute speech rate variability across segments.
+        Read speech has consistent rate; spontaneous varies with thinking.
+        Returns 0-1 where higher = more variable = more spontaneous.
+        """
+        if not segments or len(segments) < 3:
+            return 0.0
+        segment_rates = []
+        for seg in segments:
+            duration = seg.get('end', 0) - seg.get('start', 0)
+            if duration > 0.3:  # Only consider segments > 300ms
+                words_in_seg = len(seg.get('text', '').split())
+                rate = words_in_seg / duration
+                if rate > 0:
+                    segment_rates.append(rate)
+        if len(segment_rates) < 3:
+            return 0.0
+        mean_rate = np.mean(segment_rates)
+        std_rate = np.std(segment_rates)
+        # Coefficient of variation normalized to 0-1
+        cv = std_rate / mean_rate if mean_rate > 0 else 0
+        return float(min(1.0, cv / 0.5))  # CV of 0.5+ maps to 1.0
+    def _compute_sentence_variance(self, text: str) -> float:
+        """
+        Compute variance in sentence lengths.
+        Read/scripted text tends to have more uniform sentence structure.
+        Returns 0-1 where higher = more variance = more spontaneous.
+        """
+        # Split into sentences
+        sentences = re.split(r'[.!?]+', text)
+        sentences = [s.strip() for s in sentences if s.strip()]
+        if len(sentences) < 2:
+            return 0.0
+        lengths = [len(s.split()) for s in sentences]
+        mean_len = np.mean(lengths)
+        std_len = np.std(lengths)
+        # Coefficient of variation normalized
+        cv = std_len / mean_len if mean_len > 0 else 0
+        return float(min(1.0, cv / 0.6))  # CV of 0.6+ maps to 1.0
     def _logistic(self, x: float, a: float, b: float) -> float:
+        """Sigmoid function centered at 'a' with steepness 'b'."""
         return 1.0 / (1.0 + np.exp(-(x - a) / b))
     def _calculate_kopparapu_score(self, features: Dict) -> float:
+        """
+        Calculate enhanced Kopparapu score for read vs spontaneous classification.
+        Score closer to 1 = more likely READ, closer to 0 = more likely SPONTANEOUS.
+        Key signals for READ speech:
+        - Higher chars_per_word (formal vocabulary)
+        - Faster, steadier words_per_sec
+        - Lower filler rate and disfluencies
+        - Regular pause patterns (pause_regularity high)
+        - Low speech rate variability
+        - Uniform sentence lengths
+        """
+        # L1: Vocabulary complexity - higher chars/word = more formal = read
         f1 = features['chars_per_word']
+        L1 = self._logistic(f1, a=4.8, b=1.2)
+        # L2: Speaking rate - faster, steadier = read
         f2 = features['words_per_sec']
+        L2 = self._logistic(f2, a=2.2, b=0.6)
+        # L3: Disfluency signal (inverted) - less disfluency = more read
+        # Combines filler rate, nonalpha, and repetitions
+        disfluency = (
+            features['nonalpha_per_sec'] +
+            8.0 * features['filler_rate'] +
+            0.5 * features['repetition_count']
+        )
+        L3 = self._logistic(-disfluency, a=0.0, b=0.8)
+        # L4: Pause regularity - regular pauses = read (already 0-1)
+        L4 = features.get('pause_regularity', 0.5)
+        # L5: Rate variability (inverted) - low variability = read
+        rate_var = features.get('speech_rate_variability', 0.0)
+        L5 = 1.0 - rate_var
+        # L6: Sentence variance (inverted) - uniform sentences = read
+        sent_var = features.get('sentence_length_variance', 0.0)
+        L6 = 1.0 - sent_var
+        # L7: Self-corrections (inverted) - more corrections = spontaneous
+        corrections = features.get('self_correction_count', 0)
+        L7 = self._logistic(-corrections, a=0.0, b=1.5)
+        # Weighted combination optimized for read detection
+        # Higher weights on pause regularity and rate consistency (key read markers)
+        score = (
+            0.15 * L1 +  # Vocabulary complexity
+            0.15 * L2 +  # Speaking rate
+            0.15 * L3 +  # Disfluency (filler/repetition)
+            0.20 * L4 +  # Pause regularity (strong read signal)
+            0.15 * L5 +  # Rate variability
+            0.10 * L6 +  # Sentence uniformity
+            0.10 * L7    # Self-corrections
+        )
         return float(score)