Spaces:
Sleeping
Sleeping
Upload pipeline.py with huggingface_hub
Browse files
- pipeline.py +18 -14
pipeline.py
CHANGED
|
@@ -1,8 +1,3 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Multimodal Authenticity Detection Pipeline
|
| 3 |
-
Integrates CNN audio classification, Whisper ASR, and text authenticity analysis
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
from typing import Dict, Optional
|
| 7 |
import time
|
| 8 |
from audio_classifier import AudioClassifier
|
|
@@ -102,22 +97,32 @@ class AuthenticityDetectionPipeline:
|
|
| 102 |
text_results: Dict
|
| 103 |
) -> Dict:
|
| 104 |
|
|
|
|
| 105 |
if audio_results['classification'] == 'spontaneous':
|
| 106 |
audio_score = audio_results['confidence']
|
| 107 |
else: # read
|
| 108 |
audio_score = 1.0 - audio_results['confidence']
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
speech_pattern_score = 1.0 - asr_results['kopparapu_score']
|
| 114 |
|
| 115 |
-
|
|
|
|
|
|
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
composite_score = (
|
| 118 |
-
audio_score * 0.
|
| 119 |
-
speech_pattern_score * 0.
|
| 120 |
-
|
|
|
|
|
|
|
| 121 |
)
|
| 122 |
|
| 123 |
if composite_score >= 0.7:
|
|
@@ -186,4 +191,3 @@ if __name__ == "__main__":
|
|
| 186 |
whisper_model_size="base"
|
| 187 |
)
|
| 188 |
print("\nPipeline ready for audio analysis.")
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from typing import Dict, Optional
|
| 2 |
import time
|
| 3 |
from audio_classifier import AudioClassifier
|
|
|
|
| 97 |
text_results: Dict
|
| 98 |
) -> Dict:
|
| 99 |
|
| 100 |
+
# CNN score: spontaneous = authentic (high), read = inauthentic (low)
|
| 101 |
if audio_results['classification'] == 'spontaneous':
|
| 102 |
audio_score = audio_results['confidence']
|
| 103 |
else: # read
|
| 104 |
audio_score = 1.0 - audio_results['confidence']
|
| 105 |
|
| 106 |
+
# Kopparapu score: 0=spontaneous, 1=read
|
| 107 |
+
# Invert so spontaneous (low kopparapu) = high authenticity
|
| 108 |
+
speech_pattern_score = 1.0 - asr_results['kopparapu_score']
|
|
|
|
| 109 |
|
| 110 |
+
# Filler words: higher ratio = more spontaneous = more authentic
|
| 111 |
+
filler_ratio = asr_results['filler_words']['ratio']
|
| 112 |
+
filler_score = min(1.0, filler_ratio / 0.05) # Normalize: 5%+ = max score
|
| 113 |
|
| 114 |
+
# Pause variability: higher = more spontaneous = more authentic
|
| 115 |
+
pause_var = asr_results['pause_patterns']['pause_variability']
|
| 116 |
+
pause_score = min(1.0, pause_var / 0.5) # Normalize: 0.5+ = max score
|
| 117 |
+
|
| 118 |
+
text_auth_score = text_results['authenticity_score']
|
| 119 |
+
|
| 120 |
composite_score = (
|
| 121 |
+
audio_score * 0.15 + # CNN - weakest component
|
| 122 |
+
speech_pattern_score * 0.20 + # Kopparapu linguistic
|
| 123 |
+
filler_score * 0.10 + # Filler word ratio
|
| 124 |
+
pause_score * 0.05 + # Pause variability
|
| 125 |
+
text_auth_score * 0.50 # Text authenticity - strongest signal
|
| 126 |
)
|
| 127 |
|
| 128 |
if composite_score >= 0.7:
|
|
|
|
| 191 |
whisper_model_size="base"
|
| 192 |
)
|
| 193 |
print("\nPipeline ready for audio analysis.")
|
|
|