Spaces:
Sleeping
Sleeping
File size: 9,162 Bytes
4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 4c2ceb8 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 95ad43e 4ec806c 0b42831 95ad43e 0b42831 4ec806c 0b42831 95ad43e 0b42831 95ad43e 0b42831 95ad43e 0b42831 95ad43e 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c 0b42831 4ec806c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 |
import time
from audio_classifier import AudioClassifier
from speech_recognizer import SpeechRecognizer
from text_analyzer import TextAuthenticityAnalyzer
# Main pipeline class that orchestrates all analysis components
class AuthenticityDetectionPipeline:
    """Multimodal authenticity detection pipeline.

    Fuses three signals into a single verdict:
      1. A CNN audio classifier (read vs. spontaneous speech).
      2. Whisper speech recognition plus Kopparapu-style linguistic features.
      3. A text-level AI-generated-content detector on the transcript.

    NOTE(review): the odd characters in the status strings below (e.g. "π",
    "β") are mojibake of the original emoji; they are runtime output and are
    left exactly as found.
    """

    def __init__(
        self,
        audio_model_path=None,
        whisper_model_size="base",
        device=None,
        ai_detection_threshold=0.78
    ):
        """Eagerly load all three analysis components.

        Args:
            audio_model_path: Path to the CNN classifier weights, or None for
                the component's default.
            whisper_model_size: Whisper model size identifier, e.g. "base".
            device: Compute device forwarded to every component (None = let
                each component choose).
            ai_detection_threshold: AI-detection probability threshold
                forwarded to the text analyzer.
        """
        print("\n" + "=" * 60)
        print("Initializing Multimodal Authenticity Detection Pipeline")
        print("=" * 60 + "\n")
        # Load the CNN-based audio classifier.
        print("π Loading Audio Classifier (CNN)...")
        self.audio_classifier = AudioClassifier(
            model_path=audio_model_path,
            device=device
        )
        # Load the Whisper model for speech-to-text.
        print("\nπ€ Loading Speech Recognizer (Whisper)...")
        self.speech_recognizer = SpeechRecognizer(
            model_size=whisper_model_size,
            device=device
        )
        # Load the text analyzer for AI-generated-text detection.
        print("\nπ Loading Text Authenticity Analyzer...")
        self.text_analyzer = TextAuthenticityAnalyzer(
            device=device,
            ai_threshold=ai_detection_threshold
        )
        # BUG FIX: in the original this literal was split across two physical
        # lines (an unterminated string); rejoined into one valid literal.
        print("\nβ Pipeline initialization complete!")
        print("=" * 60 + "\n")

    # main analysis function - runs all stages
    def analyze_audio(self, audio_path, language=None):
        """Run the full four-stage authenticity analysis on one audio file.

        Args:
            audio_path: Path to the audio file to analyze.
            language: Optional language hint forwarded to the speech
                recognizer (None = auto-detect).

        Returns:
            dict with per-stage results ('audio_classification',
            'speech_recognition' with alias 'asr', 'text_analysis' with alias
            'text_authenticity'), the fused 'final_assessment', and
            'processing_time' in seconds.
        """
        print("\n" + "=" * 60)
        print("MULTIMODAL AUTHENTICITY ANALYSIS")
        print("=" * 60 + "\n")
        start_time = time.time()

        # Stage 1: CNN classification of the raw audio.
        print("Stage 1: CNN Audio Classification...")
        print("-" * 40)
        audio_results = self.audio_classifier.classify(audio_path)
        print("β CNN classification complete")
        print(f" ## Classification: {audio_results['classification'].upper()}")
        print(f" Confidence: {audio_results['confidence']*100:.1f}%")

        # Stage 2: transcription plus prosodic/linguistic feature extraction.
        print("\nStage 2: Speech Analysis (Whisper)...")
        print("-" * 40)
        asr_results = self.speech_recognizer.transcribe(audio_path, language=language)
        print("β Speech analysis complete")
        print(f" Language: {asr_results['language']}")
        print(f" Word count: {asr_results['word_count']}")
        print(f" Kopparapu classification: {asr_results['kopparapu_classification'].upper()}")

        # Stage 3: AI-generated-text detection on the transcript.
        print("\nStage 3: Analyzing text authenticity...")
        print("-" * 40)
        text_results = self.text_analyzer.analyze(asr_results['transcription'])
        print("β Text analysis complete")
        print(f" Authenticity score: {text_results['authenticity_score']*100:.1f}%")
        print(f" Risk level: {text_results['risk_level'].upper()}")

        # Stage 4: fuse the three stages into one verdict.
        print("\nStage 4: Generating final assessment...")
        print("-" * 40)
        final_assessment = self._generate_final_assessment(
            audio_results,
            asr_results,
            text_results
        )

        elapsed_time = time.time() - start_time
        print(f"β Analysis complete in {elapsed_time:.2f} seconds")
        print("\n" + "=" * 60 + "\n")
        return {
            'audio_classification': audio_results,
            'speech_recognition': asr_results,
            'asr': asr_results,  # alias for backwards compatibility
            'text_analysis': text_results,
            'text_authenticity': text_results,  # alias for backwards compatibility
            'final_assessment': final_assessment,
            'processing_time': elapsed_time,
        }

    # combine scores from all components into final verdict
    def _generate_final_assessment(
        self,
        audio_results,
        asr_results,
        text_results
    ):
        """Fuse per-stage outputs into a weighted composite verdict.

        Args:
            audio_results: CNN output; reads 'classification' and 'confidence'.
            asr_results: Speech output; reads 'kopparapu_score',
                'kopparapu_classification', 'filler_words', 'pause_patterns',
                and 'kopparapu_features'.
            text_results: Text output; reads 'authenticity_score' and
                'ai_detection'.

        Returns:
            dict with 'verdict', 'risk_level', 'composite_authenticity_score',
            'concerns', 'strengths', and 'recommendation'.
        """
        # Component scores, each normalized to [0, 1] where high = more
        # likely authentic/spontaneous.

        # CNN score: 'spontaneous' counts as authentic; otherwise invert.
        if audio_results['classification'] == 'spontaneous':
            audio_score = audio_results['confidence']
        else:
            audio_score = 1.0 - audio_results['confidence']

        # Kopparapu score measures read-likelihood, so invert it.
        speech_pattern_score = 1.0 - asr_results['kopparapu_score']

        # Filler words indicate spontaneous speech; saturates at a 5% ratio.
        filler_ratio = asr_results['filler_words']['ratio']
        filler_score = min(1.0, filler_ratio / 0.05)

        # Higher pause variability = more spontaneous; saturates at 0.5.
        pause_var = asr_results['pause_patterns']['pause_variability']
        pause_score = min(1.0, pause_var / 0.5)

        # Text authenticity taken directly from the AI detector.
        text_auth_score = text_results['authenticity_score']

        # Speech-rate variability from linguistic features; saturates at 0.15.
        kf = asr_results['kopparapu_features']
        rate_var = kf.get('speech_rate_variability', 0.0)
        rate_var_score = min(1.0, rate_var / 0.15)

        # NOTE(review): the original also derived scores from
        # kf['pause_regularity'] and kf['self_correction_count'] but never
        # used them in the composite; that dead code has been removed.

        # Weighted composite. Actual weights: CNN audio = 15%,
        # linguistic/prosodic = 35% (0.25 + 0.05 + 0.03 + 0.02),
        # AI text detection = 50%. (The original comment misattributed
        # the 15% as "CNN+Prosody".)
        composite_score = (
            audio_score * 0.15 +
            speech_pattern_score * 0.25 +
            filler_score * 0.05 +
            pause_score * 0.03 +
            rate_var_score * 0.02 +
            text_auth_score * 0.50
        )

        # Map the composite onto a verdict / risk band.
        if composite_score >= 0.7:
            verdict = "AUTHENTIC"
            risk = "low"
            recommendation = "Response appears genuine with strong authenticity indicators."
        elif composite_score >= 0.5:
            verdict = "LIKELY AUTHENTIC"
            risk = "moderate"
            recommendation = "Response shows mostly authentic characteristics but has some concerns."
        elif composite_score >= 0.3:
            verdict = "QUESTIONABLE"
            risk = "high"
            recommendation = "Response has multiple authenticity concerns. Further investigation recommended."
        else:
            verdict = "LIKELY INAUTHENTIC"
            risk = "critical"
            recommendation = "Response shows strong indicators of inauthenticity. Manual review required."

        # Human-readable evidence supporting the verdict.
        concerns = []
        strengths = []

        # CNN classification.
        if audio_results['classification'] == 'read':
            concerns.append(f"CNN detected read speech pattern ({audio_results['confidence']*100:.0f}% confidence)")
        else:
            strengths.append(f"CNN detected spontaneous speech ({audio_results['confidence']*100:.0f}% confidence)")

        # Linguistic (Kopparapu) classification.
        if asr_results['kopparapu_classification'] == 'read':
            concerns.append(f"Linguistic analysis suggests read speech (score: {asr_results['kopparapu_score']:.2f})")
        else:
            strengths.append(f"Linguistic analysis suggests spontaneous speech (score: {asr_results['kopparapu_score']:.2f})")

        # Filler words (reuses filler_ratio computed above; the original
        # re-read it from asr_results here).
        if filler_ratio < 0.02:
            concerns.append(f"Low filler word usage ({filler_ratio*100:.1f}%) suggests scripted speech")
        else:
            strengths.append(f"Natural filler word usage ({filler_ratio*100:.1f}%) indicates spontaneity")

        # Pause patterns.
        if asr_results['pause_patterns']['pause_variability'] < 0.3:
            concerns.append("Regular pause patterns suggest reading at punctuation")
        else:
            strengths.append("Irregular pause patterns indicate spontaneous thinking")

        # AI-generated-text detection.
        if text_results['ai_detection']['ai_generated']:
            concerns.append(f"AI-generated text detected ({text_results['ai_detection']['confidence']*100:.0f}% probability)")

        # Text originality.
        if text_results['authenticity_score'] > 0.7:
            strengths.append("Text shows strong originality indicators")

        return {
            'verdict': verdict,
            'risk_level': risk,
            'composite_authenticity_score': float(composite_score),
            'concerns': concerns,
            'strengths': strengths,
            'recommendation': recommendation,
        }
# Smoke test: build the pipeline when this module is executed as a script.
if __name__ == "__main__":
    print("Initializing Authenticity Detection Pipeline...")
    cnn_weights = "spectrogram_cnn_3s_window.pth"
    pipeline = AuthenticityDetectionPipeline(
        whisper_model_size="base",
        audio_model_path=cnn_weights,
    )
    print("\nPipeline ready for audio analysis.")
|