Authenticity / pipeline.py
Ranam Hamoud
Update files and add .gitignore, remove pycache from tracking
0b42831
import time
from audio_classifier import AudioClassifier
from speech_recognizer import SpeechRecognizer
from text_analyzer import TextAuthenticityAnalyzer
# Main pipeline class that orchestrates all analysis components
class AuthenticityDetectionPipeline:
def __init__(
self,
audio_model_path=None,
whisper_model_size="base",
device=None,
ai_detection_threshold=0.78
):
print("\n" + "="*60)
print("Initializing Multimodal Authenticity Detection Pipeline")
print("="*60 + "\n")
# load the CNN-based audio classifier
print("πŸ“Š Loading Audio Classifier (CNN)...")
self.audio_classifier = AudioClassifier(
model_path=audio_model_path,
device=device
)
# load whisper model for speech-to-text
print("\n🎀 Loading Speech Recognizer (Whisper)...")
self.speech_recognizer = SpeechRecognizer(
model_size=whisper_model_size,
device=device
)
# load text analyzer for AI detection
print("\nπŸ“ Loading Text Authenticity Analyzer...")
self.text_analyzer = TextAuthenticityAnalyzer(device=device, ai_threshold=ai_detection_threshold)
print("\nβœ… Pipeline initialization complete!")
print("="*60 + "\n")
# main analysis function - runs all stages
def analyze_audio(self, audio_path, language=None):
print("\n" + "="*60)
print("MULTIMODAL AUTHENTICITY ANALYSIS")
print("="*60 + "\n")
start_time = time.time()
# stage 1: classify audio using CNN
print("Stage 1: CNN Audio Classification...")
print("-" * 40)
audio_results = self.audio_classifier.classify(audio_path)
print(f"βœ“ CNN classification complete")
print(f" ## Classification: {audio_results['classification'].upper()}")
print(f" Confidence: {audio_results['confidence']*100:.1f}%")
# stage 2: transcribe and analyze speech patterns
print("\nStage 2: Speech Analysis (Whisper)...")
print("-" * 40)
asr_results = self.speech_recognizer.transcribe(audio_path, language=language)
print(f"βœ“ Speech analysis complete")
print(f" Language: {asr_results['language']}")
print(f" Word count: {asr_results['word_count']}")
print(f" Kopparapu classification: {asr_results['kopparapu_classification'].upper()}")
# stage 3: analyze transcribed text for AI patterns
print("\nStage 3: Analyzing text authenticity...")
print("-" * 40)
text_results = self.text_analyzer.analyze(asr_results['transcription'])
print(f"βœ“ Text analysis complete")
print(f" Authenticity score: {text_results['authenticity_score']*100:.1f}%")
print(f" Risk level: {text_results['risk_level'].upper()}")
# stage 4: combine all results into final assessment
print("\nStage 4: Generating final assessment...")
print("-" * 40)
final_assessment = self._generate_final_assessment(
audio_results,
asr_results,
text_results
)
elapsed_time = time.time() - start_time
print(f"βœ“ Analysis complete in {elapsed_time:.2f} seconds")
print("\n" + "="*60 + "\n")
return {
'audio_classification': audio_results,
'speech_recognition': asr_results,
'asr': asr_results, # alias for backwards compatibility
'text_analysis': text_results,
'text_authenticity': text_results,
'final_assessment': final_assessment,
'processing_time': elapsed_time
}
# combine scores from all components into final verdict
def _generate_final_assessment(
self,
audio_results,
asr_results,
text_results
):
# calculate audio score - spontaneous = authentic
if audio_results['classification'] == 'spontaneous':
audio_score = audio_results['confidence']
else:
audio_score = 1.0 - audio_results['confidence']
# kopparapu score - invert so spontaneous = high authenticity
speech_pattern_score = 1.0 - asr_results['kopparapu_score']
# filler words indicate spontaneous speech
filler_ratio = asr_results['filler_words']['ratio']
filler_score = min(1.0, filler_ratio / 0.05)
# pause variability - higher = more spontaneous
pause_var = asr_results['pause_patterns']['pause_variability']
pause_score = min(1.0, pause_var / 0.5)
# text authenticity from AI detector
text_auth_score = text_results['authenticity_score']
# get additional linguistic features
kf = asr_results['kopparapu_features']
# speech rate variability
rate_var = kf.get('speech_rate_variability', 0.0)
rate_var_score = min(1.0, rate_var / 0.15)
# pause regularity - lower = more spontaneous
pause_reg = kf.get('pause_regularity', 0.5)
pause_reg_score = 1.0 - pause_reg
# self-corrections indicate spontaneous speech
corrections = kf.get('self_correction_count', 0)
correction_score = min(1.0, corrections / 2.0)
# calculate weighted composite score
# weights: CNN+Prosody=15%, Linguistic=35%, AI Detection=50%
composite_score = (
audio_score * 0.15 +
speech_pattern_score * 0.25 +
filler_score * 0.05 +
pause_score * 0.03 +
rate_var_score * 0.02 +
text_auth_score * 0.50
)
# determine verdict based on composite score
if composite_score >= 0.7:
verdict = "AUTHENTIC"
risk = "low"
recommendation = "Response appears genuine with strong authenticity indicators."
elif composite_score >= 0.5:
verdict = "LIKELY AUTHENTIC"
risk = "moderate"
recommendation = "Response shows mostly authentic characteristics but has some concerns."
elif composite_score >= 0.3:
verdict = "QUESTIONABLE"
risk = "high"
recommendation = "Response has multiple authenticity concerns. Further investigation recommended."
else:
verdict = "LIKELY INAUTHENTIC"
risk = "critical"
recommendation = "Response shows strong indicators of inauthenticity. Manual review required."
# collect concerns and strengths
concerns = []
strengths = []
# check CNN classification
if audio_results['classification'] == 'read':
concerns.append(f"CNN detected read speech pattern ({audio_results['confidence']*100:.0f}% confidence)")
else:
strengths.append(f"CNN detected spontaneous speech ({audio_results['confidence']*100:.0f}% confidence)")
# check linguistic analysis
if asr_results['kopparapu_classification'] == 'read':
concerns.append(f"Linguistic analysis suggests read speech (score: {asr_results['kopparapu_score']:.2f})")
else:
strengths.append(f"Linguistic analysis suggests spontaneous speech (score: {asr_results['kopparapu_score']:.2f})")
# check filler words
filler_ratio = asr_results['filler_words']['ratio']
if filler_ratio < 0.02:
concerns.append(f"Low filler word usage ({filler_ratio*100:.1f}%) suggests scripted speech")
else:
strengths.append(f"Natural filler word usage ({filler_ratio*100:.1f}%) indicates spontaneity")
# check pause patterns
if asr_results['pause_patterns']['pause_variability'] < 0.3:
concerns.append("Regular pause patterns suggest reading at punctuation")
else:
strengths.append("Irregular pause patterns indicate spontaneous thinking")
# check AI detection
if text_results['ai_detection']['ai_generated']:
concerns.append(f"AI-generated text detected ({text_results['ai_detection']['confidence']*100:.0f}% probability)")
# check text originality
if text_results['authenticity_score'] > 0.7:
strengths.append("Text shows strong originality indicators")
return {
'verdict': verdict,
'risk_level': risk,
'composite_authenticity_score': float(composite_score),
'concerns': concerns,
'strengths': strengths,
'recommendation': recommendation,
}
# test code - runs when script is executed directly
if __name__ == "__main__":
print("Initializing Authenticity Detection Pipeline...")
model_path = "spectrogram_cnn_3s_window.pth"
pipeline = AuthenticityDetectionPipeline(
audio_model_path=model_path,
whisper_model_size="base"
)
print("\nPipeline ready for audio analysis.")