# Authenticity / app.py (22.3 kB)
# Hugging Face Space file, commit 15bac23 (unverified) by ranamhamoud:
# "Upload folder using huggingface_hub"
import gradio as gr
import os
from pipeline import AuthenticityDetectionPipeline
import traceback
# Build the shared analysis pipeline once, at import time. A failure here must
# not stop the module from importing: it is recorded in `pipeline_ready`, and
# analyze_audio_file() reports it to the user when an analysis is requested.
pipeline = None  # stays None when initialisation fails
try:
    pipeline = AuthenticityDetectionPipeline(whisper_model_size="base")
    pipeline_ready = True
except Exception:
    # Keep the app importable, but leave the reason in the server log instead
    # of discarding it silently.
    traceback.print_exc()
    pipeline_ready = False
# Border/heading colour for each verdict string; unknown verdicts fall back to
# a neutral grey in _render_overall_status().
_VERDICT_COLORS = {
    "AUTHENTIC": "#10b981",
    "LIKELY AUTHENTIC": "#3b82f6",
    "QUESTIONABLE": "#f59e0b",
    "LIKELY INAUTHENTIC": "#ef4444",
}

_TABLE_HEADER = (
    "| Feature | Value | Interpretation |\n"
    "|---------|-------|----------------|\n"
)


def _message_only(message):
    """Return the 5-value result that shows *message* in the first slot and blanks the rest."""
    return (message, "", "", "", "")


def _first_match(rules, fallback):
    """Return the label of the first ``(condition, label)`` pair whose condition is true."""
    for condition, label in rules:
        if condition:
            return label
    return fallback


def _table_row(label, value_text, interpretation):
    """Format one row of a three-column Markdown table."""
    return f"| **{label}** | {value_text} | {interpretation} |\n"


def _render_overall_status(results):
    """Build the HTML summary card (verdict, score, risk level, processing time)."""
    final = results['final_assessment']
    color = _VERDICT_COLORS.get(final['verdict'], '#6b7280')
    # The markup is kept flush-left on purpose: this string is shown through a
    # Markdown component, and indenting the lines could change how it renders.
    return f"""
<div style='background: white; border: 2px solid {color}; padding: 25px; border-radius: 16px; margin: 10px 0;'>
<h2 style='color: {color}; margin: 0 0 15px 0; font-size: 24px; font-weight: 700;'>
{final['verdict']}
</h2>
<div style='display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 15px; margin: 15px 0;'>
<div style='text-align: center; padding: 15px; background: white; border-radius: 10px;'>
<div style='font-size: 2em; font-weight: bold; color: {color};'>{final['composite_authenticity_score']*100:.0f}%</div>
<div style='color: #666; margin-top: 5px;'>Authenticity Score</div>
</div>
<div style='text-align: center; padding: 15px; background: white; border-radius: 10px;'>
<div style='font-size: 2em; font-weight: bold; color: {color};'>{final['risk_level'].upper()}</div>
<div style='color: #666; margin-top: 5px;'>Risk Level</div>
</div>
<div style='text-align: center; padding: 15px; background: white; border-radius: 10px;'>
<div style='font-size: 2em; font-weight: bold; color: #667eea;'>{results['processing_time']:.1f}s</div>
<div style='color: #666; margin-top: 5px;'>Processing Time</div>
</div>
</div>
<div style='background: white; padding: 15px; border-radius: 10px; margin-top: 15px;'>
<em style='color: #555;'>{final['recommendation']}</em>
</div>
</div>
"""


def _render_transcription(asr):
    """Build the Markdown for the transcription tab: metrics table, pace note, full text."""
    rate = asr['speech_rate']
    pace_note = _first_match(
        [
            (rate > 160, "**Fast speech rate** - Above average speaking speed\n\n"),
            (rate < 120, "**Slow speech rate** - Below average speaking speed\n\n"),
        ],
        "**Normal speech rate** - Average conversational pace\n\n",
    )
    return "".join([
        "### Speech Transcription\n\n",
        "| Metric | Value |\n",
        "|--------|-------|\n",
        f"| **Language** | {asr['language'].upper()} |\n",
        f"| **Duration** | {asr['duration']:.1f} seconds |\n",
        f"| **Word Count** | {asr['word_count']} words |\n",
        f"| **Speech Rate** | {rate:.1f} words/min |\n\n",
        pace_note,
        "---\n\n",
        "#### Full Transcription\n\n",
        f"> {asr['transcription']}",
    ])


def _render_read_vs_spontaneous(asr):
    """Build the classification header and the two feature tables.

    Only called when ``asr`` carries the ``kopparapu_*`` entries.
    """
    score = asr['kopparapu_score']
    classification = asr['kopparapu_classification'].upper()
    # Confidence is the distance of the score from the undecided side of 0.5.
    confidence = score if score >= 0.5 else (1 - score)
    kf = asr['kopparapu_features']

    chars_per_word = kf['chars_per_word']
    words_per_sec = kf['words_per_sec']
    filler_rate = kf['filler_rate']
    repetitions = kf['repetition_count']
    # The reading-style features fall back to defaults when absent.
    pause_reg = kf.get('pause_regularity', 0.5)
    rate_var = kf.get('speech_rate_variability', 0.0)
    sent_var = kf.get('sentence_length_variance', 0.0)
    corrections = kf.get('self_correction_count', 0)

    return "".join([
        f" ### **Classification: {classification} SPEECH**\n\n",
        f"**Score:** {score:.3f} (0=spontaneous, 1=read)\n",
        f"**Confidence:** {confidence*100:.1f}%\n\n",
        "---\n\n",
        "#### Linguistic Metrics\n\n",
        _TABLE_HEADER,
        _table_row(
            "Characters/Word", f"{chars_per_word:.2f}",
            _first_match(
                [(chars_per_word > 5.5, "Complex vocabulary"),
                 (chars_per_word < 4.5, "Simple vocabulary")],
                "Average complexity",
            ),
        ),
        _table_row(
            "Words/Second", f"{words_per_sec:.2f}",
            _first_match(
                [(words_per_sec > 3, "Fast pacing"),
                 (words_per_sec < 2, "Slow pacing")],
                "Normal pacing",
            ),
        ),
        _table_row(
            "Filler Rate", f"{filler_rate*100:.1f}%",
            _first_match(
                [(filler_rate > 0.05, "High (spontaneous)"),
                 (filler_rate < 0.02, "Low (scripted)")],
                "Moderate",
            ),
        ),
        _table_row(
            "Repetitions", f"{repetitions}",
            _first_match(
                [(repetitions > 3, "Multiple (thinking aloud)"),
                 (repetitions == 0, "None (prepared)")],
                "Few",
            ),
        ),
        "\n---\n\n",
        "#### Reading Style Indicators\n\n",
        _TABLE_HEADER,
        _table_row(
            "Pause Regularity", f"{pause_reg:.2f}",
            _first_match(
                [(pause_reg > 0.7, "Very regular (read)"),
                 (pause_reg > 0.4, "Moderate")],
                "Irregular (spontaneous)",
            ),
        ),
        _table_row(
            "Rate Variability", f"{rate_var:.2f}",
            _first_match(
                [(rate_var > 0.6, "High (spontaneous)"),
                 (rate_var > 0.3, "Moderate")],
                "Steady pace (read)",
            ),
        ),
        _table_row(
            "Sentence Variance", f"{sent_var:.2f}",
            _first_match(
                [(sent_var > 0.5, "Variable (spontaneous)"),
                 (sent_var > 0.25, "Moderate")],
                "Uniform (read)",
            ),
        ),
        _table_row(
            "Self-Corrections", f"{corrections}",
            _first_match(
                [(corrections > 2, "Multiple (spontaneous)"),
                 (corrections > 0, "Few")],
                "None (scripted)",
            ),
        ),
        "\n",
    ])


def _render_speech_patterns(asr):
    """Build the Markdown for the speech-patterns tab.

    The read-vs-spontaneous section is included only when ``asr`` contains
    ``'kopparapu_score'``; the filler-word and pause sections are always
    rendered, so the result is defined even without that key.
    """
    parts = []
    if 'kopparapu_score' in asr:
        parts.append(_render_read_vs_spontaneous(asr))

    fillers = asr['filler_words']
    filler_ratio = fillers['ratio']
    parts.append("---\n\n")
    parts.append("#### Filler Words & Disfluencies\n\n")
    parts.append(f"**Count:** {fillers['count']} filler words\n")
    parts.append(f"**Ratio:** {filler_ratio*100:.2f}% of speech\n\n")
    if fillers['details']:
        found = ', '.join(f"*{k}* ({v})" for k, v in fillers['details'].items())
        parts.append("**Found:** " + found + "\n\n")
    parts.append(_first_match(
        [
            (filler_ratio > 0.05,
             "**High filler usage** - Strong indicator of spontaneous, unscripted speech\n\n"),
            (filler_ratio < 0.02,
             "**Low filler usage** - May indicate reading or highly rehearsed speech\n\n"),
        ],
        "**Moderate filler usage** - Normal conversational pattern\n\n",
    ))

    pauses = asr['pause_patterns']
    pause_var = pauses['pause_variability']
    parts.append("---\n\n")
    parts.append("#### Pause Patterns\n\n")
    parts.append(f"**Total Pauses:** {pauses['num_pauses']}\n")
    parts.append(f"**Average Duration:** {pauses['avg_pause']:.2f}s\n")
    parts.append(f"**Longest Pause:** {pauses['max_pause']:.2f}s\n")
    parts.append(f"**Variability:** {pause_var:.2f}\n\n")
    parts.append(_first_match(
        [
            (pause_var < 0.3,
             "**Regular pauses** - Consistent pattern suggests reading at punctuation marks\n\n"),
            (pause_var > 0.6,
             "**Irregular pauses** - Natural thinking breaks indicate spontaneous speech\n\n"),
        ],
        "**Moderate variability** - Mixed pattern\n\n",
    ))
    return "".join(parts)


def _render_ai_detection(text_auth):
    """Build the Markdown for the AI-detection tab: headline, confidence bars, interpretation."""
    detection = text_auth['ai_detection']
    ai_prob = detection['confidence']
    if detection['ai_generated']:
        headline = "### **AI-GENERATED LIKELY**\n\n"
    else:
        headline = "### **HUMAN-WRITTEN LIKELY**\n\n"

    bar_length = 30
    # Clamp so an out-of-range probability cannot produce a malformed bar.
    ai_bars = max(0, min(bar_length, int(ai_prob * bar_length)))
    human_bars = bar_length - ai_bars

    return "".join([
        headline,
        "**Confidence:**\n\n",
        f"```\nAI: [{'█' * ai_bars}{'░' * human_bars}] {ai_prob*100:.0f}%\n",
        f"Human: [{'█' * human_bars}{'░' * ai_bars}] {(1-ai_prob)*100:.0f}%\n```\n\n",
        "---\n\n",
        "#### Interpretation\n\n",
        # A missing or empty interpretation falls back to the same placeholder.
        detection.get('interpretation') or "No interpretation available.",
    ])


def analyze_audio_file(audio_file):
    """Run the pipeline on *audio_file* and render its results for the UI.

    Args:
        audio_file: Value handed over by the audio input component, or
            ``None`` when nothing was provided. It is passed unchanged to
            ``pipeline.analyze_audio``.

    Returns:
        A 5-tuple of strings, in the order the output components are wired:
        overall status (HTML), acoustic interpretation, transcription,
        speech patterns, AI detection. On any failure the first element
        carries the message and the other four are empty, so every path
        yields exactly five values.
    """
    if not pipeline_ready:
        return _message_only(
            "Error: Pipeline not initialized. Please check the installation."
        )
    if audio_file is None:
        return _message_only("Please upload an audio file.")

    try:
        # language=None is forwarded as-is; presumably the pipeline then
        # detects the language itself - confirm against the pipeline module.
        results = pipeline.analyze_audio(audio_file, language=None)
        asr = results['speech_recognition']
        return (
            _render_overall_status(results),
            results['audio_classification']['interpretation'],
            _render_transcription(asr),
            _render_speech_patterns(asr),
            _render_ai_detection(results['text_authenticity']),
        )
    except Exception as e:
        # UI boundary: show the failure to the user instead of raising.
        error_msg = f"Error during analysis:\n\n{str(e)}\n\n{traceback.format_exc()}"
        return _message_only(error_msg)
def create_interface():
    """Build and return the Gradio ``Blocks`` app for the authenticity detector.

    The page consists of a header/hero banner, a two-column row (audio input
    and requirements on the left, summary plus four result tabs on the right),
    an optional examples section, and a footer. Clicking "Analyze Audio" first
    shows a loading placeholder in all five result components and then runs
    ``analyze_audio_file`` on the selected audio.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).

    NOTE(review): the indentation of this file was lost before review, so the
    nesting of components below (in particular where the examples section
    sits) is reconstructed from statement order - confirm against the
    intended layout.
    """
    # All embedded CSS/HTML is kept flush-left so the emitted strings are
    # unchanged; some of it is displayed through Markdown components.
    custom_css = """
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@300;400;500;600;700&display=swap');
.gradio-container {
font-family: 'IBM Plex Sans', sans-serif !important;
background: white !important;
}
.contain {
max-width: 100% !important;
width: 100% !important;
margin: 0 auto !important;
background: white !important;
padding: 0 !important;
}
.tab-nav button {
font-family: 'IBM Plex Sans', sans-serif;
font-size: 14px;
font-weight: 500;
padding: 10px 16px;
border-radius: 8px 8px 0 0;
transition: all 0.2s;
}
.tab-nav button.selected {
background: #2563eb;
color: white;
font-weight: 600;
}
button.primary, .primary {
background: #2563eb !important;
color: white !important;
border: none !important;
font-size: 16px !important;
font-weight: 600 !important;
padding: 12px 24px !important;
border-radius: 8px !important;
transition: all 0.2s !important;
}
button.primary:hover, .primary:hover {
background: #1d4ed8 !important;
}
.markdown-text {
font-family: 'IBM Plex Sans', sans-serif;
line-height: 1.7;
}
h1, h2, h3, h4 {
font-family: 'IBM Plex Sans', sans-serif;
font-weight: 600;
}
"""
    with gr.Blocks(title="Authenticity Detection System") as demo:
        # Header and hero banner; the stylesheet is injected inline via <style>.
        gr.HTML(f"""
<style>
{custom_css}
</style>
<header style='background: white; border-bottom: 1px solid #e5e7eb; margin-bottom: 32px;'>
<div style='padding: 16px 0;'>
<div style='display: flex; align-items: center; gap: 12px;'>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="32" height="32">
<defs>
<linearGradient id="g" x1="0" y1="0" x2="64" y2="0" gradientUnits="userSpaceOnUse">
<stop offset="0" stop-color="#1d4ed8" />
<stop offset="1" stop-color="#0ea5e9" />
</linearGradient>
</defs>
<rect x="0" y="0" width="64" height="64" rx="12" fill="#ffffff"/>
<path d="M4 32 C 10 18, 18 46, 24 32 S 36 18, 40 32 52 46, 60 32"
fill="none" stroke="url(#g)" stroke-width="4" stroke-linecap="round" stroke-linejoin="round"/>
</svg>
<div>
<p style='margin: 0; font-size: 11px; text-transform: uppercase; letter-spacing: 1.5px; color: #6b7280; font-weight: 500;'>
LEIDEN UNIVERSITY · LIACS
</p>
<h1 style='margin: 0; font-size: 18px; font-weight: 600; color: #111827;'>
Audio Processing & Indexing Project
</h1>
</div>
</div>
</div>
</header>
<section style='background: linear-gradient(to bottom, white, #f9fafb); margin-bottom: 40px;'>
<div style='padding: 32px 0;'>
<h2 style='font-size: 32px; font-weight: 700; line-height: 1.2; color: #111827; margin: 0 0 16px 0;'>
Detecting AI-Assisted Responses in Online Settings
</h2>
<p style='font-size: 18px; color: #374151; margin: 0 0 24px 0;'>
</p>
<div style='display: flex; flex-wrap: wrap; gap: 12px;'>
<span style='display: inline-flex; align-items: center; padding: 8px 16px; background: #eff6ff; color: #1e40af; border-radius: 8px; font-size: 14px; font-weight: 500;'>
Multi-Modal Analysis
</span>
<span style='display: inline-flex; align-items: center; padding: 8px 16px; background: #fef3c7; color: #92400e; border-radius: 8px; font-size: 14px; font-weight: 500;'>
Acoustic + Linguistic
</span>
</div>
</div>
</section>
""")
        with gr.Row():
            # Left column: audio input, trigger button, usage notes.
            with gr.Column(scale=1):
                gr.HTML("""
<div style='background: white; border: 1px solid #e5e7eb; padding: 20px; border-radius: 16px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); margin-bottom: 20px;'>
<h3 style='margin: 0; font-size: 18px; font-weight: 600; color: #111827;'>Audio Input</h3>
<p style='margin: 8px 0 0 0; font-size: 14px; color: #6b7280;'>Upload or record your audio file</p>
</div>
""")
                audio_input = gr.Audio(
                    sources=["upload", "microphone"],
                    type="filepath",
                    label="Audio File",
                    show_label=False
                )
                analyze_btn = gr.Button(
                    "Analyze Audio",
                    variant="primary",
                    size="lg"
                )
                gr.HTML("""
<div style='background: white; border: 1px solid #e5e7eb; padding: 20px; border-radius: 16px; margin-top: 20px;'>
<h4 style='margin: 0 0 12px 0; font-size: 14px; font-weight: 600; color: #111827;'>Requirements</h4>
<ul style='margin: 0; padding-left: 20px; font-size: 13px; color: #6b7280; line-height: 1.8;'>
<li><strong>Formats:</strong> WAV, MP3, M4A, FLAC, OGG</li>
<li><strong>Duration:</strong> 30 sec - 5 min</li>
</ul>
</div>
<div style='background: #fef3c7; border: 1px solid #fbbf24; padding: 16px; border-radius: 12px; margin-top: 16px;'>
<div style='font-size: 12px; color: #92400e; line-height: 1.6;'>
<strong>Note:</strong> Provides probabilistic assessments.
Use as one factor in evaluation.
</div>
</div>
""")
            # Right column: overall summary plus one tab per analysis section.
            with gr.Column(scale=2):
                gr.HTML("""
<div style='background: white; border: 1px solid #e5e7eb; padding: 20px; border-radius: 16px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); margin-bottom: 20px;'>
<h3 style='margin: 0; font-size: 18px; font-weight: 600; color: #111827;'>Analysis Results</h3>
<p style='margin: 8px 0 0 0; font-size: 14px; color: #6b7280;'>You'll see results here</p>
</div>
""")
                overall_output = gr.Markdown()
                with gr.Tabs():
                    with gr.Tab("Acoustic Features"):
                        acoustic_output = gr.Markdown()
                    with gr.Tab("Transcription"):
                        transcription_output = gr.Markdown()
                    with gr.Tab("Speech Patterns"):
                        speech_output = gr.Markdown()
                    with gr.Tab("AI Detection"):
                        ai_output = gr.Markdown()

        # Single source of truth for the five result components, in the order
        # analyze_audio_file() and show_loading() return their values.
        result_components = [
            overall_output,
            acoustic_output,
            transcription_output,
            speech_output,
            ai_output,
        ]

        # Example audio files with caching. Only files that are actually
        # present are offered, so a missing example cannot break app start-up.
        examples_dir = os.path.join(os.path.dirname(__file__), "examples")
        example_rows = [
            [path]
            for path in (
                os.path.join(examples_dir, "read1.ogg"),
                os.path.join(examples_dir, "spontaneous1.ogg"),
            )
            if os.path.isfile(path)
        ]
        if example_rows:
            gr.HTML("""
<div style='margin-top: 20px; margin-bottom: 10px;'>
<h4 style='margin: 0 0 8px 0; font-size: 14px; font-weight: 600; color: #111827;'>Try these examples:</h4>
</div>
""")
            gr.Examples(
                examples=example_rows,
                inputs=[audio_input],
                outputs=result_components,
                fn=analyze_audio_file,
                label="",
                examples_per_page=2,
                cache_examples=True
            )

        def show_loading():
            """Return placeholder content for the five result components."""
            loading_html = """
<div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border: 2px solid #667eea; padding: 30px; border-radius: 16px; margin: 10px 0; text-align: center;'>
<h2 style='color: white; margin: 0 0 15px 0; font-size: 24px; font-weight: 700;'>
Analyzing...
</h2>
<div style='margin-top: 20px;'>
<div style='display: inline-block; width: 12px; height: 12px; border-radius: 50%; background: white; margin: 0 4px; animation: pulse 1.5s ease-in-out infinite;'></div>
<div style='display: inline-block; width: 12px; height: 12px; border-radius: 50%; background: white; margin: 0 4px; animation: pulse 1.5s ease-in-out 0.2s infinite;'></div>
<div style='display: inline-block; width: 12px; height: 12px; border-radius: 50%; background: white; margin: 0 4px; animation: pulse 1.5s ease-in-out 0.4s infinite;'></div>
</div>
</div>
<style>
@keyframes pulse {
0%, 100% { opacity: 0.3; transform: scale(0.8); }
50% { opacity: 1; transform: scale(1.2); }
}
</style>
"""
            loading_msg = " **Processing...**"
            return loading_html, loading_msg, loading_msg, loading_msg, loading_msg

        # Two-step click: show the placeholder first (queue=False), then run
        # the analysis into the same components.
        analyze_btn.click(
            fn=show_loading,
            inputs=None,
            outputs=result_components,
            queue=False
        ).then(
            fn=analyze_audio_file,
            inputs=[audio_input],
            outputs=result_components
        )
        gr.HTML("""
<footer style='border-top: 1px solid #e5e7eb; background: white; margin-top: 48px; padding: 32px 0;'>
<div style='text-align: center;'>
<p style='margin: 0; font-size: 14px; color: #6b7280;'>
</p>
<p style='margin: 8px 0 0 0; font-size: 13px; color: #9ca3af;'>
</p>
</div>
</footer>
""")
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, then start the server with the
    # options below (all interfaces, port 7860, no share link).
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo = create_interface()
    demo.launch(**launch_options)