import os
import traceback

import gradio as gr

from pipeline import AuthenticityDetectionPipeline
try:
    pipeline = AuthenticityDetectionPipeline(whisper_model_size="base")
    pipeline_ready = True
except Exception:
    traceback.print_exc()  # surface the startup failure in the Space logs
    pipeline_ready = False


def analyze_audio_file(audio_file):
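    """Run the full authenticity pipeline on one audio file.

    Returns a 5-tuple of markdown/HTML strings in the order the interface
    expects: (overall status, acoustic features, transcription, speech
    patterns, AI detection). The key names read from `results` below reflect
    how this app consumes the pipeline's output dict.
    """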
    if not pipeline_ready:
        return (
            "Error: Pipeline not initialized. Please check the installation.",
            "", "", "", ""
        )
    if audio_file is None:
        return (
            "Please upload an audio file.",
            "", "", "", ""
        )
    try:
        language_code = None  # None lets the pipeline auto-detect the language
        results = pipeline.analyze_audio(audio_file, language=language_code)
        audio_class = results['audio_classification']
        asr = results['speech_recognition']
        text_auth = results['text_authenticity']
        final = results['final_assessment']

        verdict_color = {
            "AUTHENTIC": "#10b981",
            "LIKELY AUTHENTIC": "#3b82f6",
            "QUESTIONABLE": "#f59e0b",
            "LIKELY INAUTHENTIC": "#ef4444"
        }
        color = verdict_color.get(final['verdict'], '#6b7280')
        # HTML below stays flush-left: gr.Markdown renders this string, and
        # markdown treats indented lines as code blocks.
        overall_status = f"""
<div style='background: white; border: 2px solid {color}; padding: 25px; border-radius: 16px; margin: 10px 0;'>
<h2 style='color: {color}; margin: 0 0 15px 0; font-size: 24px; font-weight: 700;'>
{final['verdict']}
</h2>
<div style='display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 15px; margin: 15px 0;'>
<div style='text-align: center; padding: 15px; background: white; border-radius: 10px;'>
<div style='font-size: 2em; font-weight: bold; color: {color};'>{final['composite_authenticity_score']*100:.0f}%</div>
<div style='color: #666; margin-top: 5px;'>Authenticity Score</div>
</div>
<div style='text-align: center; padding: 15px; background: white; border-radius: 10px;'>
<div style='font-size: 2em; font-weight: bold; color: {color};'>{final['risk_level'].upper()}</div>
<div style='color: #666; margin-top: 5px;'>Risk Level</div>
</div>
<div style='text-align: center; padding: 15px; background: white; border-radius: 10px;'>
<div style='font-size: 2em; font-weight: bold; color: #667eea;'>{results['processing_time']:.1f}s</div>
<div style='color: #666; margin-top: 5px;'>Processing Time</div>
</div>
</div>
<div style='background: white; padding: 15px; border-radius: 10px; margin-top: 15px;'>
<em style='color: #555;'>{final['recommendation']}</em>
</div>
</div>
"""
        acoustic_output = audio_class['interpretation']

        transcription_output = "### Speech Transcription\n\n"
        transcription_output += "| Metric | Value |\n"
        transcription_output += "|--------|-------|\n"
        transcription_output += f"| **Language** | {asr['language'].upper()} |\n"
        transcription_output += f"| **Duration** | {asr['duration']:.1f} seconds |\n"
        transcription_output += f"| **Word Count** | {asr['word_count']} words |\n"
        transcription_output += f"| **Speech Rate** | {asr['speech_rate']:.1f} words/min |\n\n"
        if asr['speech_rate'] > 160:
            transcription_output += "**Fast speech rate** - Above average speaking speed\n\n"
        elif asr['speech_rate'] < 120:
            transcription_output += "**Slow speech rate** - Below average speaking speed\n\n"
        else:
            transcription_output += "**Normal speech rate** - Average conversational pace\n\n"
        transcription_output += "---\n\n"
        transcription_output += "#### Full Transcription\n\n"
        transcription_output += f"> {asr['transcription']}"
        speech_patterns = ""  # fallback so the tab is defined even without linguistic features
        if 'kopparapu_score' in asr:
            classification = asr['kopparapu_classification'].upper()
            confidence = asr['kopparapu_score'] if asr['kopparapu_score'] >= 0.5 else (1 - asr['kopparapu_score'])
            speech_patterns = f"### **Classification: {classification} SPEECH**\n\n"
            speech_patterns += f"**Score:** {asr['kopparapu_score']:.3f} (0 = spontaneous, 1 = read)\n"
            speech_patterns += f"**Confidence:** {confidence*100:.1f}%\n\n"
| speech_patterns += "---\n\n" | |
| speech_patterns += "#### Linguistic Metrics\n\n" | |
| kf = asr['kopparapu_features'] | |
| speech_patterns += "| Feature | Value | Interpretation |\n" | |
| speech_patterns += "|---------|-------|----------------|\n" | |
| speech_patterns += f"| **Characters/Word** | {kf['chars_per_word']:.2f} | " | |
| if kf['chars_per_word'] > 5.5: | |
| speech_patterns += "Complex vocabulary |\n" | |
| elif kf['chars_per_word'] < 4.5: | |
| speech_patterns += "Simple vocabulary |\n" | |
| else: | |
| speech_patterns += "Average complexity |\n" | |
| speech_patterns += f"| **Words/Second** | {kf['words_per_sec']:.2f} | " | |
| if kf['words_per_sec'] > 3: | |
| speech_patterns += "Fast pacing |\n" | |
| elif kf['words_per_sec'] < 2: | |
| speech_patterns += "Slow pacing |\n" | |
| else: | |
| speech_patterns += "Normal pacing |\n" | |
| speech_patterns += f"| **Filler Rate** | {kf['filler_rate']*100:.1f}% | " | |
| if kf['filler_rate'] > 0.05: | |
| speech_patterns += "High (spontaneous) |\n" | |
| elif kf['filler_rate'] < 0.02: | |
| speech_patterns += "Low (scripted) |\n" | |
| else: | |
| speech_patterns += "Moderate |\n" | |
| speech_patterns += f"| **Repetitions** | {kf['repetition_count']} | " | |
| if kf['repetition_count'] > 3: | |
| speech_patterns += "Multiple (thinking aloud) |\n" | |
| elif kf['repetition_count'] == 0: | |
| speech_patterns += "None (prepared) |\n" | |
| else: | |
| speech_patterns += "Few |\n" | |
| speech_patterns += "\n---\n\n" | |
| speech_patterns += "#### Reading Style Indicators\n\n" | |
| speech_patterns += "| Feature | Value | Interpretation |\n" | |
| speech_patterns += "|---------|-------|----------------|\n" | |
| # Pause regularity | |
| pause_reg = kf.get('pause_regularity', 0.5) | |
| speech_patterns += f"| **Pause Regularity** | {pause_reg:.2f} | " | |
| if pause_reg > 0.7: | |
| speech_patterns += "Very regular (read) |\n" | |
| elif pause_reg > 0.4: | |
| speech_patterns += "Moderate |\n" | |
| else: | |
| speech_patterns += "Irregular (spontaneous) |\n" | |
| # Speech rate variability | |
| rate_var = kf.get('speech_rate_variability', 0.0) | |
| speech_patterns += f"| **Rate Variability** | {rate_var:.2f} | " | |
| if rate_var > 0.6: | |
| speech_patterns += "High (spontaneous) |\n" | |
| elif rate_var > 0.3: | |
| speech_patterns += "Moderate |\n" | |
| else: | |
| speech_patterns += "Steady pace (read) |\n" | |
| # Sentence variance | |
| sent_var = kf.get('sentence_length_variance', 0.0) | |
| speech_patterns += f"| **Sentence Variance** | {sent_var:.2f} | " | |
| if sent_var > 0.5: | |
| speech_patterns += "Variable (spontaneous) |\n" | |
| elif sent_var > 0.25: | |
| speech_patterns += "Moderate |\n" | |
| else: | |
| speech_patterns += "Uniform (read) |\n" | |
| # Self-corrections | |
| corrections = kf.get('self_correction_count', 0) | |
| speech_patterns += f"| **Self-Corrections** | {corrections} | " | |
| if corrections > 2: | |
| speech_patterns += "Multiple (spontaneous) |\n" | |
| elif corrections > 0: | |
| speech_patterns += "Few |\n" | |
| else: | |
| speech_patterns += "None (scripted) |\n" | |
| speech_patterns += "\n" | |
| speech_patterns += "---\n\n" | |
| speech_patterns += "#### Filler Words & Disfluencies\n\n" | |
| filler_ratio = asr['filler_words']['ratio'] | |
| speech_patterns += f"**Count:** {asr['filler_words']['count']} filler words\n" | |
| speech_patterns += f"**Ratio:** {filler_ratio*100:.2f}% of speech\n\n" | |
| if asr['filler_words']['details']: | |
| speech_patterns += "**Found:** " + ', '.join([f"*{k}* ({v})" for k, v in asr['filler_words']['details'].items()]) + "\n\n" | |
| if filler_ratio > 0.05: | |
| speech_patterns += "**High filler usage** - Strong indicator of spontaneous, unscripted speech\n\n" | |
| elif filler_ratio < 0.02: | |
| speech_patterns += "**Low filler usage** - May indicate reading or highly rehearsed speech\n\n" | |
| else: | |
| speech_patterns += "**Moderate filler usage** - Normal conversational pattern\n\n" | |
| speech_patterns += "---\n\n" | |
| speech_patterns += "#### Pause Patterns\n\n" | |
| pause_var = asr['pause_patterns']['pause_variability'] | |
| speech_patterns += f"**Total Pauses:** {asr['pause_patterns']['num_pauses']}\n" | |
| speech_patterns += f"**Average Duration:** {asr['pause_patterns']['avg_pause']:.2f}s\n" | |
| speech_patterns += f"**Longest Pause:** {asr['pause_patterns']['max_pause']:.2f}s\n" | |
| speech_patterns += f"**Variability:** {pause_var:.2f}\n\n" | |
| if pause_var < 0.3: | |
| speech_patterns += "**Regular pauses** - Consistent pattern suggests reading at punctuation marks\n\n" | |
| elif pause_var > 0.6: | |
| speech_patterns += "**Irregular pauses** - Natural thinking breaks indicate spontaneous speech\n\n" | |
| else: | |
| speech_patterns += "**Moderate variability** - Mixed pattern\n\n" | |
        is_ai = text_auth['ai_detection']['ai_generated']
        ai_prob = text_auth['ai_detection']['confidence']
        if is_ai:
            ai_output = "### **LIKELY AI-GENERATED**\n\n"
        else:
            ai_output = "### **LIKELY HUMAN-WRITTEN**\n\n"
        ai_output += "**Confidence:**\n\n"
        bar_length = 30
        ai_bars = int(ai_prob * bar_length)
        human_bars = bar_length - ai_bars
        ai_output += f"```\nAI:    [{'█' * ai_bars}{'░' * human_bars}] {ai_prob*100:.0f}%\n"
        ai_output += f"Human: [{'█' * human_bars}{'░' * ai_bars}] {(1-ai_prob)*100:.0f}%\n```\n\n"
        ai_output += "---\n\n"
        ai_output += "#### Interpretation\n\n"
        ai_output += text_auth['ai_detection'].get('interpretation') or "No interpretation available."
        return (
            overall_status,
            acoustic_output,
            transcription_output,
            speech_patterns,
            ai_output,
        )
    except Exception as e:
        error_msg = f"Error during analysis:\n\n{str(e)}\n\n{traceback.format_exc()}"
        return (error_msg, "", "", "", "")  # one message plus four empty tabs


def create_interface():
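    """Build the Gradio Blocks UI and wire the analysis callbacks."""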
    custom_css = """
    @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@300;400;500;600;700&display=swap');

    .gradio-container {
        font-family: 'IBM Plex Sans', sans-serif !important;
        background: white !important;
    }
    .contain {
        max-width: 100% !important;
        width: 100% !important;
        margin: 0 auto !important;
        background: white !important;
        padding: 0 !important;
    }
    .tab-nav button {
        font-family: 'IBM Plex Sans', sans-serif;
        font-size: 14px;
        font-weight: 500;
        padding: 10px 16px;
        border-radius: 8px 8px 0 0;
        transition: all 0.2s;
    }
    .tab-nav button.selected {
        background: #2563eb;
        color: white;
        font-weight: 600;
    }
    button.primary, .primary {
        background: #2563eb !important;
        color: white !important;
        border: none !important;
        font-size: 16px !important;
        font-weight: 600 !important;
        padding: 12px 24px !important;
        border-radius: 8px !important;
        transition: all 0.2s !important;
    }
    button.primary:hover, .primary:hover {
        background: #1d4ed8 !important;
    }
    .markdown-text {
        font-family: 'IBM Plex Sans', sans-serif;
        line-height: 1.7;
    }
    h1, h2, h3, h4 {
        font-family: 'IBM Plex Sans', sans-serif;
        font-weight: 600;
    }
    """
    with gr.Blocks(title="Authenticity Detection System", css=custom_css) as demo:
        gr.HTML("""
        <header style='background: white; border-bottom: 1px solid #e5e7eb; margin-bottom: 32px;'>
            <div style='padding: 16px 0;'>
                <div style='display: flex; align-items: center; gap: 12px;'>
                    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="32" height="32">
                        <defs>
                            <linearGradient id="g" x1="0" y1="0" x2="64" y2="0" gradientUnits="userSpaceOnUse">
                                <stop offset="0" stop-color="#1d4ed8" />
                                <stop offset="1" stop-color="#0ea5e9" />
                            </linearGradient>
                        </defs>
                        <rect x="0" y="0" width="64" height="64" rx="12" fill="#ffffff"/>
                        <path d="M4 32 C 10 18, 18 46, 24 32 S 36 18, 40 32 52 46, 60 32"
                              fill="none" stroke="url(#g)" stroke-width="4" stroke-linecap="round" stroke-linejoin="round"/>
                    </svg>
                    <div>
                        <p style='margin: 0; font-size: 11px; text-transform: uppercase; letter-spacing: 1.5px; color: #6b7280; font-weight: 500;'>
                            LEIDEN UNIVERSITY · LIACS
                        </p>
                        <h1 style='margin: 0; font-size: 18px; font-weight: 600; color: #111827;'>
                            Audio Processing & Indexing Project
                        </h1>
                    </div>
                </div>
            </div>
        </header>
        <section style='background: linear-gradient(to bottom, white, #f9fafb); margin-bottom: 40px;'>
            <div style='padding: 32px 0;'>
                <h2 style='font-size: 32px; font-weight: 700; line-height: 1.2; color: #111827; margin: 0 0 16px 0;'>
                    Detecting AI-Assisted Responses in Online Settings
                </h2>
                <div style='display: flex; flex-wrap: wrap; gap: 12px;'>
                    <span style='display: inline-flex; align-items: center; padding: 8px 16px; background: #eff6ff; color: #1e40af; border-radius: 8px; font-size: 14px; font-weight: 500;'>
                        Multi-Modal Analysis
                    </span>
                    <span style='display: inline-flex; align-items: center; padding: 8px 16px; background: #fef3c7; color: #92400e; border-radius: 8px; font-size: 14px; font-weight: 500;'>
                        Acoustic + Linguistic
                    </span>
                </div>
            </div>
        </section>
        """)
        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("""
                <div style='background: white; border: 1px solid #e5e7eb; padding: 20px; border-radius: 16px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); margin-bottom: 20px;'>
                    <h3 style='margin: 0; font-size: 18px; font-weight: 600; color: #111827;'>Audio Input</h3>
                    <p style='margin: 8px 0 0 0; font-size: 14px; color: #6b7280;'>Upload or record your audio file</p>
                </div>
                """)
                audio_input = gr.Audio(
                    sources=["upload", "microphone"],
                    type="filepath",
                    label="Audio File",
                    show_label=False
                )
                analyze_btn = gr.Button(
                    "Analyze Audio",
                    variant="primary",
                    size="lg"
                )
                gr.HTML("""
                <div style='background: white; border: 1px solid #e5e7eb; padding: 20px; border-radius: 16px; margin-top: 20px;'>
                    <h4 style='margin: 0 0 12px 0; font-size: 14px; font-weight: 600; color: #111827;'>Requirements</h4>
                    <ul style='margin: 0; padding-left: 20px; font-size: 13px; color: #6b7280; line-height: 1.8;'>
                        <li><strong>Formats:</strong> WAV, MP3, M4A, FLAC, OGG</li>
                        <li><strong>Duration:</strong> 30 sec - 5 min</li>
                    </ul>
                </div>
                <div style='background: #fef3c7; border: 1px solid #fbbf24; padding: 16px; border-radius: 12px; margin-top: 16px;'>
                    <div style='font-size: 12px; color: #92400e; line-height: 1.6;'>
                        <strong>Note:</strong> Results are probabilistic assessments; use them as one factor in a broader evaluation.
                    </div>
                </div>
                """)
            with gr.Column(scale=2):
                gr.HTML("""
                <div style='background: white; border: 1px solid #e5e7eb; padding: 20px; border-radius: 16px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); margin-bottom: 20px;'>
                    <h3 style='margin: 0; font-size: 18px; font-weight: 600; color: #111827;'>Analysis Results</h3>
                    <p style='margin: 8px 0 0 0; font-size: 14px; color: #6b7280;'>Results will appear here</p>
                </div>
                """)
                overall_output = gr.Markdown()
                with gr.Tabs():
                    with gr.Tab("Acoustic Features"):
                        acoustic_output = gr.Markdown()
                    with gr.Tab("Transcription"):
                        transcription_output = gr.Markdown()
                    with gr.Tab("Speech Patterns"):
                        speech_output = gr.Markdown()
                    with gr.Tab("AI Detection"):
                        ai_output = gr.Markdown()
        # Add example audio files with caching
        gr.HTML("""
        <div style='margin-top: 20px; margin-bottom: 10px;'>
            <h4 style='margin: 0 0 8px 0; font-size: 14px; font-weight: 600; color: #111827;'>Try these examples:</h4>
        </div>
        """)
        examples_dir = os.path.join(os.path.dirname(__file__), "examples")
        gr.Examples(
            examples=[
                [os.path.join(examples_dir, "read1.ogg")],
                [os.path.join(examples_dir, "spontaneous1.ogg")]
            ],
            inputs=[audio_input],
            outputs=[
                overall_output,
                acoustic_output,
                transcription_output,
                speech_output,
                ai_output,
            ],
            fn=analyze_audio_file,
            label="",
            examples_per_page=2,
            cache_examples=True
        )
        def show_loading():
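            """Return placeholder content for all five outputs while the analysis runs."""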
            # Flush-left for the same reason as overall_status: this string
            # is rendered by gr.Markdown.
            loading_html = """
<div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border: 2px solid #667eea; padding: 30px; border-radius: 16px; margin: 10px 0; text-align: center;'>
<h2 style='color: white; margin: 0 0 15px 0; font-size: 24px; font-weight: 700;'>
Analyzing...
</h2>
<div style='margin-top: 20px;'>
<div style='display: inline-block; width: 12px; height: 12px; border-radius: 50%; background: white; margin: 0 4px; animation: pulse 1.5s ease-in-out infinite;'></div>
<div style='display: inline-block; width: 12px; height: 12px; border-radius: 50%; background: white; margin: 0 4px; animation: pulse 1.5s ease-in-out 0.2s infinite;'></div>
<div style='display: inline-block; width: 12px; height: 12px; border-radius: 50%; background: white; margin: 0 4px; animation: pulse 1.5s ease-in-out 0.4s infinite;'></div>
</div>
</div>
<style>
@keyframes pulse {
    0%, 100% { opacity: 0.3; transform: scale(0.8); }
    50% { opacity: 1; transform: scale(1.2); }
}
</style>
"""
            loading_msg = "**Processing...**"
            return loading_html, loading_msg, loading_msg, loading_msg, loading_msg
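        # Two-step event chain: show_loading bypasses the queue so the
        # placeholder renders immediately, then the real analysis replaces it.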
        analyze_btn.click(
            fn=show_loading,
            inputs=None,
            outputs=[
                overall_output,
                acoustic_output,
                transcription_output,
                speech_output,
                ai_output,
            ],
            queue=False
        ).then(
            fn=analyze_audio_file,
            inputs=[audio_input],
            outputs=[
                overall_output,
                acoustic_output,
                transcription_output,
                speech_output,
                ai_output,
            ]
        )
        gr.HTML("""
        <footer style='border-top: 1px solid #e5e7eb; background: white; margin-top: 48px; padding: 32px 0;'>
        </footer>
        """)
    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )