Spaces:

Khalil09
/

AiVoiceDetector

Sleeping

App Files Files Community

AiVoiceDetector / app.py

Khalil09

Update app.py

8851579 verified 25 days ago

raw

history blame contribute delete

8.03 kB

	import os
	import warnings
	import torch
	import librosa
	import librosa.display
	import numpy as np
	import matplotlib.pyplot as plt
	import plotly.graph_objects as go
	import gradio as gr
	from transformers import pipeline

	# Install a blanket "ignore" filter: every Python warning raised after this
	# point, from any module, is suppressed for the lifetime of the process.
	# NOTE(review): presumably meant to keep library deprecation noise out of the
	# logs; it also hides actionable warnings, so confirm this is intended.
	warnings.filterwarnings("ignore")

	# ── DESIGN TOKENS ──────────────────────────────────────────────────────────
	# Shared colour palette (hex strings) referenced by the plots, the verdict
	# HTML and the page stylesheet. Keys are looked up by name throughout the file.
	COLORS = dict(
	    bg="#05070a",
	    panel="#0b0e14",
	    accent="#00f2ff",    # Cyan
	    accent2="#7000ff",   # Purple
	    danger="#ff0055",    # Red
	    success="#00ff88",   # Green
	    text="#e0e0e0",
	    subtext="#4e5561",
	)

	# ═══════════════════════════════════════════════════════════════════════════
	# AI VOICE DETECTOR ENGINE
	# ═══════════════════════════════════════════════════════════════════════════
	# Hugging Face model identifier passed to the audio-classification pipeline.
	MODEL_ID = "garystafford/wav2vec2-deepfake-voice-detector"

	# Bind `pipe` before attempting the load so the name always exists: if the
	# load fails, `pipe` stays None instead of being left undefined (which would
	# surface later as a confusing NameError on the first request).
	pipe = None

	# transformers convention: device index 0 selects the first CUDA GPU, -1 the CPU.
	device = 0 if torch.cuda.is_available() else -1

	try:
	    pipe = pipeline("audio-classification", model=MODEL_ID, device=device)
	except Exception as e:
	    # Best-effort start-up: the UI is still allowed to launch without a model.
	    # The broad catch is deliberate here because loading can fail in many
	    # ways (network, disk, incompatible weights); the failure is reported.
	    print(f"Engine Error: {e}")

	def create_chroma_plt(y, sr):
	    """Render a constant-Q chromagram of an audio signal as a dark-themed figure.

	    Args:
	        y: Audio samples, passed straight to ``librosa.feature.chroma_cqt``.
	        sr: Sampling rate of ``y`` in Hz.

	    Returns:
	        A Matplotlib ``Figure`` holding the chromagram and its colour bar.
	    """
	    hop_length = 512

	    # Scope the dark theme to this figure only. ``plt.style.use`` would rewrite
	    # the process-wide rcParams and silently restyle every later plot.
	    with plt.style.context('dark_background'):
	        # A bare Figure is not registered with pyplot's figure manager, so it is
	        # garbage-collected once the caller drops it (no per-request leak) and
	        # no "current figure" global state is shared between concurrent requests.
	        fig = plt.Figure(figsize=(10, 3))
	        ax = fig.subplots()
	        fig.patch.set_facecolor(COLORS["panel"])
	        ax.set_facecolor(COLORS["panel"])

	        chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
	        # Pass sr/hop_length explicitly: specshow otherwise assumes 22050 Hz and
	        # the time axis would be mis-scaled for audio at any other rate.
	        img = librosa.display.specshow(
	            chroma,
	            sr=sr,
	            hop_length=hop_length,
	            y_axis='chroma',
	            x_axis='time',
	            ax=ax,
	            cmap='coolwarm',
	        )

	        ax.set_title("HARMONIC ALIGNMENT (CQT CHROMA)", color=COLORS["accent2"], fontsize=10, pad=10)
	        ax.tick_params(axis='both', colors=COLORS["subtext"], labelsize=7)
	        fig.colorbar(img, ax=ax).ax.tick_params(colors=COLORS["subtext"])
	        fig.tight_layout()
	    return fig

	def get_analysis(audio_path):
	    """Classify an audio file and compute the auxiliary radar indicators.

	    Args:
	        audio_path: Path of the audio file to analyse.

	    Returns:
	        A tuple ``(ai_prob, sub_indicators, y, sr)`` where ``ai_prob`` is the
	        classifier score for the ``FAKE`` label, ``sub_indicators`` maps four
	        indicator names to floats clipped to ``[0, 1]``, and ``y`` / ``sr`` are
	        the decoded samples (first 10 seconds, resampled to 16 kHz) and their
	        sampling rate.

	    Raises:
	        RuntimeError: If the classification pipeline is not available.
	        ValueError: If the file decodes to zero samples.
	    """
	    # Look the pipeline up defensively: the module-level loader swallows load
	    # errors, so the name may be unbound (or None) when the model is missing.
	    classifier = globals().get("pipe")
	    if classifier is None:
	        raise RuntimeError("Voice detection model is not loaded; see 'Engine Error' in the startup log.")

	    y, sr = librosa.load(audio_path, sr=16000, duration=10)
	    if y.size == 0:
	        # The feature extractors below cannot produce meaningful values
	        # (np.mean of nothing is NaN), so fail with a clear message instead.
	        raise ValueError("Audio file contains no samples.")

	    # The classifier receives the file path (whole file), while the hand-made
	    # indicators below only see the first 10 seconds loaded above.
	    results = classifier(audio_path)
	    scores_dict = {res['label'].upper(): res['score'] for res in results}
	    # NOTE(review): assumes the model emits a "fake" label; a missing label
	    # silently reads as probability 0 - confirm against the model card.
	    ai_prob = float(scores_dict.get('FAKE', 0))

	    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
	    flatness = np.mean(librosa.feature.spectral_flatness(y=y))
	    # chroma_stft maps FFT bins to pitch classes using the sampling rate; it
	    # must be given the real rate or it assumes 22050 Hz and mislabels bins.
	    chroma_spread = np.std(librosa.feature.chroma_stft(y=y, sr=sr))

	    # Every indicator is a plain Python float clamped to [0, 1] so all radar
	    # axes share the same scale and type.
	    sub_indicators = {
	        "Neural Pulse": ai_prob,
	        "Spectral Flatness": float(np.clip(flatness * 25, 0, 1)),
	        "Waveform Jitter": float(np.clip(zcr * 10, 0, 1)),
	        "Harmonic Drift": float(np.clip(1 - (chroma_spread * 2), 0, 1)),
	    }

	    return ai_prob, sub_indicators, y, sr

	# ═══════════════════════════════════════════════════════════════════════════
	# UI HANDLER (With 50MB File Size Limit)
	# ═══════════════════════════════════════════════════════════════════════════

	def main_process(audio_path):
	    """Gradio click handler: analyse an uploaded file and build all four outputs.

	    Args:
	        audio_path: Path of the uploaded audio file, or ``None`` when nothing
	            was provided.

	    Returns:
	        A 4-tuple ``(verdict_html, radar_fig, spec_fig, chroma_fig)``. When the
	        input is missing, larger than 50 MB, or the analysis fails, the first
	        element is an HTML message and the three figures are ``None``.
	    """
	    if audio_path is None:
	        return "<div style='color:orange; text-align:center;'>⚠️ No audio file detected.</div>", None, None, None

	    # Check File Size (50MB Limit)
	    file_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
	    if file_size_mb > 50:
	        return f"""
	        <div style="background: {COLORS['panel']}; border: 2px solid {COLORS['danger']}; border-radius: 15px; padding: 20px; text-align: center;">
	        <h3 style="color: {COLORS['danger']};">FILE TOO LARGE</h3>
	        <p style="color: {COLORS['text']};">Maximum allowed size is <b>50MB</b>.</p>
	        <p style="color: {COLORS['subtext']};">Current file: {file_size_mb:.1f}MB</p>
	        </div>
	        """, None, None, None

	    # UI boundary: report any analysis failure (undecodable file, model not
	    # loaded, ...) as a panel rather than an unhandled exception. The details
	    # go to the log only, so internal messages are never injected into HTML.
	    try:
	        score, sub_scores, y, sr = get_analysis(audio_path)
	    except Exception as e:
	        print(f"Analysis Error: {e}")
	        return f"""
	        <div style="background: {COLORS['panel']}; border: 2px solid {COLORS['danger']}; border-radius: 15px; padding: 20px; text-align: center;">
	        <h3 style="color: {COLORS['danger']};">ANALYSIS FAILED</h3>
	        <p style="color: {COLORS['text']};">The audio could not be analysed. Please try a different file.</p>
	        </div>
	        """, None, None, None

	    percentage = score * 100
	    is_human = score < 0.5
	    v_color = COLORS["success"] if is_human else COLORS["danger"]
	    label = "AUTHENTIC HUMAN" if is_human else "AI SYNTHETIC"

	    # The bar width uses the same one-decimal rounding as the displayed number.
	    verdict_html = f"""
	    <div style="background: {COLORS['panel']}; border-radius: 15px; padding: 30px; text-align: center; border: 1px solid #1e2530; border-top: 5px solid {v_color};">
	    <p style="color: {COLORS['subtext']}; text-transform: uppercase; letter-spacing: 2px; margin: 0; font-size: 12px;">Forensic Result</p>
	    <h2 style="color: {v_color}; font-size: 38px; margin: 10px 0;">{label}</h2>

	    <div style="margin: 20px 0;">
	    <span style="color: {v_color}; font-size: 54px; font-weight: bold; font-family: 'JetBrains Mono', monospace;">{percentage:.1f}%</span>
	    <p style="color: {COLORS['text']}; margin: 0; opacity: 0.8;">AI Probability Score</p>
	    </div>

	    <div style="background: #161b22; height: 10px; border-radius: 5px; width: 100%; overflow: hidden;">
	    <div style="background: {v_color}; height: 100%; width: {percentage:.1f}%; border-radius: 5px; box-shadow: 0 0 15px {v_color}; transition: width 1s ease-in-out;"></div>
	    </div>
	    </div>
	    """

	    # Repeat the first point at the end so the radar polygon is closed;
	    # slicing with [:1] stays valid even if there are no indicators.
	    indicator_names = list(sub_scores)
	    indicator_values = list(sub_scores.values())
	    radar_fig = go.Figure(go.Scatterpolar(
	        r=indicator_values + indicator_values[:1],
	        theta=indicator_names + indicator_names[:1],
	        fill='toself', fillcolor='rgba(112, 0, 255, 0.2)', line=dict(color=COLORS["accent2"])
	    ))
	    radar_fig.update_layout(polar=dict(bgcolor='rgba(0,0,0,0)', radialaxis=dict(visible=False)),
	                            paper_bgcolor='rgba(0,0,0,0)', showlegend=False, height=300, margin=dict(t=30, b=30))

	    # Build the spectrogram on an explicit, unmanaged Figure/Axes: nothing is
	    # left registered in pyplot (no per-request leak), no "current figure" is
	    # shared between concurrent requests, and the dark theme is applied on
	    # every call rather than depending on global style state set elsewhere.
	    with plt.style.context('dark_background'):
	        spec_plt = plt.Figure(figsize=(10, 3))
	        spec_ax = spec_plt.subplots()
	        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
	        librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', cmap='magma', ax=spec_ax)
	        spec_ax.set_title("FREQUENCY SPECTRUM", color=COLORS["accent"], fontsize=9)
	        spec_plt.tight_layout()

	    chroma_plt = create_chroma_plt(y, sr)

	    return verdict_html, radar_fig, spec_plt, chroma_plt

	# ═══════════════════════════════════════════════════════════════════════════
	# INTERFACE
	# ═══════════════════════════════════════════════════════════════════════════

	# Page-level stylesheet handed to gr.Blocks(css=...). It is an f-string:
	# single braces interpolate palette values from COLORS, doubled braces are
	# the literal CSS rule braces. `.header` matches the elem_classes of the
	# title column in the layout.
	CSS = f"""
	.gradio-container {{ background-color: {COLORS['bg']} !important; color: {COLORS['text']} !important; font-family: 'Inter', sans-serif; }}
	.header {{ text-align: center; padding: 25px; border-bottom: 1px solid #1e2530; margin-bottom: 25px; }}
	"""

	# Two-column layout: inputs and the indicator radar on the left, the verdict
	# card and the two spectral plots on the right.
	with gr.Blocks(css=CSS, title="AI VOICE DETECTOR") as demo:
	    with gr.Column(elem_classes=["header"]):
	        gr.HTML(f"<h1 style='color:{COLORS['accent']}; letter-spacing:8px; font-weight: bold;'>AI VOICE DETECTOR</h1>")
	        # A plain "|" is used: "\|" is an invalid escape sequence in a normal
	        # Python string literal, and a lone pipe renders the same in Markdown.
	        gr.Markdown("Neural Forensics Laboratory | Maximum Upload Limit: 50MB")

	    with gr.Row():
	        with gr.Column(scale=2):
	            # type="filepath" makes the handler receive a path on disk.
	            audio_in = gr.Audio(label="Input Signal (Max 50MB)", type="filepath")
	            run_btn = gr.Button("COMMENCE % ANALYSIS", variant="primary")
	            radar_out = gr.Plot(label="Indicator Radar")

	        with gr.Column(scale=3):
	            verdict_out = gr.HTML("<div style='text-align:center; padding:60px; color:#333; border: 1px dashed #1e2530; border-radius: 15px;'>Ready for signal acquisition...</div>")
	            spec_out = gr.Plot(label="Spectrogram Scan")
	            chroma_out = gr.Plot(label="Harmonic Chroma Analysis")

	    # Output order must match the 4-tuple returned by main_process.
	    run_btn.click(main_process, audio_in, [verdict_out, radar_out, spec_out, chroma_out])

	if __name__ == "__main__":
	    # Start the app only when run as a script. Uploads are capped at 50MB by
	    # the server (max_file_size), in addition to the check in the handler.
	    launch_options = {"max_file_size": "50mb"}
	    demo.launch(**launch_options)