"""Gradio front end for a wav2vec2-based AI voice (deepfake) detector.

The app loads an audio-classification pipeline once at import time, then
for each uploaded file renders a verdict, a radar chart of sub-indicators,
a spectrogram and a chroma plot.

NOTE(review): the string literals passed to ``gr.HTML`` / returned as the
verdict contain only plain text and newlines in this file. It looks as if
their HTML markup was stripped at some point; the visible text is preserved
exactly here — restore the markup from the original source if available.
"""

import os
import warnings

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import torch
from matplotlib.figure import Figure
from transformers import pipeline

warnings.filterwarnings("ignore")

# Apply the dark theme once, up front. Previously it was switched on inside
# create_chroma_plt(), i.e. *after* the first spectrogram had already been
# drawn, so the first request was styled differently from later ones.
plt.style.use("dark_background")

# ── DESIGN TOKENS ──────────────────────────────────────────────────────────
COLORS = {
    "bg": "#05070a",
    "panel": "#0b0e14",
    "accent": "#00f2ff",   # Cyan
    "accent2": "#7000ff",  # Purple
    "danger": "#ff0055",   # Red
    "success": "#00ff88",  # Green
    "text": "#e0e0e0",
    "subtext": "#4e5561",
}

# Single source of truth for the upload limit (used by the size check, the
# UI labels and demo.launch()).
MAX_FILE_SIZE_MB = 50

# Audio is resampled to this rate and truncated to this many seconds before
# the hand-crafted indicators and plots are computed.
SAMPLE_RATE = 16000
ANALYSIS_SECONDS = 10
CHROMA_HOP_LENGTH = 512

# ═══════════════════════════════════════════════════════════════════════════
# AI VOICE DETECTOR ENGINE
# ═══════════════════════════════════════════════════════════════════════════
MODEL_ID = "garystafford/wav2vec2-deepfake-voice-detector"

try:
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline("audio-classification", model=MODEL_ID, device=device)
except Exception as e:  # top-level boundary: keep the UI importable
    print(f"Engine Error: {e}")
    # Leave an explicit sentinel so request handlers can report the problem
    # instead of crashing with NameError on the first request.
    pipe = None


def create_chroma_plt(y, sr):
    """Return a matplotlib Figure showing the CQT chromagram of ``y``.

    Args:
        y: Audio samples as accepted by ``librosa.feature.chroma_cqt``.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        A ``matplotlib.figure.Figure``. It is built without pyplot's global
        figure registry, so nothing has to be closed after the request and
        concurrent requests cannot draw on each other's "current figure".
    """
    fig = Figure(figsize=(10, 3))
    ax = fig.subplots()
    fig.patch.set_facecolor(COLORS["panel"])
    ax.set_facecolor(COLORS["panel"])

    chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=CHROMA_HOP_LENGTH)
    # sr and hop_length must be passed so the time axis is labelled in real
    # seconds; specshow otherwise assumes its own defaults.
    img = librosa.display.specshow(
        chroma,
        y_axis="chroma",
        x_axis="time",
        sr=sr,
        hop_length=CHROMA_HOP_LENGTH,
        ax=ax,
        cmap="coolwarm",
    )
    ax.set_title(
        "HARMONIC ALIGNMENT (CQT CHROMA)",
        color=COLORS["accent2"],
        fontsize=10,
        pad=10,
    )
    ax.tick_params(axis="both", colors=COLORS["subtext"], labelsize=7)
    fig.colorbar(img, ax=ax).ax.tick_params(colors=COLORS["subtext"])
    fig.tight_layout()
    return fig


def _create_spectrogram_fig(y, sr):
    """Return a matplotlib Figure with the dB-scaled STFT spectrogram of ``y``."""
    fig = Figure(figsize=(10, 3))
    ax = fig.subplots()
    spectrum_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    librosa.display.specshow(
        spectrum_db, sr=sr, x_axis="time", y_axis="hz", ax=ax, cmap="magma"
    )
    ax.set_title("FREQUENCY SPECTRUM", color=COLORS["accent"], fontsize=9)
    fig.tight_layout()
    return fig


def _create_radar_fig(sub_scores):
    """Return a closed plotly radar polygon for the sub-indicator scores."""
    names = list(sub_scores)
    values = list(sub_scores.values())
    radar_fig = go.Figure(
        go.Scatterpolar(
            # Repeat the first point so the polygon outline is closed.
            r=values + values[:1],
            theta=names + names[:1],
            fill="toself",
            fillcolor="rgba(112, 0, 255, 0.2)",
            line=dict(color=COLORS["accent2"]),
        )
    )
    radar_fig.update_layout(
        polar=dict(bgcolor="rgba(0,0,0,0)", radialaxis=dict(visible=False)),
        paper_bgcolor="rgba(0,0,0,0)",
        showlegend=False,
        height=300,
        margin=dict(t=30, b=30),
    )
    return radar_fig


def get_analysis(audio_path):
    """Score an audio file and compute the radar sub-indicators.

    Args:
        audio_path: Path to the audio file on disk.

    Returns:
        A tuple ``(ai_prob, sub_indicators, y, sr)`` where ``ai_prob`` is the
        classifier score for the ``FAKE`` label (0 if that label is absent),
        ``sub_indicators`` maps indicator names to values clipped to at most
        1.0, and ``y`` / ``sr`` are the loaded samples and their sample rate.

    Raises:
        RuntimeError: If the classification model failed to load at startup.
        ValueError: If the file decodes to zero samples.
    """
    if pipe is None:
        raise RuntimeError(
            "The detection model failed to load; see the server log for details."
        )

    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE, duration=ANALYSIS_SECONDS)
    if y.size == 0:
        raise ValueError("The audio file contains no samples.")

    # NOTE(review): the classifier is given the file path (so presumably the
    # whole file), while the indicators below only see the first
    # ANALYSIS_SECONDS seconds — confirm this difference is intended.
    results = pipe(audio_path)
    scores_by_label = {res["label"].upper(): res["score"] for res in results}
    ai_prob = scores_by_label.get("FAKE", 0)

    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    flatness = np.mean(librosa.feature.spectral_flatness(y=y))
    # Pass sr explicitly: chroma_stft otherwise assumes librosa's default
    # sample rate rather than the rate the audio was actually loaded at.
    chroma_spread = np.std(librosa.feature.chroma_stft(y=y, sr=sr))

    sub_indicators = {
        "Neural Pulse": ai_prob,
        "Spectral Flatness": float(min(flatness * 25, 1.0)),
        "Waveform Jitter": float(np.clip(zcr * 10, 0, 1)),
        "Harmonic Drift": float(np.clip(1 - (chroma_spread * 2), 0, 1)),
    }
    return ai_prob, sub_indicators, y, sr


# ═══════════════════════════════════════════════════════════════════════════
# UI HANDLER (With 50MB File Size Limit)
# ═══════════════════════════════════════════════════════════════════════════
def main_process(audio_path):
    """Gradio click handler: analyse one uploaded file.

    Args:
        audio_path: File path supplied by the ``gr.Audio`` input, or ``None``
            when nothing was uploaded.

    Returns:
        A 4-tuple ``(verdict_html, radar_fig, spectrogram_fig, chroma_fig)``.
        For a missing or oversized file only the message is set and the three
        plot slots are ``None``.

    Raises:
        gr.Error: If the model is unavailable or the audio has no samples, so
            the reason is shown in the UI.
    """
    if audio_path is None:
        return "\n⚠️ No audio file detected.\n", None, None, None

    # Check File Size (50MB Limit)
    file_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
    if file_size_mb > MAX_FILE_SIZE_MB:
        too_large_html = (
            "\n\nFILE TOO LARGE\n\n"
            f"Maximum allowed size is {MAX_FILE_SIZE_MB}MB.\n\n"
            f"Current file: {file_size_mb:.1f}MB\n\n"
        )
        return too_large_html, None, None, None

    try:
        score, sub_scores, y, sr = get_analysis(audio_path)
    except (RuntimeError, ValueError) as exc:
        raise gr.Error(str(exc)) from exc

    percentage = score * 100
    is_human = score < 0.5
    label = "AUTHENTIC HUMAN" if is_human else "AI SYNTHETIC"
    # NOTE(review): v_color is not referenced by the verdict text as it
    # appears in this file; it was presumably used by styling markup that is
    # no longer present. Kept so the markup can be restored.
    v_color = COLORS["success"] if is_human else COLORS["danger"]  # noqa: F841

    verdict_html = (
        "\n\nForensic Result\n\n"
        f"{label}\n\n"
        f"{percentage:.1f}%\n\n"
        "AI Probability Score\n\n"
    )

    radar_fig = _create_radar_fig(sub_scores)
    spec_plt = _create_spectrogram_fig(y, sr)
    chroma_plt = create_chroma_plt(y, sr)
    return verdict_html, radar_fig, spec_plt, chroma_plt


# ═══════════════════════════════════════════════════════════════════════════
# INTERFACE
# ═══════════════════════════════════════════════════════════════════════════
CSS = f"""
.gradio-container {{
    background-color: {COLORS['bg']} !important;
    color: {COLORS['text']} !important;
    font-family: 'Inter', sans-serif;
}}
.header {{
    text-align: center;
    padding: 25px;
    border-bottom: 1px solid #1e2530;
    margin-bottom: 25px;
}}
"""

# NOTE(review): the nesting of the layout blocks below was reconstructed from
# the statement order; confirm it matches the intended layout.
with gr.Blocks(css=CSS, title="AI VOICE DETECTOR") as demo:
    with gr.Column(elem_classes=["header"]):
        gr.HTML("\n\nAI VOICE DETECTOR\n\n")
        gr.Markdown(
            f"Neural Forensics Laboratory | Maximum Upload Limit: {MAX_FILE_SIZE_MB}MB"
        )

    with gr.Row():
        with gr.Column(scale=2):
            audio_in = gr.Audio(
                label=f"Input Signal (Max {MAX_FILE_SIZE_MB}MB)", type="filepath"
            )
            run_btn = gr.Button("COMMENCE % ANALYSIS", variant="primary")
            radar_out = gr.Plot(label="Indicator Radar")
        with gr.Column(scale=3):
            verdict_out = gr.HTML("\nReady for signal acquisition...\n")
            spec_out = gr.Plot(label="Spectrogram Scan")
            chroma_out = gr.Plot(label="Harmonic Chroma Analysis")

    run_btn.click(
        main_process,
        audio_in,
        [verdict_out, radar_out, spec_out, chroma_out],
    )

if __name__ == "__main__":
    # max_file_size set to 50MB
    demo.launch(max_file_size=f"{MAX_FILE_SIZE_MB}mb")