import os
import warnings

import torch
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import gradio as gr
from transformers import pipeline

warnings.filterwarnings("ignore")

# ── DESIGN TOKENS ──────────────────────────────────────────────────────────
COLORS = {
    "bg": "#05070a",
    "panel": "#0b0e14",
    "accent": "#00f2ff",   # Cyan
    "accent2": "#7000ff",  # Purple
    "danger": "#ff0055",   # Red
    "success": "#00ff88",  # Green
    "text": "#e0e0e0",
    "subtext": "#4e5561",
}

# ═══════════════════════════════════════════════════════════════════════════
# AI VOICE DETECTOR ENGINE
# ═══════════════════════════════════════════════════════════════════════════
MODEL_ID = "garystafford/wav2vec2-deepfake-voice-detector"

# Bind the name before attempting the load so that a failed load leaves a
# well-defined sentinel instead of an unbound name (which would surface later
# as a confusing NameError inside get_analysis).
pipe = None
try:
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline("audio-classification", model=MODEL_ID, device=device)
except Exception as e:
    # Top-level boundary: report the failure and let the module finish
    # importing; get_analysis() raises a clear error if it is called anyway.
    print(f"Engine Error: {e}")


def create_chroma_plt(y, sr):
    """Render a CQT chromagram of an audio signal as a Matplotlib figure.

    Args:
        y: Audio samples, passed straight to ``librosa.feature.chroma_cqt``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The Matplotlib figure containing the chromagram and its colorbar.
    """
    hop_length = 512

    plt.style.use('dark_background')
    fig, ax = plt.subplots(figsize=(10, 3))
    fig.patch.set_facecolor(COLORS["panel"])
    ax.set_facecolor(COLORS["panel"])

    chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
    # specshow defaults to sr=22050; pass the real rate and hop so the time
    # axis is labelled in true seconds for this signal.
    img = librosa.display.specshow(
        chroma,
        sr=sr,
        hop_length=hop_length,
        y_axis='chroma',
        x_axis='time',
        ax=ax,
        cmap='coolwarm',
    )

    # Use the explicit figure/axes objects rather than pyplot's implicit
    # "current figure", which may point elsewhere if figures are created
    # concurrently.
    ax.set_title(
        "HARMONIC ALIGNMENT (CQT CHROMA)",
        color=COLORS["accent2"],
        fontsize=10,
        pad=10,
    )
    ax.tick_params(axis='both', colors=COLORS["subtext"], labelsize=7)
    fig.colorbar(img, ax=ax).ax.tick_params(colors=COLORS["subtext"])
    fig.tight_layout()
    return fig


def get_analysis(audio_path):
    """Classify an audio file and compute auxiliary signal indicators.

    Args:
        audio_path: Path to the audio file to analyse.

    Returns:
        A tuple ``(ai_prob, sub_indicators, y, sr)`` where ``ai_prob`` is the
        classifier score for the ``FAKE`` label (0 if that label is absent),
        ``sub_indicators`` maps indicator names to values capped at 1.0,
        ``y`` is the loaded waveform and ``sr`` its sampling rate.

    Raises:
        RuntimeError: If the classification pipeline failed to load at
            import time.
    """
    if pipe is None:
        raise RuntimeError(
            f"Audio classification model '{MODEL_ID}' is not loaded; "
            "see the 'Engine Error' message printed at startup."
        )

    # Only the first 10 seconds, at 16 kHz, feed the hand-crafted features.
    # NOTE(review): the classifier below is given the file path, not this
    # truncated waveform, so it presumably sees the whole file - confirm
    # that this asymmetry is intended.
    y, sr = librosa.load(audio_path, sr=16000, duration=10)

    results = pipe(audio_path)
    scores_dict = {res['label'].upper(): res['score'] for res in results}
    ai_prob = scores_dict.get('FAKE', 0)

    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    flatness = np.mean(librosa.feature.spectral_flatness(y=y))
    # chroma_stft defaults to sr=22050; pass the real rate so frequency bins
    # are mapped to the correct pitch classes.
    chroma_spread = np.std(librosa.feature.chroma_stft(y=y, sr=sr))

    sub_indicators = {
        "Neural Pulse": ai_prob,
        # Cast to a plain float, consistent with the indicators below.
        "Spectral Flatness": float(min(flatness * 25, 1.0)),
        "Waveform Jitter": float(np.clip(zcr * 10, 0, 1)),
        "Harmonic Drift": float(np.clip(1 - (chroma_spread * 2), 0, 1)),
    }
    return ai_prob, sub_indicators, y, sr


# NOTE(review): the remainder of this file (main_process) is truncated and
# garbled in the reviewed copy; the line below is preserved exactly as found.
# ═══════════════════════════════════════════════════════════════════════════ # UI HANDLER (With 50MB File Size Limit) # ═══════════════════════════════════════════════════════════════════════════ def main_process(audio_path): if audio_path is None: return "
Maximum allowed size is 50MB.
Current file: {file_size_mb:.1f}MB
Forensic Result
AI Probability Score