# AiVoiceDetector / app.py
# Khalil09's picture
# Update app.py
# 8851579 verified
import os
import warnings
import torch
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import gradio as gr
from transformers import pipeline
# Install a catch-all "ignore" filter: from this point on, warnings of every
# category are silenced for the whole process.
warnings.filterwarnings("ignore")
# ── DESIGN TOKENS ──────────────────────────────────────────────────────────
# Hex colour palette shared by the HTML snippets, the plots and the CSS below.
COLORS = dict(
    bg="#05070a",
    panel="#0b0e14",
    accent="#00f2ff",   # Cyan
    accent2="#7000ff",  # Purple
    danger="#ff0055",   # Red
    success="#00ff88",  # Green
    text="#e0e0e0",
    subtext="#4e5561",
)
# ═══════════════════════════════════════════════════════════════════════════
# AI VOICE DETECTOR ENGINE
# ═══════════════════════════════════════════════════════════════════════════
# Hugging Face model id of the audio-classification checkpoint used for scoring.
MODEL_ID = "garystafford/wav2vec2-deepfake-voice-detector"

# Bind `pipe` before attempting the load so the name always exists: if the load
# fails it stays None instead of being left undefined (which would surface
# later as a confusing NameError on the first request).
pipe = None
try:
    # transformers device convention: a CUDA device index, or -1 for CPU.
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline("audio-classification", model=MODEL_ID, device=device)
except Exception as e:
    # Deliberate best effort: report the problem but keep the module importable
    # so the UI can still start.
    print(f"Engine Error: {e}")
def create_chroma_plt(y, sr):
    """Render a CQT chromagram of an audio signal as a dark-themed figure.

    Args:
        y: Audio samples, passed straight to ``librosa.feature.chroma_cqt``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The matplotlib ``Figure`` holding the chromagram and its colorbar. The
        figure is already detached from pyplot's registry, so it is garbage
        collected once the caller drops it.
    """
    hop_length = 512

    # NOTE: this switches matplotlib's style globally, so figures created later
    # in the same process are dark-themed as well.
    plt.style.use('dark_background')
    fig, ax = plt.subplots(figsize=(10, 3))
    fig.patch.set_facecolor(COLORS["panel"])
    ax.set_facecolor(COLORS["panel"])

    chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
    # specshow defaults to sr=22050; pass the real rate and hop so the time
    # axis matches the audio that was actually analysed.
    img = librosa.display.specshow(
        chroma,
        sr=sr,
        hop_length=hop_length,
        y_axis='chroma',
        x_axis='time',
        ax=ax,
        cmap='coolwarm',
    )

    # Address this figure/axes explicitly instead of pyplot's implicit
    # "current figure", which is shared process-wide.
    ax.set_title("HARMONIC ALIGNMENT (CQT CHROMA)", color=COLORS["accent2"], fontsize=10, pad=10)
    ax.tick_params(axis='both', colors=COLORS["subtext"], labelsize=7)
    fig.colorbar(img, ax=ax).ax.tick_params(colors=COLORS["subtext"])
    fig.tight_layout()

    # Unregister from pyplot so repeated requests do not accumulate open
    # figures; the Figure object itself stays usable for rendering.
    plt.close(fig)
    return fig
def get_analysis(audio_path):
    """Score an audio file with the neural classifier plus spectral heuristics.

    Only the first 10 seconds are decoded (resampled to 16 kHz) and that same
    excerpt feeds both the classifier and the hand-crafted indicators. The
    verdict therefore describes exactly the audio shown in the plots, and
    inference cost stays bounded no matter how long the uploaded file is.

    Args:
        audio_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        A tuple ``(ai_prob, sub_indicators, y, sr)`` where ``ai_prob`` is the
        classifier score for the ``FAKE`` label (0 if the model reports no such
        label), ``sub_indicators`` maps indicator names to plain floats, and
        ``y`` / ``sr`` are the decoded samples and their sampling rate.

    Raises:
        RuntimeError: If the classification pipeline is not available.
        ValueError: If the file decodes to zero samples.
    """
    # Module start-up swallows model-loading errors, so `pipe` may be None or
    # not bound at all; look it up defensively and fail with a clear message.
    classifier = globals().get("pipe")
    if classifier is None:
        raise RuntimeError(
            "The detection model is not loaded; see the 'Engine Error' message in the startup log."
        )

    y, sr = librosa.load(audio_path, sr=16000, duration=10)
    if y.size == 0:
        raise ValueError("The audio file contains no samples to analyse.")

    # Hand the decoded excerpt to the pipeline together with its sampling rate
    # rather than the file path, so the model sees the same 10 s window.
    results = classifier({"raw": y, "sampling_rate": sr})
    scores_dict = {res['label'].upper(): res['score'] for res in results}
    ai_prob = float(scores_dict.get('FAKE', 0))

    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    flatness = np.mean(librosa.feature.spectral_flatness(y=y))
    # chroma_stft defaults to sr=22050; pass the real rate so FFT bins are
    # mapped to the correct pitch classes.
    chroma_spread = np.std(librosa.feature.chroma_stft(y=y, sr=sr))

    # Every value is squashed into [0, 1] and converted to a built-in float so
    # the radar chart receives uniform, serialisable numbers.
    sub_indicators = {
        "Neural Pulse": ai_prob,
        "Spectral Flatness": float(np.clip(flatness * 25, 0, 1)),
        "Waveform Jitter": float(np.clip(zcr * 10, 0, 1)),
        "Harmonic Drift": float(np.clip(1 - (chroma_spread * 2), 0, 1)),
    }
    return ai_prob, sub_indicators, y, sr
# ═══════════════════════════════════════════════════════════════════════════
# UI HANDLER (With 50MB File Size Limit)
# ═══════════════════════════════════════════════════════════════════════════
def _file_too_large_html(file_size_mb):
    """Return the error card shown when an upload exceeds the 50MB limit."""
    return f"""
<div style="background: {COLORS['panel']}; border: 2px solid {COLORS['danger']}; border-radius: 15px; padding: 20px; text-align: center;">
<h3 style="color: {COLORS['danger']};">FILE TOO LARGE</h3>
<p style="color: {COLORS['text']};">Maximum allowed size is <b>50MB</b>.</p>
<p style="color: {COLORS['subtext']};">Current file: {file_size_mb:.1f}MB</p>
</div>
"""


def _render_verdict_html(score):
    """Return the verdict card (label, percentage, progress bar) for a score in [0, 1]."""
    percentage = score * 100
    is_human = score < 0.5
    v_color = COLORS["success"] if is_human else COLORS["danger"]
    label = "AUTHENTIC HUMAN" if is_human else "AI SYNTHETIC"
    return f"""
<div style="background: {COLORS['panel']}; border-radius: 15px; padding: 30px; text-align: center; border: 1px solid #1e2530; border-top: 5px solid {v_color};">
<p style="color: {COLORS['subtext']}; text-transform: uppercase; letter-spacing: 2px; margin: 0; font-size: 12px;">Forensic Result</p>
<h2 style="color: {v_color}; font-size: 38px; margin: 10px 0;">{label}</h2>
<div style="margin: 20px 0;">
<span style="color: {v_color}; font-size: 54px; font-weight: bold; font-family: 'JetBrains Mono', monospace;">{percentage:.1f}%</span>
<p style="color: {COLORS['text']}; margin: 0; opacity: 0.8;">AI Probability Score</p>
</div>
<div style="background: #161b22; height: 10px; border-radius: 5px; width: 100%; overflow: hidden;">
<div style="background: {v_color}; height: 100%; width: {percentage}%; border-radius: 5px; box-shadow: 0 0 15px {v_color}; transition: width 1s ease-in-out;"></div>
</div>
</div>
"""


def _build_radar_figure(sub_scores):
    """Return a Plotly radar chart of the indicator name -> value mapping."""
    names = list(sub_scores)
    values = list(sub_scores.values())
    # Repeat the first point at the end so the polygon outline is closed.
    radar_fig = go.Figure(go.Scatterpolar(
        r=values + values[:1],
        theta=names + names[:1],
        fill='toself', fillcolor='rgba(112, 0, 255, 0.2)', line=dict(color=COLORS["accent2"])
    ))
    radar_fig.update_layout(polar=dict(bgcolor='rgba(0,0,0,0)', radialaxis=dict(visible=False)),
                            paper_bgcolor='rgba(0,0,0,0)', showlegend=False, height=300, margin=dict(t=30, b=30))
    return radar_fig


def _build_spectrogram_figure(y, sr):
    """Return a dark-themed matplotlib figure of the dB-scaled STFT of ``y``."""
    # Apply the dark style here instead of relying on create_chroma_plt having
    # switched the global style earlier: on the very first request that has not
    # happened yet, which used to produce a light-themed spectrogram.
    with plt.style.context('dark_background'):
        fig, ax = plt.subplots(figsize=(10, 3))
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        # Draw on this figure's own axes rather than pyplot's implicit
        # "current axes", which is shared process-wide.
        librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', cmap='magma', ax=ax)
        ax.set_title("FREQUENCY SPECTRUM", color=COLORS["accent"], fontsize=9)
        fig.tight_layout()
    # Unregister from pyplot so figures do not pile up across requests; the
    # Figure object itself remains usable for rendering.
    plt.close(fig)
    return fig


def main_process(audio_path):
    """Gradio click handler: analyse an uploaded file and build all four outputs.

    Args:
        audio_path: Filesystem path of the uploaded audio, or ``None`` when
            nothing was provided.

    Returns:
        A tuple ``(verdict_html, radar_fig, spec_plt, chroma_plt)``. When there
        is no input, or the file is larger than 50MB, the first element is an
        HTML notice and the three plot slots are ``None``.
    """
    if audio_path is None:
        return "<div style='color:orange; text-align:center;'>⚠️ No audio file detected.</div>", None, None, None

    # Check File Size (50MB Limit)
    file_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
    if file_size_mb > 50:
        return _file_too_large_html(file_size_mb), None, None, None

    score, sub_scores, y, sr = get_analysis(audio_path)

    verdict_html = _render_verdict_html(score)
    radar_fig = _build_radar_figure(sub_scores)
    spec_plt = _build_spectrogram_figure(y, sr)
    chroma_plt = create_chroma_plt(y, sr)
    # Make sure the chroma figure is released from pyplot's registry too
    # (closing a figure that is already closed is a no-op).
    plt.close(chroma_plt)
    return verdict_html, radar_fig, spec_plt, chroma_plt
# ═══════════════════════════════════════════════════════════════════════════
# INTERFACE
# ═══════════════════════════════════════════════════════════════════════════
# Page-level stylesheet handed to gr.Blocks: one rule for the Gradio container
# (colours taken from COLORS) and one for the ".header" banner class.
CSS = "\n".join([
    "",
    (
        f".gradio-container {{ background-color: {COLORS['bg']} !important; "
        f"color: {COLORS['text']} !important; font-family: 'Inter', sans-serif; }}"
    ),
    ".header { text-align: center; padding: 25px; border-bottom: 1px solid #1e2530; margin-bottom: 25px; }",
    "",
])
with gr.Blocks(css=CSS, title="AI VOICE DETECTOR") as demo:
    # Banner: application title and the upload-limit tagline.
    with gr.Column(elem_classes=["header"]):
        gr.HTML(
            value=f"<h1 style='color:{COLORS['accent']}; letter-spacing:8px; font-weight: bold;'>AI VOICE DETECTOR</h1>"
        )
        gr.Markdown(value="Neural Forensics Laboratory | Maximum Upload Limit: 50MB")

    with gr.Row():
        # Left column: input signal, trigger button and the indicator radar.
        with gr.Column(scale=2):
            audio_in = gr.Audio(label="Input Signal (Max 50MB)", type="filepath")
            run_btn = gr.Button(value="COMMENCE % ANALYSIS", variant="primary")
            radar_out = gr.Plot(label="Indicator Radar")

        # Right column: verdict card followed by the two spectral plots.
        with gr.Column(scale=3):
            verdict_out = gr.HTML(
                value="<div style='text-align:center; padding:60px; color:#333; border: 1px dashed #1e2530; border-radius: 15px;'>Ready for signal acquisition...</div>"
            )
            spec_out = gr.Plot(label="Spectrogram Scan")
            chroma_out = gr.Plot(label="Harmonic Chroma Analysis")

    # The output order must match the tuple returned by main_process.
    run_btn.click(
        fn=main_process,
        inputs=audio_in,
        outputs=[verdict_out, radar_out, spec_out, chroma_out],
    )
if __name__ == "__main__":
    # Start the web server only when run as a script; the 50MB cap is passed
    # to Gradio as its upload size limit.
    launch_options = {"max_file_size": "50mb"}
    demo.launch(**launch_options)