Spaces:
Sleeping
Sleeping
| import os | |
| import warnings | |
| import torch | |
| import librosa | |
| import librosa.display | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import plotly.graph_objects as go | |
| import gradio as gr | |
| from transformers import pipeline | |
| warnings.filterwarnings("ignore") | |
# ── DESIGN TOKENS ──────────────────────────────────────────────────────────
# Single source of truth for every colour used by the HTML panels, the plots
# and the page CSS. Values are CSS hex colour strings.
COLORS = dict(
    bg="#05070a",
    panel="#0b0e14",
    accent="#00f2ff",    # cyan
    accent2="#7000ff",   # purple
    danger="#ff0055",    # red
    success="#00ff88",   # green
    text="#e0e0e0",
    subtext="#4e5561",
)
# ───────────────────────────────────────────────────────────────────────────
# AI VOICE DETECTOR ENGINE
# ───────────────────────────────────────────────────────────────────────────
MODEL_ID = "garystafford/wav2vec2-deepfake-voice-detector"

try:
    # Use the first CUDA device when one is available, otherwise run on CPU
    # (the transformers pipeline takes -1 to mean CPU).
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline("audio-classification", model=MODEL_ID, device=device)
except Exception as e:
    # Start-up is deliberately best-effort: the UI must still launch when the
    # model cannot be loaded. Bind `pipe` to a stand-in so that a later
    # request fails with an explicit message (and the original cause attached)
    # instead of a NameError on an unbound `pipe`.
    print(f"Engine Error: {e}")
    _engine_error = e  # `e` is unbound once the except block ends

    def pipe(*_args, **_kwargs):
        """Stand-in classifier used when the real model failed to load.

        Raises:
            RuntimeError: always, chained to the original loading error.
        """
        raise RuntimeError(
            f"Detection model '{MODEL_ID}' is unavailable: {_engine_error}"
        ) from _engine_error
def create_chroma_plt(y, sr):
    """Render a CQT chromagram of an audio signal as a dark-themed figure.

    Args:
        y: Audio samples, passed unchanged to ``librosa.feature.chroma_cqt``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The matplotlib ``Figure`` containing the chromagram and its colorbar.
        The figure is already detached from pyplot's registry; it can still
        be rendered or saved by whoever holds the reference.
    """
    hop_length = 512

    # NOTE: this switches matplotlib's style for the whole process, not just
    # for this figure; every figure created afterwards inherits it.
    plt.style.use('dark_background')
    fig, ax = plt.subplots(figsize=(10, 3))
    fig.patch.set_facecolor(COLORS["panel"])
    ax.set_facecolor(COLORS["panel"])

    chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
    # specshow assumes sr=22050 unless told otherwise; pass the real rate and
    # hop so the time axis is labelled in true seconds.
    img = librosa.display.specshow(
        chroma,
        sr=sr,
        hop_length=hop_length,
        y_axis='chroma',
        x_axis='time',
        ax=ax,
        cmap='coolwarm',
    )

    # Address the figure/axes explicitly rather than through pyplot's
    # "current figure", which is shared global state.
    ax.set_title("HARMONIC ALIGNMENT (CQT CHROMA)", color=COLORS["accent2"], fontsize=10, pad=10)
    ax.tick_params(axis='both', colors=COLORS["subtext"], labelsize=7)
    fig.colorbar(img, ax=ax).ax.tick_params(colors=COLORS["subtext"])
    fig.tight_layout()

    # Unregister from pyplot so figures do not accumulate across requests.
    plt.close(fig)
    return fig
def get_analysis(audio_path):
    """Score an audio file with the classifier and derive auxiliary indicators.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        A tuple ``(ai_prob, sub_indicators, y, sr)`` where ``ai_prob`` is the
        classifier score for the ``FAKE`` label (0 when that label is absent),
        ``sub_indicators`` maps four indicator names to floats clamped to at
        most 1, ``y`` is the audio loaded by librosa (first 10 seconds,
        resampled to 16 kHz) and ``sr`` is its sampling rate.

    Raises:
        ValueError: if the file decodes to zero samples.
    """
    y, sr = librosa.load(audio_path, sr=16000, duration=10)
    if y.size == 0:
        raise ValueError("Audio file contains no samples to analyse.")

    # NOTE(review): the classifier receives the file path (the whole file),
    # while the indicators below only see the first 10 seconds loaded above.
    results = pipe(audio_path)
    scores_dict = {res['label'].upper(): res['score'] for res in results}
    ai_prob = float(scores_dict.get('FAKE', 0))

    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    flatness = np.mean(librosa.feature.spectral_flatness(y=y))
    # chroma_stft assumes sr=22050 unless told otherwise; pass the real rate
    # so frequency bins are mapped to the correct pitch classes.
    chroma_spread = np.std(librosa.feature.chroma_stft(y=y, sr=sr))

    # Plain Python floats throughout, so the values serialise uniformly.
    sub_indicators = {
        "Neural Pulse": ai_prob,
        "Spectral Flatness": float(min(flatness * 25, 1.0)),
        "Waveform Jitter": float(np.clip(zcr * 10, 0, 1)),
        "Harmonic Drift": float(np.clip(1 - (chroma_spread * 2), 0, 1)),
    }
    return ai_prob, sub_indicators, y, sr
# ───────────────────────────────────────────────────────────────────────────
# UI HANDLER (With 50MB File Size Limit)
# ───────────────────────────────────────────────────────────────────────────
_MAX_UPLOAD_MB = 50


def _verdict_html(score):
    """Build the verdict panel (label, percentage, progress bar) for a score."""
    percentage = score * 100
    is_human = score < 0.5
    v_color = COLORS["success"] if is_human else COLORS["danger"]
    label = "AUTHENTIC HUMAN" if is_human else "AI SYNTHETIC"
    return f"""
    <div style="background: {COLORS['panel']}; border-radius: 15px; padding: 30px; text-align: center; border: 1px solid #1e2530; border-top: 5px solid {v_color};">
        <p style="color: {COLORS['subtext']}; text-transform: uppercase; letter-spacing: 2px; margin: 0; font-size: 12px;">Forensic Result</p>
        <h2 style="color: {v_color}; font-size: 38px; margin: 10px 0;">{label}</h2>
        <div style="margin: 20px 0;">
            <span style="color: {v_color}; font-size: 54px; font-weight: bold; font-family: 'JetBrains Mono', monospace;">{percentage:.1f}%</span>
            <p style="color: {COLORS['text']}; margin: 0; opacity: 0.8;">AI Probability Score</p>
        </div>
        <div style="background: #161b22; height: 10px; border-radius: 5px; width: 100%; overflow: hidden;">
            <div style="background: {v_color}; height: 100%; width: {percentage}%; border-radius: 5px; box-shadow: 0 0 15px {v_color}; transition: width 1s ease-in-out;"></div>
        </div>
    </div>
    """


def _radar_figure(sub_scores):
    """Build the closed radar polygon for the indicator name -> value mapping."""
    names = list(sub_scores)
    values = list(sub_scores.values())
    # Repeat the first point at the end so the outline closes on itself;
    # slicing (rather than [0]) keeps an empty mapping from raising.
    fig = go.Figure(go.Scatterpolar(
        r=values + values[:1],
        theta=names + names[:1],
        fill='toself', fillcolor='rgba(112, 0, 255, 0.2)', line=dict(color=COLORS["accent2"])
    ))
    fig.update_layout(polar=dict(bgcolor='rgba(0,0,0,0)', radialaxis=dict(visible=False)),
                      paper_bgcolor='rgba(0,0,0,0)', showlegend=False, height=300, margin=dict(t=30, b=30))
    return fig


def _spectrogram_figure(y, sr):
    """Render the dB-scaled STFT spectrogram of ``y`` as a dark-themed figure."""
    # Apply the dark style here so the first request looks like every later
    # one, regardless of whether the global style has been switched yet.
    with plt.style.context('dark_background'):
        fig, ax = plt.subplots(figsize=(10, 3))
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', cmap='magma', ax=ax)
        ax.set_title("FREQUENCY SPECTRUM", color=COLORS["accent"], fontsize=9)
        fig.tight_layout()
    # Unregister from pyplot so figures do not accumulate across requests;
    # the returned Figure object can still be rendered.
    plt.close(fig)
    return fig


def main_process(audio_path):
    """Handle one analysis request coming from the UI.

    Args:
        audio_path: Path of the uploaded audio file, or ``None`` when nothing
            was provided.

    Returns:
        A 4-tuple ``(verdict_html, radar_fig, spec_fig, chroma_fig)``. When no
        file is given, or the file exceeds the 50MB limit, the first element
        is an HTML message and the three figures are ``None``.
    """
    if audio_path is None:
        return "<div style='color:orange; text-align:center;'>⚠️ No audio file detected.</div>", None, None, None

    # Check File Size (50MB Limit)
    file_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
    if file_size_mb > _MAX_UPLOAD_MB:
        return f"""
        <div style="background: {COLORS['panel']}; border: 2px solid {COLORS['danger']}; border-radius: 15px; padding: 20px; text-align: center;">
            <h3 style="color: {COLORS['danger']};">FILE TOO LARGE</h3>
            <p style="color: {COLORS['text']};">Maximum allowed size is <b>50MB</b>.</p>
            <p style="color: {COLORS['subtext']};">Current file: {file_size_mb:.1f}MB</p>
        </div>
        """, None, None, None

    score, sub_scores, y, sr = get_analysis(audio_path)

    verdict_html = _verdict_html(score)
    radar_fig = _radar_figure(sub_scores)
    spec_plt = _spectrogram_figure(y, sr)
    chroma_plt = create_chroma_plt(y, sr)
    return verdict_html, radar_fig, spec_plt, chroma_plt
# ───────────────────────────────────────────────────────────────────────────
# INTERFACE
# ───────────────────────────────────────────────────────────────────────────
# Page-level stylesheet; colours come from the shared palette.
CSS = f"""
.gradio-container {{ background-color: {COLORS['bg']} !important; color: {COLORS['text']} !important; font-family: 'Inter', sans-serif; }}
.header {{ text-align: center; padding: 25px; border-bottom: 1px solid #1e2530; margin-bottom: 25px; }}
"""

# Static markup used by the layout below.
_TITLE_HTML = f"<h1 style='color:{COLORS['accent']}; letter-spacing:8px; font-weight: bold;'>AI VOICE DETECTOR</h1>"
_PLACEHOLDER_HTML = "<div style='text-align:center; padding:60px; color:#333; border: 1px dashed #1e2530; border-radius: 15px;'>Ready for signal acquisition...</div>"

with gr.Blocks(css=CSS, title="AI VOICE DETECTOR") as demo:
    # Header strip: title plus tagline.
    with gr.Column(elem_classes=["header"]):
        gr.HTML(_TITLE_HTML)
        gr.Markdown("Neural Forensics Laboratory | Maximum Upload Limit: 50MB")

    with gr.Row():
        # Left column: input, trigger button and the indicator radar.
        with gr.Column(scale=2):
            audio_in = gr.Audio(label="Input Signal (Max 50MB)", type="filepath")
            run_btn = gr.Button("COMMENCE % ANALYSIS", variant="primary")
            radar_out = gr.Plot(label="Indicator Radar")
        # Right column: verdict panel and the two spectral plots.
        with gr.Column(scale=3):
            verdict_out = gr.HTML(_PLACEHOLDER_HTML)
            spec_out = gr.Plot(label="Spectrogram Scan")
            chroma_out = gr.Plot(label="Harmonic Chroma Analysis")

    # Output order must match the tuple returned by main_process.
    run_btn.click(
        fn=main_process,
        inputs=audio_in,
        outputs=[verdict_out, radar_out, spec_out, chroma_out],
    )

if __name__ == "__main__":
    # Server-side upload cap, kept in step with the 50MB check in the handler.
    demo.launch(max_file_size="50mb")