# app.py — This is your main Hugging Face Spaces app import gradio as gr import torch import numpy as np import librosa import cv2 import os import matplotlib.pyplot as plt import matplotlib matplotlib.use('Agg') from PIL import Image import io import base64 import warnings warnings.filterwarnings('ignore') # ─── Paste your model classes here (or import them) ────────────────────────── # Copy the AudioDeepfakeDetector and VideoDeepfakeDetector classes from above # OR use import statements if you structured it as a package: # from model.audio_detector import AudioDeepfakeDetector, extract_audio_features, predict_audio # from model.video_detector import VideoDeepfakeDetector, predict_video # For Spaces, we'll use lightweight pretrained models from HuggingFace Hub # as fallback if custom models aren't trained yet: from transformers import pipeline # Load pre-trained audio classifier (for demo) # For production, replace with your trained model weights AUDIO_MODEL_PATH = "audio_model.pth" VIDEO_MODEL_PATH = "video_model.pth" USE_TRAINED_MODEL = os.path.exists(AUDIO_MODEL_PATH) # ─── Fallback: Use transformers pipeline ───────────────────────────────────── def analyze_audio_transformers(audio_path): """ Uses a HuggingFace pipeline for audio classification. Replace this with your trained model for better accuracy. """ try: classifier = pipeline( "audio-classification", model="facebook/wav2vec2-base", # For real deepfake detection use: # model="mo-aqrabi/deepfake-audio-detection" ) # This is a placeholder — replace with actual deepfake model # For now returns heuristic based on spectral analysis y, sr = librosa.load(audio_path, sr=16000, duration=5) mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) zcr = librosa.feature.zero_crossing_rate(y) spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr) # Heuristic: AI audio tends to have very regular ZCR and centroid zcr_regularity = 1 - (np.std(zcr) / (np.mean(zcr) + 1e-6)) spectral_regularity = 1 - (np.std(spectral_centroid) / (np.mean(spectral_centroid) + 1e-6)) # Combine into a rough AI score ai_score = np.clip((zcr_regularity + spectral_regularity) / 2, 0, 1) return float(ai_score) except Exception as e: print(f"Fallback audio analysis error: {e}") return 0.5 # Uncertain # ─── Main Detection Functions ───────────────────────────────────────────────── def detect_audio(audio_file): """Full audio analysis pipeline.""" if audio_file is None: return None, "❌ No audio file provided", None try: if USE_TRAINED_MODEL: score = predict_audio(audio_file, AUDIO_MODEL_PATH) else: score = analyze_audio_transformers(audio_file) percentage = score * 100 verdict = "🤖 AI GENERATED" if score > 0.5 else "✅ REAL / HUMAN" confidence = max(score, 1 - score) * 100 # Generate waveform + spectrogram plot y, sr = librosa.load(audio_file, sr=16000, duration=10) fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6)) fig.patch.set_facecolor('#0d0d0d') # Waveform ax1.set_facecolor('#1a1a2e') librosa.display.waveshow(y, sr=sr, ax=ax1, color='#00d4ff') ax1.set_title('Audio Waveform', color='white', fontsize=12) ax1.tick_params(colors='white') # Mel Spectrogram ax2.set_facecolor('#1a1a2e') mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) mel_db = librosa.power_to_db(mel, ref=np.max) img = librosa.display.specshow(mel_db, sr=sr, ax=ax2, x_axis='time', y_axis='mel', cmap='magma') plt.colorbar(img, ax=ax2, format='%+2.0f dB') ax2.set_title('Mel Spectrogram', color='white', fontsize=12) ax2.tick_params(colors='white') ax2.yaxis.label.set_color('white') ax2.xaxis.label.set_color('white') plt.tight_layout() plot_path = '/tmp/audio_analysis.png' plt.savefig(plot_path, facecolor='#0d0d0d', bbox_inches='tight') plt.close() result_text = f""" ## 🔊 Audio Analysis Result | Metric | Value | |--------|-------| | **AI Probability** | {percentage:.1f}% | | **Verdict** | {verdict} | | **Confidence** | {confidence:.1f}% | | **Status** | {'⚠️ HIGH RISK' if percentage > 75 else '🟡 SUSPICIOUS' if percentage > 50 else '🟢 LIKELY REAL'} | ### Interpretation - **0-30%**: Very likely genuine/human-created - **30-50%**: Possibly human, some AI characteristics - **50-70%**: Likely AI-generated, needs verification - **70-100%**: Almost certainly AI-generated """ return plot_path, result_text, f"{percentage:.1f}%" except Exception as e: return None, f"❌ Error analyzing audio: {str(e)}", "N/A" def detect_video(video_file): """Full video analysis pipeline.""" if video_file is None: return None, "❌ No video file provided", None try: if USE_TRAINED_MODEL and os.path.exists(VIDEO_MODEL_PATH): result = predict_video(video_file, VIDEO_MODEL_PATH) score = result['final_score'] frame_scores = result['per_frame_scores'] temporal = result['temporal_score'] else: # Fallback: basic frame analysis score, frame_scores, temporal = analyze_video_basic(video_file) percentage = score * 100 verdict = "🤖 AI GENERATED" if score > 0.5 else "✅ REAL / HUMAN" # Generate frame score visualization fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) fig.patch.set_facecolor('#0d0d0d') # Frame scores over time ax1.set_facecolor('#1a1a2e') frames_x = range(len(frame_scores)) ax1.plot(frames_x, [f * 100 for f in frame_scores], 'cyan', linewidth=2) ax1.fill_between(frames_x, [f * 100 for f in frame_scores], alpha=0.3, color='cyan') ax1.axhline(y=50, color='red', linestyle='--', alpha=0.7, label='Threshold (50%)') ax1.set_facecolor('#1a1a2e') ax1.set_xlabel('Frame', color='white') ax1.set_ylabel('AI Score (%)', color='white') ax1.set_title('Per-Frame AI Score', color='white') ax1.tick_params(colors='white') ax1.legend(facecolor='#1a1a2e', labelcolor='white') ax1.set_ylim(0, 100) # Score breakdown pie ax2.set_facecolor('#1a1a2e') sizes = [score * 100, (1 - score) * 100] colors_pie = ['#FF4444', '#00CC44'] wedges, texts, autotexts = ax2.pie(sizes, labels=['AI Generated', 'Real/Human'], colors=colors_pie, autopct='%1.1f%%', textprops={'color': 'white', 'fontsize': 12}, startangle=90) ax2.set_title(f'Final Verdict: {verdict}', color='white', fontsize=13) plt.tight_layout() plot_path = '/tmp/video_analysis.png' plt.savefig(plot_path, facecolor='#0d0d0d', bbox_inches='tight') plt.close() result_text = f""" ## 🎬 Video Analysis Result | Metric | Value | |--------|-------| | **AI Probability** | {percentage:.1f}% | | **Verdict** | {verdict} | | **Frames Analyzed** | {len(frame_scores)} | | **Temporal Inconsistency** | {temporal*100:.1f}% | | **Status** | {'⚠️ HIGH RISK' if percentage > 75 else '🟡 SUSPICIOUS' if percentage > 50 else '🟢 LIKELY REAL'} | ### What we checked: - ✓ Face region analysis per frame - ✓ Temporal consistency between frames - ✓ Artifact patterns typical of AI generation - ✓ Natural motion flow analysis """ return plot_path, result_text, f"{percentage:.1f}%" except Exception as e: return None, f"❌ Error analyzing video: {str(e)}", "N/A" def analyze_video_basic(video_path): """Basic video analysis fallback without trained model.""" cap = cv2.VideoCapture(video_path) total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) indices = np.linspace(0, total - 1, 20, dtype=int) frame_scores = [] prev_gray = None for idx in indices: cap.set(cv2.CAP_PROP_POS_FRAMES, idx) ret, frame = cap.read() if not ret: continue gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if prev_gray is not None: # Check for unnatural sharpness transitions (common in deepfakes) diff = cv2.absdiff(gray, prev_gray) score = float(np.std(diff)) / 50.0 score = np.clip(score, 0, 1) frame_scores.append(score) prev_gray = gray cap.release() if not frame_scores: return 0.5, [0.5], 0.5 avg_score = np.mean(frame_scores) temporal = float(np.std(frame_scores)) return avg_score, frame_scores, min(temporal, 1.0) # ─── Gradio UI ──────────────────────────────────────────────────────────────── def build_ui(): with gr.Blocks( title="🔍 DeepFake AI Detector", theme=gr.themes.Base( primary_hue="blue", neutral_hue="slate", ), css=""" .gradio-container { max-width: 1100px; margin: auto; } .result-box { border-radius: 12px; padding: 16px; } h1 { text-align: center; } .score-display { font-size: 48px; font-weight: bold; text-align: center; } """ ) as demo: gr.HTML("""
Upload audio or video to detect AI generation.
Get a precise percentage score of how much AI was used.