"""Gradio app exposing audio deepfake, AI-image and image-editing detection.

Two Hugging Face pipelines are loaded at import time; the editing detector is
a hand-tuned heuristic (ELA + channel statistics) and uses no ML model.
"""

import time
from io import BytesIO

import gradio as gr
import numpy as np
from PIL import Image, ImageChops
from transformers import pipeline

# Visual separator used around every log section.
_SEPARATOR = "=" * 60

print(_SEPARATOR)
print("[BOOT] Loading models...")

# Audio deepfake detection: Gustking (wav2vec2-large-xlsr, labels: fake/real)
audio_pipe = pipeline(
    "audio-classification",
    model="Gustking/wav2vec2-large-xlsr-deepfake-audio-classification",
)
print("[BOOT] Audio model (Gustking) loaded OK")

# Image AI detection (binary: AI vs Human)
image_pipe = pipeline(
    "image-classification",
    model="Ateeqq/ai-vs-human-image-detector",
)
print("[BOOT] Image binary model loaded OK")
print("[BOOT] Image editing detection: ELA forensic analysis (no ML model)")
print(_SEPARATOR)


def _clamp_unit(value):
    """Clamp *value* to the closed interval [0.0, 1.0]."""
    return max(0.0, min(1.0, value))


def _mean_channel_correlation(arr):
    """Return the mean pairwise Pearson correlation of the R, G and B channels.

    A constant channel has zero variance, so its correlation with any other
    channel is undefined and ``np.corrcoef`` would yield NaN. Such pairs are
    skipped. If no pair is defined (e.g. a solid-colour image), 1.0 is
    returned so the caller's correlation score is neutral (0).

    Args:
        arr: float array indexed as [row, column, channel] with 3 channels.
    """
    channels = [arr[:, :, idx].flatten() for idx in range(3)]
    varies = [chan.max() != chan.min() for chan in channels]
    correlations = [
        np.corrcoef(channels[i], channels[j])[0, 1]
        for i, j in ((0, 1), (0, 2), (1, 2))
        if varies[i] and varies[j]
    ]
    if not correlations:
        return 1.0
    return float(sum(correlations) / len(correlations))


def detect_audio(audio_path):
    """Classify an audio file as spoofed or bona fide.

    Args:
        audio_path: path of the audio file to analyse (Gradio "filepath").

    Returns:
        ``{"spoof": score, "bonafide": score}`` with scores rounded to four
        decimals. The model's "spoof"/"fake" label feeds ``spoof`` and its
        "bonafide"/"real" label feeds ``bonafide``; a missing label gives 0.

    Raises:
        gr.Error: if no audio was provided.
    """
    if audio_path is None:
        # Gradio passes None when the button is clicked with no input.
        raise gr.Error("No audio provided. Upload or record a clip first.")

    start = time.time()
    print(f"\n{_SEPARATOR}")
    print(f"[AUDIO] New analysis: {audio_path}")

    results = audio_pipe(audio_path)
    scores = {r["label"]: round(r["score"], 4) for r in results}
    # Accept either naming scheme for the two classes.
    fake = scores.get("spoof", scores.get("fake", 0))
    real = scores.get("bonafide", scores.get("real", 0))

    elapsed = time.time() - start
    print(f"[AUDIO] Gustking: fake={fake:.4f} real={real:.4f} raw={scores}")
    print(f"[AUDIO] Duration: {elapsed:.2f}s")
    print(f"{_SEPARATOR}\n")
    return {"spoof": round(fake, 4), "bonafide": round(real, 4)}


def detect_image(image):
    """Run the binary AI-vs-human image classifier.

    Args:
        image: PIL image to classify.

    Returns:
        Mapping of each label returned by the pipeline to its raw
        (unrounded) score.

    Raises:
        gr.Error: if no image was provided.
    """
    if image is None:
        raise gr.Error("No image provided. Upload an image first.")

    start = time.time()
    print(f"\n{_SEPARATOR}")
    print("[IMAGE] New binary analysis")

    results = image_pipe(image)
    # Rounded copy is for the log only; the returned scores stay unrounded.
    scores = {r["label"]: round(r["score"], 4) for r in results}

    elapsed = time.time() - start
    print(f"[IMAGE] Results: {scores}")
    print(f"[IMAGE] Duration: {elapsed:.2f}s")
    print(f"{_SEPARATOR}\n")
    return {r["label"]: r["score"] for r in results}


def compute_ela(image, quality=90):
    """Error Level Analysis: re-save as JPEG and compute pixel difference.

    Args:
        image: PIL image; it is converted to RGB before processing.
        quality: JPEG quality used for the re-save.

    Returns:
        Float NumPy array holding the absolute per-pixel, per-channel
        difference between the RGB image and its JPEG re-encoding.
    """
    original = image.convert("RGB")
    with BytesIO() as buf:
        original.save(buf, format="JPEG", quality=quality)
        buf.seek(0)
        # Image.open is lazy; ImageChops.difference forces the pixel data to
        # load, so it must run while the buffer is still open.
        with Image.open(buf) as resaved:
            diff = ImageChops.difference(original, resaved)
    return np.array(diff).astype(float)


def detect_image_editing(image):
    """Detect editing via ELA + color channel analysis.

    Three heuristic component scores, each in [0, 1], are blended with fixed
    weights (0.45 ELA, 0.35 channel correlation, 0.20 saturation).

    Args:
        image: PIL image to analyse.

    Returns:
        ``{"Edited": score, "Authentic": 1 - score}``, both rounded to four
        decimals.

    Raises:
        gr.Error: if no image was provided.
    """
    if image is None:
        raise gr.Error("No image provided. Upload an image first.")

    start = time.time()
    print(f"\n{_SEPARATOR}")
    print("[ELA] New editing analysis")

    img = image.convert("RGB")
    arr = np.array(img).astype(float)

    # 1. ELA variance (local editing detection)
    ela = compute_ela(img, quality=90)
    ela_score = float(min(1.0, ela.std() / 25.0))

    # 2. Channel correlation (global filter detection)
    # Undefined (constant-channel) pairs are ignored instead of producing NaN,
    # which previously turned into a spurious maximum score.
    avg_corr = _mean_channel_correlation(arr)
    # Natural photos: corr ~0.85-0.95. Filtered: < 0.8
    corr_score = float(_clamp_unit((0.92 - avg_corr) / 0.25))

    # 3. Saturation analysis (oversaturation = filter)
    # Per-pixel spread between the largest and smallest channel value.
    sat = (arr.max(axis=2) - arr.min(axis=2)) / 255.0
    mean_sat = sat.mean()
    # Natural: mean_sat ~0.15-0.30. Filtered: > 0.35
    sat_score = float(_clamp_unit((mean_sat - 0.25) / 0.20))

    # Combined weighted score
    editing_score = 0.45 * ela_score + 0.35 * corr_score + 0.20 * sat_score
    editing_score = round(_clamp_unit(editing_score), 4)

    result = {
        "Edited": editing_score,
        "Authentic": round(1.0 - editing_score, 4),
    }

    elapsed = time.time() - start
    print(f"[ELA] Results: {result}")
    print(f"[ELA] Components: ela={ela_score:.4f} corr={corr_score:.4f} sat={sat_score:.4f}")
    print(f"[ELA] Duration: {elapsed:.2f}s")
    print(f"{_SEPARATOR}\n")
    return result


# UI: one tab per detector; each button is also exposed as a named API route.
with gr.Blocks() as demo:
    gr.Markdown("# Danaya AI Detection Space")
    gr.Markdown("Audio deepfake + Image AI detection + Image editing detection (ELA)")

    with gr.Tab("Audio"):
        audio_input = gr.Audio(type="filepath")
        audio_output = gr.Label()
        audio_btn = gr.Button("Detect Audio")
        audio_btn.click(
            fn=detect_audio,
            inputs=audio_input,
            outputs=audio_output,
            api_name="detect_audio",
        )

    with gr.Tab("Image"):
        image_input = gr.Image(type="pil")
        image_output = gr.Label()
        image_btn = gr.Button("Detect Image")
        image_btn.click(
            fn=detect_image,
            inputs=image_input,
            outputs=image_output,
            api_name="detect_image",
        )

    with gr.Tab("Image Editing"):
        editing_input = gr.Image(type="pil")
        editing_output = gr.Label()
        editing_btn = gr.Button("Detect Editing (ELA)")
        editing_btn.click(
            fn=detect_image_editing,
            inputs=editing_input,
            outputs=editing_output,
            api_name="detect_image_editing",
        )

demo.launch()