Spaces:
Running
Running
| import gradio as gr | |
| import numpy as np | |
| import time | |
| from transformers import pipeline | |
| from PIL import Image, ImageChops | |
| from io import BytesIO | |
# ---------------------------------------------------------------------------
# Start-up: build both Hugging Face pipelines once, at import time. The
# resulting module-level objects (audio_pipe, image_pipe) are what the
# detection handlers call on every request.
# ---------------------------------------------------------------------------
_BANNER = "=" * 60
_AUDIO_MODEL_ID = "Gustking/wav2vec2-large-xlsr-deepfake-audio-classification"
_IMAGE_MODEL_ID = "Ateeqq/ai-vs-human-image-detector"

print(_BANNER)
print("[BOOT] Loading models...")

# Audio deepfake detection: Gustking (wav2vec2-large-xlsr, labels: fake/real)
audio_pipe = pipeline("audio-classification", model=_AUDIO_MODEL_ID)
print("[BOOT] Audio model (Gustking) loaded OK")

# Image AI detection (binary: AI vs Human)
image_pipe = pipeline("image-classification", model=_IMAGE_MODEL_ID)
print("[BOOT] Image binary model loaded OK")

# Editing detection needs no model: it is computed with ELA forensics.
print("[BOOT] Image editing detection: ELA forensic analysis (no ML model)")
print(_BANNER)
def detect_audio(audio_path):
    """Classify an audio file as spoofed or bona fide speech.

    Runs the module-level ``audio_pipe`` on ``audio_path`` and maps the
    labels it reports onto the fixed ``spoof`` / ``bonafide`` keys of the
    result. Both naming schemes (``spoof``/``bonafide`` and ``fake``/``real``)
    are accepted and matched case-insensitively.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        ``{"spoof": <score>, "bonafide": <score>}`` with scores rounded to
        four decimals. A class the model did not report scores ``0``.

    Raises:
        ValueError: If ``audio_path`` is ``None`` or empty, e.g. detection
            was requested before any audio was provided.
    """
    # Fail fast with a clear message instead of an opaque pipeline error.
    if not audio_path:
        raise ValueError(
            "No audio provided: upload or record a clip before running detection."
        )

    # perf_counter is monotonic, so the duration cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    print(f"\n{'='*60}")
    print(f"[AUDIO] New analysis: {audio_path}")

    results = audio_pipe(audio_path)

    # Rounded scores under the model's own labels; also used for the log line.
    scores = {r["label"]: round(r["score"], 4) for r in results}
    # Lower-cased view so "Fake", "FAKE" and "fake" resolve to the same key.
    by_label = {str(label).lower(): score for label, score in scores.items()}

    fake = by_label.get("spoof", by_label.get("fake", 0))
    real = by_label.get("bonafide", by_label.get("real", 0))

    if not {"spoof", "fake", "bonafide", "real"} & by_label.keys():
        # Both scores default to 0 here; make that visible in the logs.
        print(f"[AUDIO] WARNING: no known label in model output: {scores}")

    elapsed = time.perf_counter() - start
    print(f"[AUDIO] Gustking: fake={fake:.4f} real={real:.4f} raw={scores}")
    print(f"[AUDIO] Duration: {elapsed:.2f}s")
    print(f"{'='*60}\n")

    # Values are already rounded to four decimals above.
    return {"spoof": fake, "bonafide": real}
def detect_image(image):
    """Classify an image with the module-level ``image_pipe`` model.

    Args:
        image: The image to analyse, passed unchanged to ``image_pipe``.

    Returns:
        A ``{label: score}`` dict holding every label the pipeline reported,
        with the scores exactly as returned (not rounded).

    Raises:
        ValueError: If ``image`` is ``None``, e.g. detection was requested
            before any image was provided.
    """
    # Fail fast with a clear message instead of an opaque pipeline error.
    if image is None:
        raise ValueError(
            "No image provided: upload an image before running detection."
        )

    # perf_counter is monotonic, so the duration cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    print(f"\n{'='*60}")
    print("[IMAGE] New binary analysis")

    results = image_pipe(image)

    # Rounded copy is for the log line only; callers get the raw scores.
    scores = {r["label"]: round(r["score"], 4) for r in results}
    elapsed = time.perf_counter() - start
    print(f"[IMAGE] Results: {scores}")
    print(f"[IMAGE] Duration: {elapsed:.2f}s")
    print(f"{'='*60}\n")

    return {r["label"]: r["score"] for r in results}
def compute_ela(image, quality=90):
    """Return the error levels of ``image`` (Error Level Analysis).

    The image is converted to RGB, re-encoded in memory as a JPEG at
    ``quality`` and decoded again. The result is the pixel-by-pixel
    difference between the RGB image and that re-encoded copy, returned as a
    float NumPy array.
    """
    rgb = image.convert("RGB")

    # Round-trip through an in-memory JPEG; nothing is written to disk.
    jpeg_bytes = BytesIO()
    rgb.save(jpeg_bytes, format="JPEG", quality=quality)
    jpeg_bytes.seek(0)
    recompressed = Image.open(jpeg_bytes)

    delta = ImageChops.difference(rgb, recompressed)
    delta_pixels = np.array(delta)
    return delta_pixels.astype(float)
def _channel_correlation_score(arr):
    """Map the mean pairwise R/G/B correlation of ``arr`` to a 0-1 score.

    ``arr`` is an ``(H, W, 3)`` float array. Pairs whose correlation is
    undefined (a channel with zero variance yields NaN) are ignored; if no
    pair is defined the score is ``0.0``, i.e. no evidence either way.
    """
    channels = arr.reshape(-1, 3).T  # one row per colour channel
    # A constant channel divides by a zero standard deviation inside
    # corrcoef; silence that warning and filter the NaN entries below.
    with np.errstate(divide="ignore", invalid="ignore"):
        corr = np.corrcoef(channels)
    pairs = np.array([corr[0, 1], corr[0, 2], corr[1, 2]])
    pairs = pairs[np.isfinite(pairs)]
    if pairs.size == 0:
        return 0.0
    avg_corr = pairs.mean()
    # Natural photos: corr ~0.85-0.95. Filtered: < 0.8
    return float(max(0.0, min(1.0, (0.92 - avg_corr) / 0.25)))


def detect_image_editing(image):
    """Detect editing via ELA + color channel analysis.

    Three heuristics, each mapped to a 0-1 score, are blended with fixed
    weights:

    * ELA spread (0.45): standard deviation of the JPEG re-compression
      error returned by ``compute_ela``, divided by 25 and capped at 1.
    * Channel correlation (0.35): mean pairwise correlation between the
      R, G and B channels; lower correlation gives a higher score.
    * Saturation (0.20): mean over all pixels of ``(max - min) / 255``
      across the three channels; higher saturation gives a higher score.

    Args:
        image: Image object exposing ``convert("RGB")`` (a PIL image).

    Returns:
        ``{"Edited": s, "Authentic": 1 - s}``, both rounded to four decimals.

    Raises:
        ValueError: If ``image`` is ``None``, e.g. detection was requested
            before any image was provided.
    """
    # Fail fast with a clear message instead of an AttributeError on None.
    if image is None:
        raise ValueError(
            "No image provided: upload an image before running detection."
        )

    # perf_counter is monotonic, so the duration cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    print(f"\n{'='*60}")
    print("[ELA] New editing analysis")

    img = image.convert("RGB")
    arr = np.array(img).astype(float)

    # 1. ELA variance (local editing detection)
    ela_std = compute_ela(img, quality=90).std()
    ela_score = float(min(1.0, ela_std / 25.0))

    # 2. Channel correlation (global filter detection). Undefined (NaN)
    #    correlations are excluded rather than counted as maximum evidence.
    corr_score = _channel_correlation_score(arr)

    # 3. Saturation analysis (oversaturation = filter)
    sat = (arr.max(axis=2) - arr.min(axis=2)) / 255.0
    mean_sat = sat.mean()
    # Natural: mean_sat ~0.15-0.30. Filtered: > 0.35
    sat_score = float(max(0.0, min(1.0, (mean_sat - 0.25) / 0.20)))

    # Combined weighted score
    editing_score = 0.45 * ela_score + 0.35 * corr_score + 0.20 * sat_score
    editing_score = round(min(1.0, max(0.0, editing_score)), 4)
    result = {
        "Edited": editing_score,
        "Authentic": round(1.0 - editing_score, 4),
    }

    elapsed = time.perf_counter() - start
    print(f"[ELA] Results: {result}")
    print(f"[ELA] Components: ela={ela_score:.4f} corr={corr_score:.4f} sat={sat_score:.4f}")
    print(f"[ELA] Duration: {elapsed:.2f}s")
    print(f"{'='*60}\n")
    return result
# Gradio front end: one tab per detector. Each tab holds an input widget, a
# Label output and a button whose click runs the matching detect_* handler;
# api_name gives every handler its own named endpoint.
with gr.Blocks() as demo:
    gr.Markdown("# Danaya AI Detection Space")
    gr.Markdown("Audio deepfake + Image AI detection + Image editing detection (ELA)")

    # Audio deepfake detection (handler receives a file path).
    with gr.Tab("Audio"):
        audio_input = gr.Audio(type="filepath")
        audio_output = gr.Label()
        audio_btn = gr.Button("Detect Audio")
        audio_btn.click(
            fn=detect_audio,
            inputs=audio_input,
            outputs=audio_output,
            api_name="detect_audio",
        )

    # AI-vs-human image classification (handler receives a PIL image).
    with gr.Tab("Image"):
        image_input = gr.Image(type="pil")
        image_output = gr.Label()
        image_btn = gr.Button("Detect Image")
        image_btn.click(
            fn=detect_image,
            inputs=image_input,
            outputs=image_output,
            api_name="detect_image",
        )

    # ELA-based editing detection (handler receives a PIL image).
    with gr.Tab("Image Editing"):
        editing_input = gr.Image(type="pil")
        editing_output = gr.Label()
        editing_btn = gr.Button("Detect Editing (ELA)")
        editing_btn.click(
            fn=detect_image_editing,
            inputs=editing_input,
            outputs=editing_output,
            api_name="detect_image_editing",
        )

demo.launch()