# Audio_detection / app.py
# Yuji787's picture
# Update app.py
# 25887c7 verified
# Raw
# History Blame Contribute Delete
# 4.69 kB
import gradio as gr
import numpy as np
import time
from transformers import pipeline
from PIL import Image, ImageChops
from io import BytesIO
# --- Model loading (runs once at import time; progress is logged to stdout) ---
print("=" * 60, "[BOOT] Loading models...", sep="\n")

# Audio deepfake detection: Gustking (wav2vec2-large-xlsr, labels: fake/real)
audio_pipe = pipeline(
    task="audio-classification",
    model="Gustking/wav2vec2-large-xlsr-deepfake-audio-classification",
)
print("[BOOT] Audio model (Gustking) loaded OK")

# Image AI detection (binary: AI vs Human)
image_pipe = pipeline(
    task="image-classification",
    model="Ateeqq/ai-vs-human-image-detector",
)
print("[BOOT] Image binary model loaded OK")

# Editing detection needs no model: it is computed by detect_image_editing.
print(
    "[BOOT] Image editing detection: ELA forensic analysis (no ML model)",
    "=" * 60,
    sep="\n",
)
def detect_audio(audio_path):
    """Classify an audio clip as spoofed or bona fide.

    Runs the module-level ``audio_pipe`` on ``audio_path`` and logs the raw
    scores and the elapsed time to stdout.

    Args:
        audio_path: Value passed straight to ``audio_pipe`` (the UI supplies
            a file path).

    Returns:
        A dict with the keys ``"spoof"`` and ``"bonafide"``. Each value is
        the matching score rounded to 4 decimals, or 0 when the model
        produced neither of the accepted labels for that side.
    """
    started = time.time()
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"[AUDIO] New analysis: {audio_path}")

    scores = {}
    for prediction in audio_pipe(audio_path):
        scores[prediction["label"]] = round(prediction["score"], 4)

    # Accept either naming scheme: spoof/bonafide first, then fake/real.
    fake = scores["spoof"] if "spoof" in scores else scores.get("fake", 0)
    real = scores["bonafide"] if "bonafide" in scores else scores.get("real", 0)

    elapsed = time.time() - started
    print(f"[AUDIO] Gustking: fake={fake:.4f} real={real:.4f} raw={scores}")
    print(f"[AUDIO] Duration: {elapsed:.2f}s")
    print(f"{rule}\n")

    verdict = {}
    verdict["spoof"] = round(fake, 4)
    verdict["bonafide"] = round(real, 4)
    return verdict
def detect_image(image):
    """Run the binary AI-vs-human image classifier.

    Calls the module-level ``image_pipe`` on ``image`` and logs the scores
    (rounded to 4 decimals) and the elapsed time to stdout.

    Args:
        image: Value passed straight to ``image_pipe`` (the UI supplies a
            PIL image).

    Returns:
        A dict mapping each predicted label to its unrounded score.
    """
    started = time.time()
    rule = "=" * 60
    print(f"\n{rule}")
    print("[IMAGE] New binary analysis")

    predictions = image_pipe(image)

    # Rounded copy is for the log only; the caller gets the raw scores.
    logged = {}
    for prediction in predictions:
        logged[prediction["label"]] = round(prediction["score"], 4)

    elapsed = time.time() - started
    print(f"[IMAGE] Results: {logged}")
    print(f"[IMAGE] Duration: {elapsed:.2f}s")
    print(f"{rule}\n")

    label_scores = {}
    for prediction in predictions:
        label_scores[prediction["label"]] = prediction["score"]
    return label_scores
def compute_ela(image, quality=90):
    """Return the Error Level Analysis map of an image.

    The image is converted to RGB, written to an in-memory JPEG at the
    given quality, read back, and compared with the RGB original.

    Args:
        image: PIL-style image exposing ``convert``.
        quality: JPEG quality used for the re-save.

    Returns:
        A float NumPy array holding the per-pixel, per-channel absolute
        difference between the RGB image and its re-saved copy.
    """
    rgb = image.convert("RGB")

    # Round-trip through JPEG entirely in memory.
    jpeg_buffer = BytesIO()
    rgb.save(jpeg_buffer, format="JPEG", quality=quality)
    jpeg_buffer.seek(0)
    recompressed = Image.open(jpeg_buffer)

    error_levels = ImageChops.difference(rgb, recompressed)
    return np.asarray(error_levels, dtype=float)
def detect_image_editing(image):
    """Detect editing via ELA + color channel analysis.

    Three heuristic components, each clamped to [0, 1], are combined with
    fixed weights (0.45 ELA, 0.35 channel correlation, 0.20 saturation):

    1. ELA: standard deviation of the ``compute_ela`` map, scaled by 25.
    2. Channel correlation: mean pairwise Pearson correlation of the R, G
       and B channels; lower correlation raises the score.
    3. Saturation: mean of (max - min) / 255 across channels per pixel;
       higher saturation raises the score.

    Results and timing are logged to stdout.

    Args:
        image: PIL-style image exposing ``convert``.

    Returns:
        A dict ``{"Edited": score, "Authentic": 1 - score}`` with both
        values rounded to 4 decimals.
    """
    start = time.time()
    print(f"\n{'='*60}")
    print("[ELA] New editing analysis")
    img = image.convert("RGB")
    arr = np.array(img).astype(float)

    # 1. ELA variance (local editing detection)
    ela = compute_ela(img, quality=90)
    ela_std = float(ela.std())
    ela_score = min(1.0, ela_std / 25.0)

    # 2. Channel correlation (global filter detection)
    # One corrcoef call over the (pixels, 3) view yields the full 3x3
    # matrix. A channel with zero variance (e.g. a solid-colour image)
    # makes its correlations NaN; errstate silences the resulting
    # divide warnings because that case is handled explicitly below.
    with np.errstate(divide="ignore", invalid="ignore"):
        corr = np.corrcoef(arr.reshape(-1, 3), rowvar=False)
    pair_corrs = np.array([corr[0, 1], corr[0, 2], corr[1, 2]])
    defined = pair_corrs[np.isfinite(pair_corrs)]
    if defined.size:
        avg_corr = float(defined.mean())
        # Natural photos: corr ~0.85-0.95. Filtered: < 0.8
        corr_score = max(0.0, min(1.0, (0.92 - avg_corr) / 0.25))
    else:
        # No correlation is defined, so there is no evidence of a filter.
        # Without this branch NaN would pass through min()/max() as 1.0
        # and report maximum evidence for a flat image.
        corr_score = 0.0

    # 3. Saturation analysis (oversaturation = filter)
    sat = (arr.max(axis=2) - arr.min(axis=2)) / 255.0
    mean_sat = float(sat.mean())
    # Natural: mean_sat ~0.15-0.30. Filtered: > 0.35
    sat_score = max(0.0, min(1.0, (mean_sat - 0.25) / 0.20))

    # Combined weighted score
    editing_score = 0.45 * ela_score + 0.35 * corr_score + 0.20 * sat_score
    editing_score = round(min(1.0, max(0.0, editing_score)), 4)
    result = {
        "Edited": editing_score,
        "Authentic": round(1.0 - editing_score, 4),
    }

    elapsed = time.time() - start
    print(f"[ELA] Results: {result}")
    print(f"[ELA] Components: ela={ela_score:.4f} corr={corr_score:.4f} sat={sat_score:.4f}")
    print(f"[ELA] Duration: {elapsed:.2f}s")
    print(f"{'='*60}\n")
    return result
# Three-tab UI: each tab wires one button to one detector function and
# registers it under a named API endpoint.
with gr.Blocks() as demo:
    gr.Markdown("# Danaya AI Detection Space")
    gr.Markdown("Audio deepfake + Image AI detection + Image editing detection (ELA)")

    # Tab 1: audio deepfake classifier (receives a file path).
    with gr.Tab("Audio"):
        audio_input = gr.Audio(type="filepath")
        audio_output = gr.Label()
        audio_btn = gr.Button("Detect Audio")
        audio_btn.click(
            fn=detect_audio,
            inputs=audio_input,
            outputs=audio_output,
            api_name="detect_audio",
        )

    # Tab 2: AI-vs-human image classifier (receives a PIL image).
    with gr.Tab("Image"):
        image_input = gr.Image(type="pil")
        image_output = gr.Label()
        image_btn = gr.Button("Detect Image")
        image_btn.click(
            fn=detect_image,
            inputs=image_input,
            outputs=image_output,
            api_name="detect_image",
        )

    # Tab 3: heuristic editing detector (receives a PIL image).
    with gr.Tab("Image Editing"):
        editing_input = gr.Image(type="pil")
        editing_output = gr.Label()
        editing_btn = gr.Button("Detect Editing (ELA)")
        editing_btn.click(
            fn=detect_image_editing,
            inputs=editing_input,
            outputs=editing_output,
            api_name="detect_image_editing",
        )

# Start the web server for the app.
demo.launch()