# VERIDEX.V1 — backend/main.py
# Last commit: c5ec583 (shadow55gh) — "fix: real ELA heatmap, real face
# boxes, real DCT freq, fix HF URLs"
"""
VERIDEX β€” FastAPI Backend v4.0
================================
ONE SERVER: FastAPI serves frontend + handles all API endpoints.
Startup loads 6 ML models:
1. MTCNN β€” face detection
2. FaceNet β€” face embeddings (vggface2)
3. ViT β€” deepfake detection (prithivMLmods)
4. CLIP β€” AI image scoring (openai/clip-vit-base-patch32)
5. SDXL Detector β€” Stable Diffusion detection (Organika/sdxl-detector)
6. GAN Detector β€” GAN artifact detection (saltacc/anime-ai-detect)
Run: uvicorn main:app --reload --port 8000
Open: http://localhost:8000
"""
import os, hashlib, time, uuid, torch
from pathlib import Path
from contextlib import asynccontextmanager
from loguru import logger
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
# ── Auto-create required directories ────────────────────────────
# Every runtime artifact (PDF reports, model weights, raw uploads, the
# Hugging Face download cache, generated visual PNGs) lives in a sibling
# folder that is guaranteed to exist from import time onward.
_RUNTIME_DIRS = ("reports", "weights", "uploads", "hf_cache", "visuals")
for _dirname in _RUNTIME_DIRS:
    os.makedirs(_dirname, exist_ok=True)
MODELS_DIR = Path("hf_cache")
# ── Internal imports ─────────────────────────────────────────────
from models.deepfake_detector import analyze_image, load_all_models
from models.synth_id_detector import detect_synth_id
from models.audio_forensics import analyze_audio
from models.video_model import analyze_video
from utils.module_runner import run_enabled_modules, MODULE_NAMES, MODULE_WEIGHTS
from utils.report_generator import generate_pdf_report
from utils.hashing import file_fingerprint, sha256 as _sha256, md5 as _md5
from utils.visualization import ela_heatmap, module_score_chart
from utils.metadata_extractor import extract_metadata
from routes.analyze import router as analyze_router
from routes.report import router as report_router
# ── Lifespan: load all 6 models on startup ───────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load every ML model once before serving; log a notice on shutdown."""
    banner = "=" * 55
    logger.info(banner)
    logger.info(" VERIDEX v4.0 β€” Loading ML Models")
    logger.info(banner)
    # Blocks until all six detectors are resident; the app only starts
    # accepting requests after this completes.
    await load_all_models(MODELS_DIR)
    logger.success("πŸš€ All models ready β€” VERIDEX is online!")
    logger.info(banner)
    yield
    logger.info("VERIDEX shutting down.")
# ── App ──────────────────────────────────────────────────────────
app = FastAPI(
    title="VERIDEX API v4",
    version="4.0.0",
    description="AI-powered deepfake & synthetic media forensics platform",
    lifespan=lifespan,  # loads the ML models before the server goes live
)
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers under the CORS spec; acceptable for local dev,
# tighten before production deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Register sub-routers ─────────────────────────────────────────
# Additional route groups live in routes/analyze.py and routes/report.py.
app.include_router(analyze_router)
app.include_router(report_router)
# ── MIME helpers ─────────────────────────────────────────────────
# Whitelisted content types per media family; anything outside ALL_ALLOWED
# is rejected by /analyze with HTTP 415.
ALLOWED_IMAGE = {"image/jpeg","image/png","image/webp","image/tiff","image/bmp"}
ALLOWED_VIDEO = {"video/mp4","video/quicktime","video/webm","video/x-msvideo"}
ALLOWED_AUDIO = {"audio/mpeg","audio/wav","audio/flac","audio/ogg","audio/aac"}
ALLOWED_DOC = {"application/pdf"}
ALL_ALLOWED = ALLOWED_IMAGE | ALLOWED_VIDEO | ALLOWED_AUDIO | ALLOWED_DOC
# Extension → MIME fallback used when the browser-supplied content type is
# missing or a generic application/octet-stream.
EXT_MAP = {
    "jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png","webp":"image/webp",
    "tiff":"image/tiff","bmp":"image/bmp","mp4":"video/mp4","mov":"video/quicktime",
    "webm":"video/webm","avi":"video/x-msvideo","mp3":"audio/mpeg","wav":"audio/wav",
    "flac":"audio/flac","ogg":"audio/ogg","aac":"audio/aac","pdf":"application/pdf",
}
# ── Main /analyze endpoint ───────────────────────────────────────
@app.post("/analyze")
async def analyze(file: UploadFile = File(...), modules: str = "all"):
    """Run the full forensics pipeline on one uploaded file.

    Routes the upload by MIME family (image / video / audio / document),
    merges the 46 signal modules with the deep-learning detectors for
    images, and returns a JSON verdict together with hashes, metadata,
    visualization URLs and an optional PDF report path.

    Args:
        file: multipart upload to analyze.
        modules: "all" or a comma-separated list of module ids (1-46).

    Raises:
        HTTPException: 415 for unsupported types, 400 for empty/tiny
            uploads, 500 when the analysis pipeline fails.
    """
    import re, unicodedata
    # ── Resolve content type ─────────────────────────────────────
    # Fall back to the file extension when the browser sent nothing useful
    # (missing type or a generic one, neither of which is in ALL_ALLOWED).
    ct = file.content_type or ""
    if ct not in ALL_ALLOWED:
        ext = (file.filename or "").rsplit(".", 1)[-1].lower()
        ct = EXT_MAP.get(ext, ct)
    if ct not in ALL_ALLOWED:
        raise HTTPException(415, f"Unsupported file type: {ct}")
    content = await file.read()
    if not content or len(content) < 50:
        raise HTTPException(400, "File is empty or too small")
    # Sanitize filename — strip unicode/special characters so the name is
    # safe to embed in report paths and headers.
    raw_name = file.filename or "unknown"
    safe_name = unicodedata.normalize("NFKD", raw_name)
    safe_name = safe_name.encode("ascii", "ignore").decode("ascii")
    safe_name = re.sub(r'[^\w\.\-]', '_', safe_name)
    file.filename = safe_name or "upload"
    # ── Hashing & case bookkeeping ───────────────────────────────
    hashes = file_fingerprint(content, file.filename or "")
    case_id = f"VRX-{int(time.time())}-{uuid.uuid4().hex[:6].upper()}"
    timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    enabled = (
        list(range(1, 47)) if modules == "all"
        else [int(x) for x in modules.split(",") if x.strip().isdigit()]
    )
    # Defaults for fields a branch may not populate.
    synth_result = None
    metadata = {}
    verdict = "UNKNOWN"
    confidence = 0.5
    module_scores = {}
    custody = ""
    key_findings = []
    ai_summary = ""
    heatmap_path = None
    chart_path = None
    try:
        # ── IMAGE ────────────────────────────────────────────────
        if ct in ALLOWED_IMAGE:
            # 46-module signal analysis
            results = await run_enabled_modules(content, ct, enabled)
            # 6-model ML analysis
            dl = await analyze_image(content)
            # EfficientNet fake probability; high = more likely fake
            # (subtracted from 1.0 below to get authenticity).
            effnet_fake = dl.get("effnet_score", dl.get("fake_prob", 0.5))
            # Merge ML detector outputs into the module score map
            # (module scores are authenticity: higher = more authentic).
            results["module_scores"][15] = 1.0 - effnet_fake
            results["module_scores"][40] = 1.0 - dl.get("clip_score", 0.5)
            results["module_scores"][41] = 1.0 - dl.get("sd_score", 0.5)
            results["module_scores"][42] = 1.0 - dl.get("gan_score", 0.5)
            # Recompute the weighted authenticity average over all modules.
            # MODULE_WEIGHTS is already imported at module level; only the
            # fallback weight is needed here.
            from utils.module_runner import DEFAULT_WEIGHT
            scores = results["module_scores"]
            tw = sum(MODULE_WEIGHTS.get(m, DEFAULT_WEIGHT) for m in scores)
            ws = sum(MODULE_WEIGHTS.get(m, DEFAULT_WEIGHT) * scores[m] for m in scores)
            final_avg = ws / tw if tw > 0 else 0.5
            # EfficientNet override — 45% weight on the final verdict.
            effnet_authentic = 1.0 - effnet_fake
            combined = final_avg * 0.55 + effnet_authentic * 0.45
            # FIX: `confidence` means confidence in the *stated verdict*
            # (matching the audio branch). Previously FAKE/SUSPICIOUS kept
            # the low authenticity score, so risk_score came out near zero
            # for obvious fakes.
            if combined < 0.40:
                verdict = "FAKE"
                confidence = 1.0 - combined
            elif combined < 0.60:
                verdict = "SUSPICIOUS"
                confidence = 1.0 - combined
            else:
                verdict = "AUTHENTIC"
                confidence = combined
            logger.info(f"[Verdict] modules={final_avg:.3f} effnet={effnet_authentic:.3f} combined={combined:.3f} β†’ {verdict}")
            # SynthID watermark (module 46)
            if 46 in enabled:
                synth_result = await detect_synth_id(content, ct)
                if synth_result:
                    results["module_scores"][46] = 1.0 - synth_result.get("confidence", 0.5)
            metadata = extract_metadata(content, ct)
            # Inject face boxes and ELA/frequency data from the ML pass so
            # the frontend can draw overlays without a second request.
            metadata["faces"] = dl.get("face_boxes", [])
            metadata["face_count"] = dl.get("face_count", 0)
            metadata["ela_data"] = dl.get("ela_data", "")
            metadata["freq_data"] = dl.get("freq_data", {})
            module_scores = results["module_scores"]
            custody = results["custody"]
            key_findings = results.get("key_findings", [])
            ai_summary = f"EfficientNet: {effnet_authentic:.1%} authentic | Combined: {combined:.1%} | faces: {dl.get('face_count',0)} | {results.get('ai_summary','')}"
            # Visualizations — best effort; never fail the analysis over a PNG.
            try:
                import base64
                heatmap_b64 = ela_heatmap(content)
                chart_b64 = module_score_chart(module_scores)
                if heatmap_b64:
                    heatmap_path = f"visuals/{case_id}_heatmap.png"
                    with open(heatmap_path, "wb") as _f:
                        _f.write(base64.b64decode(heatmap_b64))
                if chart_b64:
                    chart_path = f"visuals/{case_id}_chart.png"
                    with open(chart_path, "wb") as _f:
                        _f.write(base64.b64decode(chart_b64))
            except Exception as ve:
                logger.warning(f"Visualization skipped: {ve}")
        # ── VIDEO ────────────────────────────────────────────────
        elif ct in ALLOWED_VIDEO:
            vr = await analyze_video(content)
            verdict = vr.get("verdict", "UNKNOWN")
            confidence = vr.get("confidence", 0.5)
            module_scores = {k: v for k, v in vr.items() if isinstance(v, (int, float))}
            custody = f"Video: {vr.get('analyzed_frames',0)} frames @ {vr.get('fps',0):.1f}fps"
            ai_summary = (
                f"Flow consistency: {vr.get('flow_consistency',0):.1%}, "
                f"Fake frames: {vr.get('fake_frame_count',0)}"
            )
        # ── AUDIO ────────────────────────────────────────────────
        elif ct in ALLOWED_AUDIO:
            ar = await analyze_audio(content)
            clone = ar.get("clone_prob", 0.5)
            # confidence = probability of the stated verdict being right.
            verdict = "SYNTHETIC" if clone > 0.5 else "AUTHENTIC"
            confidence = clone if clone > 0.5 else ar.get("authentic_prob", 0.5)
            module_scores = {k: v for k, v in ar.items() if isinstance(v, (int, float))}
            custody = f"Audio: {ar.get('analyzed_duration',0):.1f}s analyzed"
            ai_summary = f"Voice clone probability: {clone:.1%}"
        # ── DOCUMENT (PDF) ───────────────────────────────────────
        else:
            metadata = extract_metadata(content, ct)
            verdict = "UNKNOWN"
            confidence = 0.5
            custody = "Document metadata analysis only"
    except Exception as e:
        logger.error(f"Analysis error: {e}")
        raise HTTPException(500, f"Analysis error: {str(e)}")
    # Risk is the inverse of authenticity: low for AUTHENTIC verdicts,
    # high when the verdict is confidently FAKE/SYNTHETIC.
    risk_score = round(
        (1.0 - confidence) * 100 if verdict == "AUTHENTIC" else confidence * 100, 1
    )
    response = {
        "case_id": case_id,
        "sha256": hashes["sha256"],
        "md5": hashes["md5"],
        "timestamp": timestamp,
        "file_name": file.filename,
        "file_type": ct,
        "file_size_bytes": len(content),
        "verdict": verdict,
        "confidence": round(float(confidence), 4),
        "risk_score": risk_score,
        "is_synth": synth_result["is_synthetic"] if synth_result else False,
        "synth_data": synth_result or {},
        "scores": {
            str(k): round(float(v), 4)
            for k, v in module_scores.items()
            if isinstance(v, (int, float))
        },
        "metadata": metadata,
        "enabled_modules": enabled,
        "chain_of_custody": custody,
        "key_findings": key_findings,
        "ai_summary": ai_summary,
        "heatmap_url": f"/visuals/{case_id}_heatmap.png" if heatmap_path else None,
        "chart_url": f"/visuals/{case_id}_chart.png" if chart_path else None,
    }
    # PDF generation is best effort — the JSON verdict is still returned
    # even when report rendering fails.
    try:
        response["report_path"] = generate_pdf_report(response)
    except Exception as e:
        logger.warning(f"Report generation failed: {e}")
        response["report_path"] = None
    return JSONResponse(content=response)
# ── Integrity verification endpoint ─────────────────────────────
@app.post("/verify")
async def verify_file(file: UploadFile = File(...), expected_sha256: str = ""):
    """Recompute hashes for an upload; compare to an expected SHA-256 if given."""
    payload = await file.read()
    fingerprint = file_fingerprint(payload, file.filename or "")
    if expected_sha256:
        matches = fingerprint["sha256"] == expected_sha256
    else:
        # No reference hash supplied — integrity is indeterminate.
        matches = None
    return {"hashes": fingerprint, "integrity_ok": matches}
# ── Info endpoints ───────────────────────────────────────────────
@app.get("/modules")
def list_modules():
    """Enumerate all 46 analysis modules with display names and verdict weights."""
    catalogue = []
    for module_id in range(1, 47):
        catalogue.append({
            "id": module_id,
            "name": MODULE_NAMES.get(module_id, f"Module {module_id}"),
            # 0.02 mirrors the default weight used elsewhere for unlisted modules.
            "weight": MODULE_WEIGHTS.get(module_id, 0.02),
        })
    return {"total": 46, "modules": catalogue}
@app.get("/health")
def health():
return {
"status": "online",
"version": "4.0.0",
"modules": 46,
"gpu": torch.cuda.is_available(),
"device": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU",
}
# ── Static mounts ────────────────────────────────────────────────
# Generated heatmap/chart PNGs are served from ./visuals (created at import).
app.mount("/visuals", StaticFiles(directory="visuals"), name="visuals")
# The frontend lives one directory above this file (../frontend).
_BASE = os.path.dirname(os.path.abspath(__file__))
_FRONTEND = os.path.join(_BASE, "..", "frontend")
@app.get("/")
def root():
fp = os.path.join(_FRONTEND, "index.html")
if os.path.exists(fp):
return FileResponse(fp, media_type="text/html")
return JSONResponse({"app": "VERIDEX v4", "docs": "/docs"})
# Mount frontend assets under /static only when the folder actually exists,
# so a backend-only deployment still starts cleanly.
if os.path.exists(_FRONTEND):
    app.mount("/static", StaticFiles(directory=_FRONTEND), name="static")
if __name__ == "__main__":
import uvicorn
port = int(os.environ.get("PORT", 8000))
uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)