# VERIDEX.V1 — backend/main.py
# Last commit: c5ec583 (shadow55gh) — "fix: real ELA heatmap, real face
# boxes, real DCT freq, fix HF URLs"
"""
VERIDEX β€” FastAPI Backend v4.0
================================
ONE SERVER: FastAPI serves frontend + handles all API endpoints.
Startup loads 6 ML models:
1. MTCNN β€” face detection
2. FaceNet β€” face embeddings (vggface2)
3. ViT β€” deepfake detection (prithivMLmods)
4. CLIP β€” AI image scoring (openai/clip-vit-base-patch32)
5. SDXL Detector β€” Stable Diffusion detection (Organika/sdxl-detector)
6. GAN Detector β€” GAN artifact detection (saltacc/anime-ai-detect)
Run: uvicorn main:app --reload --port 8000
Open: http://localhost:8000
"""
import os, hashlib, time, uuid, torch
from pathlib import Path
from contextlib import asynccontextmanager
from loguru import logger
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
# ── Auto-create required directories ────────────────────────────
# Every runtime artifact (PDF reports, model weights, raw uploads, the
# Hugging Face download cache, generated visual PNGs) lives in a sibling
# folder that is guaranteed to exist from import time onward.
_RUNTIME_DIRS = ("reports", "weights", "uploads", "hf_cache", "visuals")
for _dirname in _RUNTIME_DIRS:
    os.makedirs(_dirname, exist_ok=True)
MODELS_DIR = Path("hf_cache")
# ── Internal imports ─────────────────────────────────────────────
from models.deepfake_detector import analyze_image, load_all_models
from models.synth_id_detector import detect_synth_id
from models.audio_forensics import analyze_audio
from models.video_model import analyze_video
from utils.module_runner import run_enabled_modules, MODULE_NAMES, MODULE_WEIGHTS
from utils.report_generator import generate_pdf_report
from utils.hashing import file_fingerprint, sha256 as _sha256, md5 as _md5
from utils.visualization import ela_heatmap, module_score_chart
from utils.metadata_extractor import extract_metadata
from routes.analyze import router as analyze_router
from routes.report import router as report_router
# ── Lifespan: load all 6 models on startup ───────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load every ML model once before serving; log a notice on shutdown."""
    banner = "=" * 55
    logger.info(banner)
    logger.info(" VERIDEX v4.0 β€” Loading ML Models")
    logger.info(banner)
    # Blocks until all six detectors are resident; the app only starts
    # accepting requests after this completes.
    await load_all_models(MODELS_DIR)
    logger.success("πŸš€ All models ready β€” VERIDEX is online!")
    logger.info(banner)
    yield
    logger.info("VERIDEX shutting down.")
# ── App ──────────────────────────────────────────────────────────
app = FastAPI(
    title="VERIDEX API v4",
    version="4.0.0",
    description="AI-powered deepfake & synthetic media forensics platform",
    lifespan=lifespan,  # loads the ML models before the server goes live
)
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers under the CORS spec; acceptable for local dev,
# tighten before production deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Register sub-routers ─────────────────────────────────────────
# Additional route groups live in routes/analyze.py and routes/report.py.
app.include_router(analyze_router)
app.include_router(report_router)
# ── MIME helpers ─────────────────────────────────────────────────
# Whitelisted content types per media family; anything outside ALL_ALLOWED
# is rejected by /analyze with HTTP 415.
ALLOWED_IMAGE = {"image/jpeg","image/png","image/webp","image/tiff","image/bmp"}
ALLOWED_VIDEO = {"video/mp4","video/quicktime","video/webm","video/x-msvideo"}
ALLOWED_AUDIO = {"audio/mpeg","audio/wav","audio/flac","audio/ogg","audio/aac"}
ALLOWED_DOC = {"application/pdf"}
ALL_ALLOWED = ALLOWED_IMAGE | ALLOWED_VIDEO | ALLOWED_AUDIO | ALLOWED_DOC
# Extension → MIME fallback used when the browser-supplied content type is
# missing or a generic application/octet-stream.
EXT_MAP = {
    "jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png","webp":"image/webp",
    "tiff":"image/tiff","bmp":"image/bmp","mp4":"video/mp4","mov":"video/quicktime",
    "webm":"video/webm","avi":"video/x-msvideo","mp3":"audio/mpeg","wav":"audio/wav",
    "flac":"audio/flac","ogg":"audio/ogg","aac":"audio/aac","pdf":"application/pdf",
}
# ── Main /analyze endpoint ───────────────────────────────────────
@app.post("/analyze")
async def analyze(file: UploadFile = File(...), modules: str = "all"):
    """Run the full forensics pipeline on one uploaded file.

    Routes the upload by MIME family (image / video / audio / document),
    merges the 46 signal modules with the deep-learning detectors for
    images, and returns a JSON verdict together with hashes, metadata,
    visualization URLs and an optional PDF report path.

    Args:
        file: multipart upload to analyze.
        modules: "all" or a comma-separated list of module ids (1-46).

    Raises:
        HTTPException: 415 for unsupported types, 400 for empty/tiny
            uploads, 500 when the analysis pipeline fails.
    """
    import re, unicodedata
    # ── Resolve content type ─────────────────────────────────────
    # Fall back to the file extension when the browser sent nothing useful
    # (missing type or a generic one, neither of which is in ALL_ALLOWED).
    ct = file.content_type or ""
    if ct not in ALL_ALLOWED:
        ext = (file.filename or "").rsplit(".", 1)[-1].lower()
        ct = EXT_MAP.get(ext, ct)
    if ct not in ALL_ALLOWED:
        raise HTTPException(415, f"Unsupported file type: {ct}")
    content = await file.read()
    if not content or len(content) < 50:
        raise HTTPException(400, "File is empty or too small")
    # Sanitize filename — strip unicode/special characters so the name is
    # safe to embed in report paths and headers.
    raw_name = file.filename or "unknown"
    safe_name = unicodedata.normalize("NFKD", raw_name)
    safe_name = safe_name.encode("ascii", "ignore").decode("ascii")
    safe_name = re.sub(r'[^\w\.\-]', '_', safe_name)
    file.filename = safe_name or "upload"
    # ── Hashing & case bookkeeping ───────────────────────────────
    hashes = file_fingerprint(content, file.filename or "")
    case_id = f"VRX-{int(time.time())}-{uuid.uuid4().hex[:6].upper()}"
    timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    enabled = (
        list(range(1, 47)) if modules == "all"
        else [int(x) for x in modules.split(",") if x.strip().isdigit()]
    )
    # Defaults for fields a branch may not populate.
    synth_result = None
    metadata = {}
    verdict = "UNKNOWN"
    confidence = 0.5
    module_scores = {}
    custody = ""
    key_findings = []
    ai_summary = ""
    heatmap_path = None
    chart_path = None
    try:
        # ── IMAGE ────────────────────────────────────────────────
        if ct in ALLOWED_IMAGE:
            # 46-module signal analysis
            results = await run_enabled_modules(content, ct, enabled)
            # 6-model ML analysis
            dl = await analyze_image(content)
            # EfficientNet fake probability; high = more likely fake
            # (subtracted from 1.0 below to get authenticity).
            effnet_fake = dl.get("effnet_score", dl.get("fake_prob", 0.5))
            # Merge ML detector outputs into the module score map
            # (module scores are authenticity: higher = more authentic).
            results["module_scores"][15] = 1.0 - effnet_fake
            results["module_scores"][40] = 1.0 - dl.get("clip_score", 0.5)
            results["module_scores"][41] = 1.0 - dl.get("sd_score", 0.5)
            results["module_scores"][42] = 1.0 - dl.get("gan_score", 0.5)
            # Recompute the weighted authenticity average over all modules.
            # MODULE_WEIGHTS is already imported at module level; only the
            # fallback weight is needed here.
            from utils.module_runner import DEFAULT_WEIGHT
            scores = results["module_scores"]
            tw = sum(MODULE_WEIGHTS.get(m, DEFAULT_WEIGHT) for m in scores)
            ws = sum(MODULE_WEIGHTS.get(m, DEFAULT_WEIGHT) * scores[m] for m in scores)
            final_avg = ws / tw if tw > 0 else 0.5
            # EfficientNet override — 45% weight on the final verdict.
            effnet_authentic = 1.0 - effnet_fake
            combined = final_avg * 0.55 + effnet_authentic * 0.45
            # FIX: `confidence` means confidence in the *stated verdict*
            # (matching the audio branch). Previously FAKE/SUSPICIOUS kept
            # the low authenticity score, so risk_score came out near zero
            # for obvious fakes.
            if combined < 0.40:
                verdict = "FAKE"
                confidence = 1.0 - combined
            elif combined < 0.60:
                verdict = "SUSPICIOUS"
                confidence = 1.0 - combined
            else:
                verdict = "AUTHENTIC"
                confidence = combined
            logger.info(f"[Verdict] modules={final_avg:.3f} effnet={effnet_authentic:.3f} combined={combined:.3f} β†’ {verdict}")
            # SynthID watermark (module 46)
            if 46 in enabled:
                synth_result = await detect_synth_id(content, ct)
                if synth_result:
                    results["module_scores"][46] = 1.0 - synth_result.get("confidence", 0.5)
            metadata = extract_metadata(content, ct)
            # Inject face boxes and ELA/frequency data from the ML pass so
            # the frontend can draw overlays without a second request.
            metadata["faces"] = dl.get("face_boxes", [])
            metadata["face_count"] = dl.get("face_count", 0)
            metadata["ela_data"] = dl.get("ela_data", "")
            metadata["freq_data"] = dl.get("freq_data", {})
            module_scores = results["module_scores"]
            custody = results["custody"]
            key_findings = results.get("key_findings", [])
            ai_summary = f"EfficientNet: {effnet_authentic:.1%} authentic | Combined: {combined:.1%} | faces: {dl.get('face_count',0)} | {results.get('ai_summary','')}"
            # Visualizations — best effort; never fail the analysis over a PNG.
            try:
                import base64
                heatmap_b64 = ela_heatmap(content)
                chart_b64 = module_score_chart(module_scores)
                if heatmap_b64:
                    heatmap_path = f"visuals/{case_id}_heatmap.png"
                    with open(heatmap_path, "wb") as _f:
                        _f.write(base64.b64decode(heatmap_b64))
                if chart_b64:
                    chart_path = f"visuals/{case_id}_chart.png"
                    with open(chart_path, "wb") as _f:
                        _f.write(base64.b64decode(chart_b64))
            except Exception as ve:
                logger.warning(f"Visualization skipped: {ve}")
        # ── VIDEO ────────────────────────────────────────────────
        elif ct in ALLOWED_VIDEO:
            vr = await analyze_video(content)
            verdict = vr.get("verdict", "UNKNOWN")
            confidence = vr.get("confidence", 0.5)
            module_scores = {k: v for k, v in vr.items() if isinstance(v, (int, float))}
            custody = f"Video: {vr.get('analyzed_frames',0)} frames @ {vr.get('fps',0):.1f}fps"
            ai_summary = (
                f"Flow consistency: {vr.get('flow_consistency',0):.1%}, "
                f"Fake frames: {vr.get('fake_frame_count',0)}"
            )
        # ── AUDIO ────────────────────────────────────────────────
        elif ct in ALLOWED_AUDIO:
            ar = await analyze_audio(content)
            clone = ar.get("clone_prob", 0.5)
            # confidence = probability of the stated verdict being right.
            verdict = "SYNTHETIC" if clone > 0.5 else "AUTHENTIC"
            confidence = clone if clone > 0.5 else ar.get("authentic_prob", 0.5)
            module_scores = {k: v for k, v in ar.items() if isinstance(v, (int, float))}
            custody = f"Audio: {ar.get('analyzed_duration',0):.1f}s analyzed"
            ai_summary = f"Voice clone probability: {clone:.1%}"
        # ── DOCUMENT (PDF) ───────────────────────────────────────
        else:
            metadata = extract_metadata(content, ct)
            verdict = "UNKNOWN"
            confidence = 0.5
            custody = "Document metadata analysis only"
    except Exception as e:
        logger.error(f"Analysis error: {e}")
        raise HTTPException(500, f"Analysis error: {str(e)}")
    # Risk is the inverse of authenticity: low for AUTHENTIC verdicts,
    # high when the verdict is confidently FAKE/SYNTHETIC.
    risk_score = round(
        (1.0 - confidence) * 100 if verdict == "AUTHENTIC" else confidence * 100, 1
    )
    response = {
        "case_id": case_id,
        "sha256": hashes["sha256"],
        "md5": hashes["md5"],
        "timestamp": timestamp,
        "file_name": file.filename,
        "file_type": ct,
        "file_size_bytes": len(content),
        "verdict": verdict,
        "confidence": round(float(confidence), 4),
        "risk_score": risk_score,
        "is_synth": synth_result["is_synthetic"] if synth_result else False,
        "synth_data": synth_result or {},
        "scores": {
            str(k): round(float(v), 4)
            for k, v in module_scores.items()
            if isinstance(v, (int, float))
        },
        "metadata": metadata,
        "enabled_modules": enabled,
        "chain_of_custody": custody,
        "key_findings": key_findings,
        "ai_summary": ai_summary,
        "heatmap_url": f"/visuals/{case_id}_heatmap.png" if heatmap_path else None,
        "chart_url": f"/visuals/{case_id}_chart.png" if chart_path else None,
    }
    # PDF generation is best effort — the JSON verdict is still returned
    # even when report rendering fails.
    try:
        response["report_path"] = generate_pdf_report(response)
    except Exception as e:
        logger.warning(f"Report generation failed: {e}")
        response["report_path"] = None
    return JSONResponse(content=response)
# ── Integrity verification endpoint ─────────────────────────────
@app.post("/verify")
async def verify_file(file: UploadFile = File(...), expected_sha256: str = ""):
    """Recompute hashes for an upload; compare to an expected SHA-256 if given."""
    payload = await file.read()
    fingerprint = file_fingerprint(payload, file.filename or "")
    if expected_sha256:
        matches = fingerprint["sha256"] == expected_sha256
    else:
        # No reference hash supplied — integrity is indeterminate.
        matches = None
    return {"hashes": fingerprint, "integrity_ok": matches}
# ── Info endpoints ───────────────────────────────────────────────
@app.get("/modules")
def list_modules():
    """Enumerate all 46 analysis modules with display names and verdict weights."""
    catalogue = []
    for module_id in range(1, 47):
        catalogue.append({
            "id": module_id,
            "name": MODULE_NAMES.get(module_id, f"Module {module_id}"),
            # 0.02 mirrors the default weight used elsewhere for unlisted modules.
            "weight": MODULE_WEIGHTS.get(module_id, 0.02),
        })
    return {"total": 46, "modules": catalogue}
@app.get("/health")
def health():
return {
"status": "online",
"version": "4.0.0",
"modules": 46,
"gpu": torch.cuda.is_available(),
"device": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU",
}
# ── Static mounts ────────────────────────────────────────────────
# Generated heatmap/chart PNGs are served from ./visuals (created at import).
app.mount("/visuals", StaticFiles(directory="visuals"), name="visuals")
# The frontend lives one directory above this file (../frontend).
_BASE = os.path.dirname(os.path.abspath(__file__))
_FRONTEND = os.path.join(_BASE, "..", "frontend")
@app.get("/")
def root():
fp = os.path.join(_FRONTEND, "index.html")
if os.path.exists(fp):
return FileResponse(fp, media_type="text/html")
return JSONResponse({"app": "VERIDEX v4", "docs": "/docs"})
# Mount frontend assets under /static only when the folder actually exists,
# so a backend-only deployment still starts cleanly.
if os.path.exists(_FRONTEND):
    app.mount("/static", StaticFiles(directory=_FRONTEND), name="static")
if __name__ == "__main__":
import uvicorn
port = int(os.environ.get("PORT", 8000))
uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)