Spaces:

MrTsp
/

DeepShield-Web2

Sleeping

App Files Files Community

MrTsp committed 30 days ago

Commit

e8f9f2d

verified ·

1 Parent(s): 0226ac6

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -98

app.py CHANGED Viewed

@@ -1,11 +1,7 @@
 """
 DeepShield AI — Full-Stack FastAPI Backend
 Serves the frontend UI + deepfake detection API from one HF Space.
-Routes:
-  GET  /          → Serves index.html (the web UI)
-  GET  /health    → JSON health check
-  POST /predict   → Video/Photo upload → REAL/FAKE prediction
 """
 import os
@@ -15,27 +11,81 @@ import shutil
 import logging
 import tempfile
 from pathlib import Path
 import cv2
 import torch
 import numpy as np
 from PIL import Image, ImageFile
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.staticfiles import StaticFiles
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logger = logging.getLogger(__name__)
-# --- START OF IMPORT EXACTLY AS test_real.py ---
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from src.utils.helpers import load_config, get_device
-from src.data.transforms import get_val_transforms
-from src.models.classifier import DeepfakeClassifier
-from src.utils.face_detect import FaceDetector
-# --- END OF IMPORT ---
 app = FastAPI(
     title="DeepShield AI",
@@ -51,30 +101,69 @@ app.add_middleware(
     allow_headers=["*"],
 )
-DEVICE = get_device('cuda' if torch.cuda.is_available() else 'cpu')
 CHECKPOINT_PATH = Path("best_model.pth")
 MAX_FRAMES = 20
 MAX_FILE_MB = 30
 MAX_DURATION_SEC = 60
-# Model caching
-global_model = None
-global_face_detector = None
-global_transform = None
-global_dual_input = True
-def load_model_and_detector():
-    global global_model, global_face_detector, global_transform, global_dual_input
-    if global_model is not None:
-        return global_model, global_face_detector, global_transform, global_dual_input
     ckpt_path_to_load = None
     if not CHECKPOINT_PATH.exists():
-        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-        fallback_path = os.path.join(base_dir, 'models2/checkpoints/best_model.pth')
-        if not os.path.exists(fallback_path):
-            fallback_path = os.path.join(base_dir, 'models2/checkpoints/best_mlp.pth')
         if os.path.exists(fallback_path):
             ckpt_path_to_load = fallback_path
         else:
@@ -83,58 +172,18 @@ def load_model_and_detector():
         ckpt_path_to_load = str(CHECKPOINT_PATH)
     logger.info(f"Loading checkpoint on {DEVICE} from {ckpt_path_to_load} ...")
-    checkpoint = torch.load(ckpt_path_to_load, map_location=str(DEVICE))
-    is_fast_mlp = ckpt_path_to_load.endswith('best_mlp.pth')
-    dual_input = True
-    if is_fast_mlp and 'feat_dim' in checkpoint:
-        dual_input = (checkpoint['feat_dim'] == 1536)
-    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    config_path = os.path.join(base_dir, 'configs/config2.yaml')
-    if os.path.exists(config_path):
-        config = load_config(config_path)
-    else:
-        # Fallback if config is missing in HF Spaces standalone
-        config = {
-            'model': {'dino_variant': 'dinov2_vitb14', 'unfreeze_last_n_blocks': 0, 'dual_input': True},
-            'face_detection': {'margin': 40, 'confidence_threshold': 0.9},
-            'data': {'image_size': 224}
-        }
-    if not is_fast_mlp:
-        dual_input = config['model'].get('dual_input', True)
-    face_detector = FaceDetector(
-        margin=config['face_detection']['margin'],
-        confidence_threshold=config['face_detection']['confidence_threshold'],
-        image_size=config['data']['image_size'],
-        device=str(DEVICE)
-    ) if dual_input else None
-    model = DeepfakeClassifier(
-        dino_variant=config['model']['dino_variant'],
-        freeze_backbone=not is_fast_mlp,
-        unfreeze_last_n_blocks=config['model']['unfreeze_last_n_blocks'] if not is_fast_mlp else 0,
-        dual_input=dual_input
-    )
-    if is_fast_mlp:
-        model.classifier.load_state_dict(checkpoint['model_state_dict'])
-    else:
-        model.load_state_dict(checkpoint['model_state_dict'] if 'model_state_dict' in checkpoint else checkpoint)
-    model = model.to(DEVICE).eval()
-    transform = get_val_transforms(config['data']['image_size'])
-    global_model = model
-    global_face_detector = face_detector
-    global_transform = transform
-    global_dual_input = dual_input
-    logger.info(f"Model ready. dual_input={dual_input}, device={DEVICE}, is_fast_mlp={is_fast_mlp}")
-    return model, face_detector, transform, dual_input
 def extract_frames(video_path: str, temp_dir: str, num_frames: int = MAX_FRAMES) -> list:
     cap = cv2.VideoCapture(video_path)
@@ -157,33 +206,30 @@ def extract_frames(video_path: str, temp_dir: str, num_frames: int = MAX_FRAMES)
     return saved
-def run_inference(frame_paths: list) -> dict:
-    model, face_detector, transform, dual_input = load_model_and_detector()
     fake_probs = []
     with torch.no_grad():
-        for i, p in enumerate(frame_paths):
             try:
-                img = Image.open(p).convert('RGB')
-                t_img = transform(img).unsqueeze(0).to(DEVICE)
                 t_face = t_img
-                if dual_input:
-                    face = face_detector.detect_and_crop(p)
-                    if face is not None:
-                        t_face = transform(face).unsqueeze(0).to(DEVICE)
-                probs = torch.softmax(model(t_img, t_face if dual_input else None), dim=1)
-                prob = probs[0, 1].item()
                 fake_probs.append(prob)
             except Exception as e:
-                logger.warning(f"Skipping frame {p}: {e}")
     if not fake_probs:
         raise ValueError("No frames could be processed.")
     video_fake_prob = float(np.mean(fake_probs))
     is_fake = video_fake_prob > 0.5
     avg_real = 1.0 - video_fake_prob
@@ -196,15 +242,17 @@ def run_inference(frame_paths: list) -> dict:
         "per_frame_scores": [round(p * 100, 1) for p in fake_probs],
     }
 @app.on_event("startup")
 async def startup_event():
     try:
-        load_model_and_detector()
     except Exception as e:
         logger.error(f"Startup model load failed: {e}")
 @app.get("/health")
 def health_check():
     try:
@@ -219,7 +267,6 @@ def health_check():
         "model_loaded": model_loaded,
     }
 @app.post("/predict")
 async def predict(file: UploadFile = File(...)):
     allowed_exts = {".mp4", ".mov", ".avi", ".mkv", ".jpg", ".jpeg", ".png", ".webp"}
@@ -244,6 +291,7 @@ async def predict(file: UploadFile = File(...)):
             f.write(content)
         del content
         logger.info(f"[{job_id}] Processing: {file.filename} ({size_mb:.1f} MB)")
         if ext in {".mp4", ".mov", ".avi", ".mkv"}:
@@ -255,7 +303,7 @@ async def predict(file: UploadFile = File(...)):
             shutil.copy(video_path, img_path)
             frame_paths = [str(img_path)]
-        result = run_inference(frame_paths)
         result["filename"] = file.filename
         result["file_size_mb"] = round(size_mb, 2)
         result["job_id"] = job_id

 """
 DeepShield AI — Full-Stack FastAPI Backend
 Serves the frontend UI + deepfake detection API from one HF Space.
+Self-contained version with exact architectural parity to test_real.py
 """
 import os
 import logging
 import tempfile
 from pathlib import Path
+from functools import lru_cache
 import cv2
 import torch
+import torch.nn as nn
 import numpy as np
 from PIL import Image, ImageFile
+from facenet_pytorch import MTCNN
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.staticfiles import StaticFiles
+import torchvision.transforms as T
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logger = logging.getLogger(__name__)
+# -------------------------------------------------------------
+# EXACT PARITY MODEL DEFINITIONS (Copied from src/ to be standalone)
+# -------------------------------------------------------------
+class DINOv2Extractor(nn.Module):
+    def __init__(self, variant: str = 'dinov2_vitb14'):
+        super().__init__()
+        self.embed_dim = 768
+        logger.info(f"Loading {variant} from torch.hub ...")
+        self.backbone = torch.hub.load(
+            'facebookresearch/dinov2', variant, pretrained=True,
+        )
+        logger.info("DINOv2 loaded.")
+        for p in self.backbone.parameters():
+            p.requires_grad = False
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.backbone(x)
+class MLPClassifier(nn.Module):
+    def __init__(self, input_dim: int = 1536, num_classes: int = 2, dropout: float = 0.4):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(input_dim, 512),
+            nn.BatchNorm1d(512),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Linear(512, 256),
+            nn.BatchNorm1d(256),
+            nn.GELU(),
+            nn.Dropout(dropout * 0.75),
+            nn.Linear(256, num_classes),
+        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.net(x)
+class DeepfakeDetector(nn.Module):
+    def __init__(self, dual_input: bool = True):
+        super().__init__()
+        self.dual_input = dual_input
+        self.extractor = DINOv2Extractor('dinov2_vitb14')
+        feat_dim = 1536 if dual_input else 768
+        self.classifier = MLPClassifier(feat_dim)
+    def forward(self, full_image: torch.Tensor, face_crop: torch.Tensor = None) -> torch.Tensor:
+        full_feat = self.extractor(full_image)
+        if self.dual_input:
+            face_feat = self.extractor(face_crop if face_crop is not None else full_image)
+            features  = torch.cat([full_feat, face_feat], dim=1)
+        else:
+            features = full_feat
+        return self.classifier(features)
+# -------------------------------------------------------------
+# APP SETTINGS & SETUP
+# -------------------------------------------------------------
 app = FastAPI(
     title="DeepShield AI",
     allow_headers=["*"],
 )
+DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 CHECKPOINT_PATH = Path("best_model.pth")
 MAX_FRAMES = 20
 MAX_FILE_MB = 30
 MAX_DURATION_SEC = 60
+# MTCNN face detector, configured to mirror src/utils/face_detect.py. Built once at import time; if construction fails the detector is set to None.
+try:
+    MTCNN_DETECTOR = MTCNN(
+        image_size=224,
+        margin=40,
+        keep_all=False,
+        post_process=False,
+        device='cpu'  # the detector is pinned to CPU regardless of DEVICE
+    )
+    logger.info("MTCNN face detector initialized.")
+except Exception as e:
+    MTCNN_DETECTOR = None  # detect_face_crop() returns None while the detector is unavailable
+    logger.warning(f"MTCNN init failed (will use fallback): {e}")
+# Preprocessing shared by the full frame and the face crop in run_inference; kept as an exact replica of the original pipeline, so do not reorder or drop steps.
+TRANSFORM = T.Compose([
+    T.Resize((224, 224)),
+    T.CenterCrop(224),  # input is already 224x224 after the Resize above, so this should not change the size
+    T.ToTensor(),
+    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # the usual ImageNet channel statistics
+])
+def detect_face_crop(img: Image.Image) -> Image.Image:
+    if MTCNN_DETECTOR is None:
+        return None
+    try:
+        boxes, probs = MTCNN_DETECTOR.detect(img)
+        if boxes is None or len(boxes) == 0:
+            return None
+        best_idx = np.argmax(probs)
+        best_prob = probs[best_idx]
+        if best_prob < 0.9:
+            return None
+        box = boxes[best_idx]
+        w, h = img.size
+        x1, y1, x2, y2 = [int(b) for b in box]
+        margin = 40
+        x1 = max(0, x1 - margin)
+        y1 = max(0, y1 - margin)
+        x2 = min(w, x2 + margin)
+        y2 = min(h, y2 + margin)
+        face = img.crop((x1, y1, x2, y2))
+        return face.resize((224, 224), Image.LANCZOS)
+    except Exception:
+        pass
+    return None
+@lru_cache(maxsize=1)
+def load_model() -> DeepfakeDetector:
+    # First check default path, then fallback if possible
     ckpt_path_to_load = None
     if not CHECKPOINT_PATH.exists():
+        fallback_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models2/checkpoints/best_model.pth')
         if os.path.exists(fallback_path):
             ckpt_path_to_load = fallback_path
         else:
         ckpt_path_to_load = str(CHECKPOINT_PATH)
     logger.info(f"Loading checkpoint on {DEVICE} from {ckpt_path_to_load} ...")
+    ckpt = torch.load(ckpt_path_to_load, map_location=DEVICE)
+    state = ckpt.get("model_state_dict", ckpt)
+    # Determine architecture
+    mlp_w = state.get("classifier.net.0.weight", None)
+    dual = (mlp_w.shape[1] == 1536) if mlp_w is not None else True
+    model = DeepfakeDetector(dual_input=dual).to(DEVICE)
+    model.load_state_dict(state, strict=False)
+    model.eval()
+    logger.info(f"Model ready. dual_input={dual}, device={DEVICE}")
+    return model
 def extract_frames(video_path: str, temp_dir: str, num_frames: int = MAX_FRAMES) -> list:
     cap = cv2.VideoCapture(video_path)
     return saved
+def run_inference(model: DeepfakeDetector, frame_paths: list) -> dict:
     fake_probs = []
     with torch.no_grad():
+        for fpath in frame_paths:
             try:
+                img = Image.open(fpath).convert("RGB")
+                t_img = TRANSFORM(img).unsqueeze(0).to(DEVICE)
                 t_face = t_img
+                if model.dual_input:
+                    face_crop = detect_face_crop(img)
+                    if face_crop is not None:
+                        t_face = TRANSFORM(face_crop).unsqueeze(0).to(DEVICE)
+                logits = model(t_img, t_face if model.dual_input else None)
+                prob = torch.softmax(logits, dim=1)[0, 1].item()
                 fake_probs.append(prob)
             except Exception as e:
+                logger.warning(f"Skipping frame {fpath}: {e}")
     if not fake_probs:
         raise ValueError("No frames could be processed.")
     video_fake_prob = float(np.mean(fake_probs))
     is_fake = video_fake_prob > 0.5
     avg_real = 1.0 - video_fake_prob
         "per_frame_scores": [round(p * 100, 1) for p in fake_probs],
     }
+# -------------------------------------------------------------
+# API ROUTES
+# -------------------------------------------------------------
 @app.on_event("startup")
 async def startup_event():
     try:
+        load_model()
     except Exception as e:
         logger.error(f"Startup model load failed: {e}")
 @app.get("/health")
 def health_check():
     try:
         "model_loaded": model_loaded,
     }
 @app.post("/predict")
 async def predict(file: UploadFile = File(...)):
     allowed_exts = {".mp4", ".mov", ".avi", ".mkv", ".jpg", ".jpeg", ".png", ".webp"}
             f.write(content)
         del content
+        model = load_model()
         logger.info(f"[{job_id}] Processing: {file.filename} ({size_mb:.1f} MB)")
         if ext in {".mp4", ".mov", ".avi", ".mkv"}:
             shutil.copy(video_path, img_path)
             frame_paths = [str(img_path)]
+        result = run_inference(model, frame_paths)
         result["filename"] = file.filename
         result["file_size_mb"] = round(size_mb, 2)
         result["job_id"] = job_id