Spaces:

MrTsp
/

DeepShield-Web

Sleeping

App Files Community

MrTsp committed on Apr 21

Commit

07f2243

1 Parent(s): 77037c2

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -34

app.py CHANGED Viewed

@@ -54,17 +54,17 @@ class DINOv2Extractor(nn.Module):
 class MLPClassifier(nn.Module):
-    def __init__(self, input_dim: int = 1536, num_classes: int = 2, dropout: float = 0.3):
         super().__init__()
         self.net = nn.Sequential(
             nn.Linear(input_dim, 512),
-            nn.LayerNorm(512),
             nn.GELU(),
             nn.Dropout(dropout),
             nn.Linear(512, 256),
-            nn.LayerNorm(256),
             nn.GELU(),
-            nn.Dropout(dropout / 2),
             nn.Linear(256, num_classes),
         )
@@ -119,9 +119,8 @@ try:
     MTCNN_DETECTOR = MTCNN(
         image_size=224,
         margin=40,
-        min_face_size=20,
-        thresholds=[0.6, 0.7, 0.9],
         keep_all=False,
         device='cpu'
     )
     logger.info("MTCNN face detector initialized.")
@@ -131,6 +130,7 @@ except Exception as e:
 TRANSFORM = T.Compose([
     T.Resize((224, 224)),
     T.ToTensor(),
     T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
 ])
@@ -141,13 +141,28 @@ def detect_face_crop(img: Image.Image) -> Image.Image:
     if MTCNN_DETECTOR is None:
         return None
     try:
-        # MTCNN returns the cropped tensor directly
-        face_tensor = MTCNN_DETECTOR(img)
-        if face_tensor is not None:
-            # Convert tensor back to PIL Image
-            face_np = face_tensor.permute(1, 2, 0).numpy()
-            face_np = ((face_np * 128) + 127.5).clip(0, 255).astype(np.uint8)
-            return Image.fromarray(face_np)
     except Exception:
         pass
     return None
@@ -233,24 +248,10 @@ def run_inference(model: DeepfakeDetector, frame_paths: list) -> dict:
     if not fake_probs:
         raise ValueError("No frames could be processed.")
-    # 1. Advanced Aggregation (Top 50% Mean)
-    # Deepfake artifacts might only appear in parts of the video.
-    # Averaging all frames dilutes the score. We take the top 50% most suspicious frames.
-    sorted_probs = sorted(fake_probs, reverse=True)
-    top_k = max(1, len(sorted_probs) // 2)
-    video_fake_prob = float(np.mean(sorted_probs[:top_k]))
-    # 2. Ratio Check
-    # If at least 30% of frames are distinctly flagged as Fake, mark the whole video as Fake.
-    fake_frame_count = sum(1 for p in fake_probs if p > 0.5)
-    fake_ratio = fake_frame_count / len(fake_probs)
-    is_fake = (video_fake_prob > 0.5) or (fake_ratio >= 0.3)
-    # Ensure UI consistency: If flagged as FAKE by ratio, but probability is low, boost it to 51%
-    if is_fake and video_fake_prob <= 0.5:
-        video_fake_prob = 0.51
     avg_real = 1.0 - video_fake_prob
     return {
@@ -287,7 +288,7 @@ def health_check():
 @app.post("/predict")
 async def predict(file: UploadFile = File(...)):
-    allowed_exts = {".mp4", ".mov", ".avi", ".mkv"}
     ext = Path(file.filename).suffix.lower() if file.filename else ""
     if ext not in allowed_exts:
@@ -312,9 +313,14 @@ async def predict(file: UploadFile = File(...)):
         model = load_model()
         logger.info(f"[{job_id}] Processing: {file.filename} ({size_mb:.1f} MB)")
-        frame_paths = extract_frames(str(video_path), str(frames_dir))
-        if not frame_paths:
-            raise HTTPException(422, "No frames could be extracted from video.")
         result = run_inference(model, frame_paths)
         result["filename"] = file.filename

 class MLPClassifier(nn.Module):
+    def __init__(self, input_dim: int = 1536, num_classes: int = 2, dropout: float = 0.4):
         super().__init__()
         self.net = nn.Sequential(
             nn.Linear(input_dim, 512),
+            nn.BatchNorm1d(512),
             nn.GELU(),
             nn.Dropout(dropout),
             nn.Linear(512, 256),
+            nn.BatchNorm1d(256),
             nn.GELU(),
+            nn.Dropout(dropout * 0.75),
             nn.Linear(256, num_classes),
         )
     MTCNN_DETECTOR = MTCNN(
         image_size=224,
         margin=40,
         keep_all=False,
+        post_process=False,
         device='cpu'
     )
     logger.info("MTCNN face detector initialized.")
 TRANSFORM = T.Compose([
     T.Resize((224, 224)),
+    T.CenterCrop(224),
     T.ToTensor(),
     T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
 ])
     if MTCNN_DETECTOR is None:
         return None
     try:
+        boxes, probs = MTCNN_DETECTOR.detect(img)
+        if boxes is None or len(boxes) == 0:
+            return None
+        best_idx = np.argmax(probs)
+        best_prob = probs[best_idx]
+        if best_prob < 0.9:
+            return None
+        box = boxes[best_idx]
+        w, h = img.size
+        x1, y1, x2, y2 = [int(b) for b in box]
+        margin = 40
+        x1 = max(0, x1 - margin)
+        y1 = max(0, y1 - margin)
+        x2 = min(w, x2 + margin)
+        y2 = min(h, y2 + margin)
+        face = img.crop((x1, y1, x2, y2))
+        return face.resize((224, 224), Image.LANCZOS)
     except Exception:
         pass
     return None
     if not fake_probs:
         raise ValueError("No frames could be processed.")
+    # 1. Simple Aggregation (Mean) to match test_real.py
+    video_fake_prob = float(np.mean(fake_probs))
+    is_fake = video_fake_prob > 0.5
     avg_real = 1.0 - video_fake_prob
     return {
 @app.post("/predict")
 async def predict(file: UploadFile = File(...)):
+    allowed_exts = {".mp4", ".mov", ".avi", ".mkv", ".jpg", ".jpeg", ".png", ".webp"}
     ext = Path(file.filename).suffix.lower() if file.filename else ""
     if ext not in allowed_exts:
         model = load_model()
         logger.info(f"[{job_id}] Processing: {file.filename} ({size_mb:.1f} MB)")
+        if ext in {".mp4", ".mov", ".avi", ".mkv"}:
+            frame_paths = extract_frames(str(video_path), str(frames_dir))
+            if not frame_paths:
+                raise HTTPException(422, "No frames could be extracted from video.")
+        else:
+            img_path = frames_dir / f"frame_0000{ext}"
+            shutil.copy(video_path, img_path)
+            frame_paths = [str(img_path)]
         result = run_inference(model, frame_paths)
         result["filename"] = file.filename