Spaces:

seyma9gulsen
/

DeepFake-Video

Build error

App Files Community

seyma9gulsen committed on Jun 20, 2025

Commit

0424e50

1 Parent(s): 92b6df1

confidence-weighted prediction

Browse files

Files changed (2) hide show

app.py +13 -8
predictor.py +37 -14

app.py CHANGED Viewed

@@ -3,15 +3,20 @@ from predictor import predict_video
 from predictor import predict_video_with_cam
 def inference(video):
-    cams, (video_label, video_conf) = predict_video_with_cam(video, max_frames=3)
     if not cams:
         return "No face detected.", []
-    result_text = f"Video-Level Prediction → {video_label} ({video_conf:.2%})\n\n"
-    result_text += "\n".join(
-        [f"Frame {i+1} → {label} ({conf:.2%})" for i, (label, conf, _) in enumerate(cams)]
-    )
     images = [img for _, _, img in cams]
     return result_text, images
@@ -19,11 +24,11 @@ demo = gr.Interface(
     fn=inference,
     inputs=gr.Video(),
     outputs=[
-        gr.Text(),
-        gr.Gallery(label="Grad-CAM Visuals", columns=3)  # ✅ updated: no .style()
     ],
     title="Deepfake Detection App with Grad-CAM",
-    description="Upload a short video. The model will classify and visualize what parts of the face it focuses on."
 )
 if __name__ == "__main__":

 from predictor import predict_video_with_cam
 def inference(video):
+    cams, final_label, final_conf, faces_detected, total_frames = predict_video_with_cam(video, max_frames=5)
     if not cams:
         return "No face detected.", []
+    frame_info = [f"Frame {i+1} → {label} ({conf:.2%})" for i, (label, conf, _) in enumerate(cams)]
+    summary = f"**Video-Level Prediction → {final_label} ({final_conf:.2%})**"
+    face_warning = ""
+    if faces_detected < total_frames:
+        face_warning = f"\n Only {faces_detected} out of {total_frames} frames contained detectable faces."
+    result_text = summary + "\n" + "\n".join(frame_info) + face_warning
     images = [img for _, _, img in cams]
     return result_text, images
     fn=inference,
     inputs=gr.Video(),
     outputs=[
+        gr.Markdown(label="Prediction Summary"),
+        gr.Gallery(label="Grad-CAM Visuals")
     ],
     title="Deepfake Detection App with Grad-CAM",
+    description="Upload a short video. The model will classify and visualize what parts of the face it focuses on.\n\n**Model Info:** EfficientNet-B0 trained on FaceForensics++ subset with MixUp and Label Smoothing. Grad-CAM used for explainability."
 )
 if __name__ == "__main__":

predictor.py CHANGED Viewed

@@ -8,6 +8,8 @@ from collections import Counter
 from pytorch_grad_cam import GradCAM
 from pytorch_grad_cam.utils.image import show_cam_on_image
 import numpy as np
 # Cihaz kontrolü (CPU zorunlu)
 device = torch.device("cpu")
@@ -59,13 +61,18 @@ def predict_video(video_path, max_frames=10):
     label = learner.dls.vocab[final_pred]
     return label, final_conf
-def predict_video_with_cam(video_path, max_frames=3):
     cap = cv2.VideoCapture(video_path)
     learner.model.eval()
     results = []
-    count = 0
-    while cap.isOpened() and count < max_frames:
         ret, frame = cap.read()
         if not ret:
             break
@@ -74,6 +81,7 @@ def predict_video_with_cam(video_path, max_frames=3):
         boxes = detections[0].boxes.xyxy
         if len(boxes) > 0:
             x1, y1, x2, y2 = map(int, boxes[0].tolist())
             face = frame[y1:y2, x1:x2]
             if face.size == 0:
@@ -94,22 +102,37 @@ def predict_video_with_cam(video_path, max_frames=3):
             target_layer = get_last_conv_layer(learner.model)
             cam = GradCAM(model=learner.model, target_layers=[target_layer])
             input_tensor = learner.dls.test_dl([img]).one_batch()[0]
-            grayscale_cam = cam(input_tensor=input_tensor)[0]
             img_array = np.array(img.resize((224, 224))) / 255.0
             cam_image = show_cam_on_image(img_array, grayscale_cam, use_rgb=True)
-            results.append((learner.dls.vocab[pred_class], conf, cam_image))
-            count += 1
-    cap.release()
-    if not results:
-        return [], ("unknown", 0.0)
-    # 🔁 Majority vote (video-level prediction)
-    majority_label = Counter([label for label, _, _ in results]).most_common(1)[0][0]
-    majority_conf = np.mean([conf for label, conf, _ in results if label == majority_label])
-    return results, (majority_label, majority_conf)

 from pytorch_grad_cam import GradCAM
 from pytorch_grad_cam.utils.image import show_cam_on_image
 import numpy as np
+from collections import defaultdict
 # Cihaz kontrolü (CPU zorunlu)
 device = torch.device("cpu")
     label = learner.dls.vocab[final_pred]
     return label, final_conf
+def predict_video_with_cam(video_path, max_frames=5):
     cap = cv2.VideoCapture(video_path)
     learner.model.eval()
     results = []
+    preds_list = []
+    confs_list = []
+    frame_index = 0
+    frames_with_faces = 0
+    while cap.isOpened() and frame_index < max_frames:
         ret, frame = cap.read()
         if not ret:
             break
         boxes = detections[0].boxes.xyxy
         if len(boxes) > 0:
+            frames_with_faces += 1
             x1, y1, x2, y2 = map(int, boxes[0].tolist())
             face = frame[y1:y2, x1:x2]
             if face.size == 0:
             target_layer = get_last_conv_layer(learner.model)
             cam = GradCAM(model=learner.model, target_layers=[target_layer])
             input_tensor = learner.dls.test_dl([img]).one_batch()[0]
+            grayscale_cam = cam(input_tensor=input_tensor)[0]
             img_array = np.array(img.resize((224, 224))) / 255.0
             cam_image = show_cam_on_image(img_array, grayscale_cam, use_rgb=True)
+            label = learner.dls.vocab[pred_class]
+            results.append((label, conf, cam_image))
+            preds_list.append(pred_class)
+            confs_list.append(conf)
+        frame_index += 1
+    cap.release()
+    # Weighted confidence score
+    if preds_list:
+        class_indices = list(set(preds_list))
+        class_scores = defaultdict(float)
+        total_weight = defaultdict(float)
+        for pred, conf in zip(preds_list, confs_list):
+            class_scores[pred] += conf
+            total_weight[pred] += 1
+        weighted_avg = {c: class_scores[c]/total_weight[c] for c in class_scores}
+        final_pred = max(weighted_avg, key=weighted_avg.get)
+        final_conf = weighted_avg[final_pred]
+        final_label = learner.dls.vocab[final_pred]
+    else:
+        final_label = "unknown"
+        final_conf = 0.0
+    return results, final_label, final_conf, frames_with_faces, max_frames