seyma9gulsen commited on
Commit
0424e50
·
1 Parent(s): 92b6df1

confidence-weighted prediction

Browse files
Files changed (2) hide show
  1. app.py +13 -8
  2. predictor.py +37 -14
app.py CHANGED
@@ -3,15 +3,20 @@ from predictor import predict_video
3
  from predictor import predict_video_with_cam
4
 
5
  def inference(video):
6
- cams, (video_label, video_conf) = predict_video_with_cam(video, max_frames=3)
7
 
8
  if not cams:
9
  return "No face detected.", []
10
 
11
- result_text = f"Video-Level Prediction → {video_label} ({video_conf:.2%})\n\n"
12
- result_text += "\n".join(
13
- [f"Frame {i+1} → {label} ({conf:.2%})" for i, (label, conf, _) in enumerate(cams)]
14
- )
 
 
 
 
 
15
  images = [img for _, _, img in cams]
16
  return result_text, images
17
 
@@ -19,11 +24,11 @@ demo = gr.Interface(
19
  fn=inference,
20
  inputs=gr.Video(),
21
  outputs=[
22
- gr.Text(),
23
- gr.Gallery(label="Grad-CAM Visuals", columns=3) # ✅ updated: no .style()
24
  ],
25
  title="Deepfake Detection App with Grad-CAM",
26
- description="Upload a short video. The model will classify and visualize what parts of the face it focuses on."
27
  )
28
 
29
  if __name__ == "__main__":
 
3
  from predictor import predict_video_with_cam
4
 
5
def inference(video):
    """Classify a video as real/deepfake and return (markdown summary, Grad-CAM images).

    Delegates to ``predict_video_with_cam`` (predictor.py), which samples up to
    5 frames and returns per-frame (label, confidence, cam_image) triples plus
    a confidence-weighted video-level verdict.
    """
    cams, final_label, final_conf, faces_detected, total_frames = predict_video_with_cam(video, max_frames=5)

    # Nothing to report when no frame yielded a detectable face.
    if not cams:
        return "No face detected.", []

    # Video-level verdict first, then one line per analysed frame.
    lines = [f"**Video-Level Prediction → {final_label} ({final_conf:.2%})**"]
    for idx, (label, conf, _) in enumerate(cams):
        lines.append(f"Frame {idx+1} → {label} ({conf:.2%})")
    result_text = "\n".join(lines)

    # Flag partially-analysed videos where some sampled frames had no face.
    if faces_detected < total_frames:
        result_text += f"\n Only {faces_detected} out of {total_frames} frames contained detectable faces."

    # Third tuple element of each cam entry is the Grad-CAM overlay image.
    images = [img for _, _, img in cams]
    return result_text, images
22
 
 
24
  fn=inference,
25
  inputs=gr.Video(),
26
  outputs=[
27
+ gr.Markdown(label="Prediction Summary"),
28
+ gr.Gallery(label="Grad-CAM Visuals")
29
  ],
30
  title="Deepfake Detection App with Grad-CAM",
31
+ description="Upload a short video. The model will classify and visualize what parts of the face it focuses on.\n\n**Model Info:** EfficientNet-B0 trained on FaceForensics++ subset with MixUp and Label Smoothing. Grad-CAM used for explainability."
32
  )
33
 
34
  if __name__ == "__main__":
predictor.py CHANGED
@@ -8,6 +8,8 @@ from collections import Counter
8
  from pytorch_grad_cam import GradCAM
9
  from pytorch_grad_cam.utils.image import show_cam_on_image
10
  import numpy as np
 
 
11
 
12
  # Cihaz kontrolü (CPU zorunlu)
13
  device = torch.device("cpu")
@@ -59,13 +61,18 @@ def predict_video(video_path, max_frames=10):
59
  label = learner.dls.vocab[final_pred]
60
  return label, final_conf
61
 
62
- def predict_video_with_cam(video_path, max_frames=3):
63
  cap = cv2.VideoCapture(video_path)
64
  learner.model.eval()
 
65
  results = []
 
 
66
 
67
- count = 0
68
- while cap.isOpened() and count < max_frames:
 
 
69
  ret, frame = cap.read()
70
  if not ret:
71
  break
@@ -74,6 +81,7 @@ def predict_video_with_cam(video_path, max_frames=3):
74
  boxes = detections[0].boxes.xyxy
75
 
76
  if len(boxes) > 0:
 
77
  x1, y1, x2, y2 = map(int, boxes[0].tolist())
78
  face = frame[y1:y2, x1:x2]
79
  if face.size == 0:
@@ -94,22 +102,37 @@ def predict_video_with_cam(video_path, max_frames=3):
94
  target_layer = get_last_conv_layer(learner.model)
95
  cam = GradCAM(model=learner.model, target_layers=[target_layer])
96
  input_tensor = learner.dls.test_dl([img]).one_batch()[0]
97
- grayscale_cam = cam(input_tensor=input_tensor)[0]
98
 
 
99
  img_array = np.array(img.resize((224, 224))) / 255.0
100
  cam_image = show_cam_on_image(img_array, grayscale_cam, use_rgb=True)
101
 
102
- results.append((learner.dls.vocab[pred_class], conf, cam_image))
103
- count += 1
104
-
105
- cap.release()
106
 
107
- if not results:
108
- return [], ("unknown", 0.0)
109
 
110
- # 🔁 Majority vote (video-level prediction)
111
- majority_label = Counter([label for label, _, _ in results]).most_common(1)[0][0]
112
- majority_conf = np.mean([conf for label, conf, _ in results if label == majority_label])
113
 
114
- return results, (majority_label, majority_conf)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
8
  from pytorch_grad_cam import GradCAM
9
  from pytorch_grad_cam.utils.image import show_cam_on_image
10
  import numpy as np
11
+ from collections import defaultdict
12
+
13
 
14
  # Cihaz kontrolü (CPU zorunlu)
15
  device = torch.device("cpu")
 
61
  label = learner.dls.vocab[final_pred]
62
  return label, final_conf
63
 
64
+ def predict_video_with_cam(video_path, max_frames=5):
65
  cap = cv2.VideoCapture(video_path)
66
  learner.model.eval()
67
+
68
  results = []
69
+ preds_list = []
70
+ confs_list = []
71
 
72
+ frame_index = 0
73
+ frames_with_faces = 0
74
+
75
+ while cap.isOpened() and frame_index < max_frames:
76
  ret, frame = cap.read()
77
  if not ret:
78
  break
 
81
  boxes = detections[0].boxes.xyxy
82
 
83
  if len(boxes) > 0:
84
+ frames_with_faces += 1
85
  x1, y1, x2, y2 = map(int, boxes[0].tolist())
86
  face = frame[y1:y2, x1:x2]
87
  if face.size == 0:
 
102
  target_layer = get_last_conv_layer(learner.model)
103
  cam = GradCAM(model=learner.model, target_layers=[target_layer])
104
  input_tensor = learner.dls.test_dl([img]).one_batch()[0]
 
105
 
106
+ grayscale_cam = cam(input_tensor=input_tensor)[0]
107
  img_array = np.array(img.resize((224, 224))) / 255.0
108
  cam_image = show_cam_on_image(img_array, grayscale_cam, use_rgb=True)
109
 
110
+ label = learner.dls.vocab[pred_class]
111
+ results.append((label, conf, cam_image))
112
+ preds_list.append(pred_class)
113
+ confs_list.append(conf)
114
 
115
+ frame_index += 1
 
116
 
117
+ cap.release()
 
 
118
 
119
+ # Weighted confidence score
120
+ if preds_list:
121
+ class_indices = list(set(preds_list))
122
+ class_scores = defaultdict(float)
123
+ total_weight = defaultdict(float)
124
+
125
+ for pred, conf in zip(preds_list, confs_list):
126
+ class_scores[pred] += conf
127
+ total_weight[pred] += 1
128
+
129
+ weighted_avg = {c: class_scores[c]/total_weight[c] for c in class_scores}
130
+ final_pred = max(weighted_avg, key=weighted_avg.get)
131
+ final_conf = weighted_avg[final_pred]
132
+ final_label = learner.dls.vocab[final_pred]
133
+ else:
134
+ final_label = "unknown"
135
+ final_conf = 0.0
136
+
137
+ return results, final_label, final_conf, frames_with_faces, max_frames
138