Spaces:

SanskarModi
/

deepdetect

Sleeping

App Files Files Community

SanskarModi committed on Apr 8, 2025

Commit

397b9f3

verified ·

1 Parent(s): 04bb1f8

Update prediction.py

Browse files

Files changed (1) hide show

prediction.py +26 -17

prediction.py CHANGED Viewed

@@ -281,19 +281,19 @@ class Prediction:
     def predict(self, video, seq_length=None):
         """
         Predict whether a video is real or fake.
         Args:
             video (str): Path to the video file
             seq_length (int, optional): Number of frames to use
         Returns:
             tuple: (prediction_result, gradcam_image, classification_details)
         """
         frames, raw_frames = self.preprocess(video, seq_length)
         if not frames:
             return "No faces detected in the video", None, None
         # Prepare input tensor for the model
         target_seq_length = (
             seq_length if seq_length is not None else self.default_frame_count
@@ -302,14 +302,23 @@ class Prediction:
         input_tensor = input_tensor.view(1, target_seq_length, 3, *self.resolution)
         input_tensor = input_tensor.to(self.device)
         input_tensor.requires_grad_()
-        # Forward pass to get feature maps and final output
-        fmap, attn_wts, output = self.model(input_tensor)
         fmap.register_hook(self.save_gradients)
         # Get predictions for all classes
-        class_probs = F.softmax(output, dim=1).detach().cpu().numpy()[0]
         # Get the predicted class
         predicted_class_idx = np.argmax(class_probs)
         predicted_class = (
@@ -318,7 +327,7 @@ class Prediction:
             else "Unknown"
         )
         prediction = "Deepfake" if predicted_class_idx > 0 else "Real"
         # Format confidence values to 2 decimal places
         confidence_class = round(class_probs[predicted_class_idx] * 100, 2)
         confidence_deepfake_real = (
@@ -327,7 +336,7 @@ class Prediction:
             else round(class_probs[0] * 100, 2)
         )
         prediction_string = f"{prediction} {confidence_deepfake_real:.2f}% Confidence"
         # Create detailed classification results
         classification_details = (
             {
@@ -340,18 +349,18 @@ class Prediction:
                 "confidence(%)": f"{confidence_class:.2f}",
             }
         )
         # Backpropagate for Grad-CAM
         self.model.zero_grad()
-        output[0, predicted_class_idx].backward()
         grads = self.gradients
         # Generate Grad-CAM visualization for the best frame
         if raw_frames:
-            # Choose middle frame for visualization
             middle_idx = len(raw_frames) // 2
             gradcam_image = self.generate_gradcam(fmap, raw_frames[middle_idx], grads)
         else:
             gradcam_image = None
         return prediction_string, gradcam_image, classification_details

     def predict(self, video, seq_length=None):
         """
         Predict whether a video is real or fake.
         Args:
             video (str): Path to the video file
             seq_length (int, optional): Number of frames to use
         Returns:
             tuple: (prediction_result, gradcam_image, classification_details)
         """
         frames, raw_frames = self.preprocess(video, seq_length)
         if not frames:
             return "No faces detected in the video", None, None
         # Prepare input tensor for the model
         target_seq_length = (
             seq_length if seq_length is not None else self.default_frame_count
         input_tensor = input_tensor.view(1, target_seq_length, 3, *self.resolution)
         input_tensor = input_tensor.to(self.device)
         input_tensor.requires_grad_()
+        # Forward pass to get model output dict
+        with torch.no_grad():
+            output_dict = self.model(input_tensor)
+        # Extract relevant outputs
+        fmap = output_dict["fmap"]
+        attn_wts = output_dict["attn"]
+        logits = output_dict["logits"]
+        # Register hook for Grad-CAM
+        fmap.requires_grad_()
         fmap.register_hook(self.save_gradients)
         # Get predictions for all classes
+        class_probs = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
         # Get the predicted class
         predicted_class_idx = np.argmax(class_probs)
         predicted_class = (
             else "Unknown"
         )
         prediction = "Deepfake" if predicted_class_idx > 0 else "Real"
         # Format confidence values to 2 decimal places
         confidence_class = round(class_probs[predicted_class_idx] * 100, 2)
         confidence_deepfake_real = (
             else round(class_probs[0] * 100, 2)
         )
         prediction_string = f"{prediction} {confidence_deepfake_real:.2f}% Confidence"
         # Create detailed classification results
         classification_details = (
             {
                 "confidence(%)": f"{confidence_class:.2f}",
             }
         )
         # Backpropagate for Grad-CAM
         self.model.zero_grad()
+        logits[0, predicted_class_idx].backward()
         grads = self.gradients
         # Generate Grad-CAM visualization for the best frame
         if raw_frames:
             middle_idx = len(raw_frames) // 2
             gradcam_image = self.generate_gradcam(fmap, raw_frames[middle_idx], grads)
         else:
             gradcam_image = None
         return prediction_string, gradcam_image, classification_details