Spaces:

JonSnow1512
/

video-crime-classifier

Runtime error

App Files Files Community

JonSnow1512 committed on May 4, 2025

Commit

c2d642e

verified ·

1 Parent(s): 22d1533

Create app.py

Browse files

Files changed (1) hide show

app.py +85 -0

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import os
+import torch
+import cv2
+import numpy as np
+import gradio as gr
+from torchvision import transforms
+from transformers import VideoMAEForVideoClassification
+# Class mapping
+class_mapping = {
+    "Abuse": 0, "Arrest": 1, "Arson": 2, "Assault": 3, "Burglary": 4,
+    "Explosion": 5, "Fighting": 6, "Normal Videos": 7, "Road Accidents": 8,
+    "Robbery": 9, "Shooting": 10, "Shoplifting": 11, "Stealing": 12, "Vandalism": 13
+}
+reverse_mapping = {v: k for k, v in class_mapping.items()}
+# Load model
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = VideoMAEForVideoClassification.from_pretrained(
+    "OPear/videomae-large-finetuned-UCF-Crime",
+    label2id=class_mapping,
+    id2label=reverse_mapping,
+    ignore_mismatched_sizes=True,
+).to(device)
+model.eval()
+# Preprocessing function
+def load_video_frames(video_path, num_frames=16, size=(224, 224)):
+    cap = cv2.VideoCapture(video_path)
+    frames = []
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
+    for i in range(total_frames):
+        ret, frame = cap.read()
+        if not ret:
+            break
+        if i in frame_indices:
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            frame = cv2.resize(frame, size)
+            frames.append(frame)
+    cap.release()
+    if len(frames) == 0:
+        raise ValueError("No frames read from video.")
+    if len(frames) < num_frames:
+        frames.extend([frames[-1]] * (num_frames - len(frames)))
+    frames = np.stack(frames, axis=0)
+    frames = torch.tensor(frames, dtype=torch.float32).permute(0, 3, 1, 2) / 255.0
+    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                     std=[0.229, 0.224, 0.225])
+    frames = torch.stack([normalize(f) for f in frames])
+    return frames  # [T, 3, H, W]
+# Prediction function
+def predict_crime(video_file):
+    try:
+        frames = load_video_frames(video_file)
+        input_tensor = frames.permute(1, 0, 2, 3).unsqueeze(0).to(device)  # [1, 3, T, H, W]
+        with torch.no_grad():
+            outputs = model(input_tensor)
+            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+            pred_id = torch.argmax(probs, dim=-1).item()
+            pred_class = reverse_mapping[pred_id]
+            confidence = probs[0][pred_id].item()
+        return f"**Predicted Class:** {pred_class}\n**Confidence:** {confidence:.4f}"
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Gradio interface
+interface = gr.Interface(
+    fn=predict_crime,
+    inputs=gr.Video(label="Upload a Crime-related Video", type="filepath"),
+    outputs="markdown",
+    title="🎥 Crime Type Classifier",
+    description="Upload a video (preferably 5–10s, .mp4 format). The model predicts the crime type using a fine-tuned VideoMAE."
+)
+if __name__ == "__main__":
+    interface.launch()