File size: 1,484 Bytes
d10f051
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import torch
from transformers import AutoModelForVideoClassification, AutoProcessor
import cv2
import numpy as np

# Load model and processor
model = AutoModelForVideoClassification.from_pretrained("Nikeytas/videomae-crime-detector-fixed-format")
processor = AutoProcessor.from_pretrained("Nikeytas/videomae-crime-detector-fixed-format")

# Process video
def classify_video(video_path, num_frames=16):
    # Extract frames
    cap = cv2.VideoCapture(video_path)
    frames = []
    
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
    
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if ret:
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame_rgb)
    
    cap.release()
    
    # Process with model
    inputs = processor(frames, return_tensors="pt")
    
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(predictions, dim=-1).item()
        confidence = predictions[0][predicted_class].item()
    
    label = "Violent Crime" if predicted_class == 1 else "Non-Violent"
    return label, confidence

# Example usage
video_path = "path/to/your/video.mp4"
prediction, confidence = classify_video(video_path)
print(f"Prediction: {prediction} (Confidence: {confidence:.3f})")