# Goutham204's picture
# Upload 8 files
# b29dd97 verified
import cv2
import torch
import os
import numpy as np
from torchvision import transforms
def process_video(video_path, model, output_path, device):
    """Annotate a video with detections from ``model`` and write it to disk.

    Each frame is scaled to 640x480. Every second frame is converted to RGB,
    turned into a tensor and passed through ``model``; detections scoring
    above 0.5 are drawn on the scaled frame as a green box with a
    ``label:score`` caption. The annotated frame is then scaled back to the
    source resolution and appended to an XVID-encoded output video. Frames
    that skip inference reuse the most recent detections so the boxes do not
    blink on and off between frames.

    Args:
        video_path: Location of the input video (string or path-like; it is
            converted with ``str`` before being opened).
        model: Detection model, called as ``model([img_tensor])``. The first
            element of its result must map ``"boxes"``, ``"scores"`` and
            ``"labels"`` to tensors of matching length.
        output_path: Location of the annotated video to create (string or
            path-like).
        device: Torch device the frame tensor is moved to before inference.

    Returns:
        ``output_path``, exactly as it was passed in.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created.
    """
    inference_size = (640, 480)
    score_threshold = 0.5
    frame_skip = 2
    fallback_fps = 30.0

    transform = transforms.Compose([
        transforms.ToTensor()
    ])

    cap = cv2.VideoCapture(str(video_path))
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video: {video_path}")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Some sources report 0 or NaN here; a writer created with such a
        # rate yields an unplayable file, so fall back to a sane default.
        if not fps > 0:
            fps = fallback_fps

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError(f"Cannot open video writer: {output_path}")

        detections = []  # most recent (box, score, label) triples above threshold
        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            resized_frame = cv2.resize(frame, inference_size)

            if frame_count % frame_skip == 0:
                rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
                img_tensor = transform(rgb_frame).to(device)
                with torch.no_grad():
                    prediction = model([img_tensor])[0]
                # Convert each result tensor to plain Python values once
                # instead of synchronising with the device per element.
                detections = [
                    (box, score, label)
                    for box, score, label in zip(
                        prediction["boxes"].tolist(),
                        prediction["scores"].tolist(),
                        prediction["labels"].tolist(),
                    )
                    if score > score_threshold
                ]

            # Boxes are drawn on the 640x480 frame that was fed to the model.
            for box, score, label in detections:
                x1, y1, x2, y2 = map(int, box)
                cv2.rectangle(resized_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(resized_frame, f"{label}:{score:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

            # The writer was configured with the source resolution.
            output_frame = cv2.resize(resized_frame, (width, height))
            out.write(output_frame)
            frame_count += 1
    finally:
        # Always release the native handles, even if a frame fails mid-way.
        cap.release()
        if out is not None:
            out.release()
    return output_path