File size: 1,746 Bytes
b29dd97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import cv2
import torch
import os
import numpy as np
from torchvision import transforms
def process_video(video_path, model, output_path, device):
    """Run a detection model over a video and write an annotated copy.

    Each frame is resized to 640x480. On every second frame the model is
    run and every detection scoring above 0.5 is drawn as a green box with
    a "label:score" caption. Every frame (annotated or not) is then resized
    back to the source resolution and appended to an XVID-encoded output.

    Args:
        video_path: Input video location; converted with ``str()`` before
            being handed to OpenCV.
        model: Callable invoked as ``model([tensor])``; the first element
            of its result is indexed with "boxes", "scores" and "labels".
        output_path: Destination video location; converted with ``str()``
            for OpenCV so path-like objects work the same as for the input.
        device: Target passed to ``Tensor.to`` for each frame tensor.

    Returns:
        ``output_path``, exactly as it was passed in.

    Raises:
        IOError: If the input video cannot be opened, or if the output
            writer cannot be opened.
    """
    inference_size = (640, 480)  # (width, height) fed to the model
    frame_skip = 2               # run the model on every Nth frame
    score_threshold = 0.5        # detections at or below this are ignored

    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    cap = cv2.VideoCapture(str(video_path))
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video: {video_path}")

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # str() mirrors the input handling so a pathlib.Path is accepted.
        out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
        if not out.isOpened():
            # Without this check every write() below is silently dropped
            # and the caller gets back a path to a missing/empty file.
            raise IOError(f"Cannot open video writer: {output_path}")

        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = cv2.resize(frame, inference_size)

            if frame_count % frame_skip == 0:
                # OpenCV delivers BGR; convert to RGB before building the tensor.
                rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
                img_tensor = transform(rgb_frame).to(device)
                with torch.no_grad():
                    prediction = model([img_tensor])[0]

                detections = zip(
                    prediction["boxes"],
                    prediction["scores"],
                    prediction["labels"],
                )
                for box, score, label in detections:
                    if score > score_threshold:
                        x1, y1, x2, y2 = map(int, box)
                        cv2.rectangle(
                            resized_frame, (x1, y1), (x2, y2), (0, 255, 0), 2
                        )
                        cv2.putText(
                            resized_frame,
                            f"{label.item()}:{score:.2f}",
                            (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.5,
                            (255, 0, 0),
                            2,
                        )

            # Boxes were drawn in 640x480 space; scaling the annotated frame
            # back keeps them aligned with the source resolution.
            output_frame = cv2.resize(resized_frame, (width, height))
            out.write(output_frame)
            frame_count += 1
    finally:
        # Release on every exit path so a failure mid-video does not leak
        # the capture handle or leave the output file unfinalized.
        cap.release()
        if out is not None:
            out.release()

    return output_path
|