"""Run sliced YOLOv8 inference on a video with SAHI, with ByteTrack tracking and supervision annotation."""
import sys

import cv2
import numpy as np
import supervision as sv
from sahi.models.yolov8 import Yolov8DetectionModel
from sahi.predict import get_sliced_prediction
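
# Validate the seven required command-line arguments.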
if len(sys.argv) != 8:
    print("Usage: python yolov8_video_inference.py <model_path> <input_video_path> <output_video_path> <slice_height> <slice_width> <overlap_height_ratio> <overlap_width_ratio>")
    sys.exit(1)
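
# Parse positional arguments.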
model_path = sys.argv[1]
input_video_path = sys.argv[2]
output_video_path = sys.argv[3]
slice_height = int(sys.argv[4])
slice_width = int(sys.argv[5])
overlap_height_ratio = float(sys.argv[6])
overlap_width_ratio = float(sys.argv[7])
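
# Load the YOLOv8 model through SAHI's detection model wrapper.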
detection_model = Yolov8DetectionModel(
    model_path=model_path,
    confidence_threshold=0.25,
    device="cuda"  # use "cpu" if no GPU is available
)
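
# Read video metadata; the tracker uses its frame rate below.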
video_info = sv.VideoInfo.from_video_path(video_path=input_video_path)
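
# Open the input video and query its properties.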
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    print(f"Error: could not open video {input_video_path}")
    sys.exit(1)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
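
# Writer for the annotated output video.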
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
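
# ByteTrack for tracking plus a smoother to stabilize boxes across frames.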
tracker = sv.ByteTrack(frame_rate=video_info.fps)
smoother = sv.DetectionsSmoother()
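
# Annotators for corner boxes and text labels.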
box_annotator = sv.BoxCornerAnnotator(thickness=2)
label_annotator = sv.LabelAnnotator(
    text_scale=0.5,
    text_thickness=1,
    text_padding=1
)
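
# Frame counter and a running map of class id -> class name.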
frame_count = 0
class_id_to_name = {}
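
# Process the video frame by frame.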
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
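
    # Run sliced (tiled) inference; SAHI works on RGB images, while OpenCV frames are BGR.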
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = get_sliced_prediction(
        image=rgb_frame,
        detection_model=detection_model,
        slice_height=slice_height,
        slice_width=slice_width,
        overlap_height_ratio=overlap_height_ratio,
        overlap_width_ratio=overlap_width_ratio
    )
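
    # Predictions from all slices, merged back to full-frame coordinates.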
    object_predictions = result.object_prediction_list
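
    # Gather boxes, scores, and class ids; record class names as they appear.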
    xyxy = []
    confidences = []
    class_ids = []
    for pred in object_predictions:
        class_id_to_name.setdefault(pred.category.id, pred.category.name)
        xyxy.append(pred.bbox.to_xyxy())
        confidences.append(pred.score.value)
        class_ids.append(pred.category.id)
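
    # Build a supervision Detections object when anything was detected.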
    if xyxy:
        xyxy = np.array(xyxy, dtype=np.float32)
        confidences = np.array(confidences, dtype=np.float32)
        class_ids = np.array(class_ids, dtype=int)

        detections = sv.Detections(
            xyxy=xyxy,
            confidence=confidences,
            class_id=class_ids
        )
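
        # Update the tracker, then smooth boxes across frames.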
        detections = tracker.update_with_detections(detections)
        detections = smoother.update_with_detections(detections)
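
        # One label per detection: optional track id, class name, confidence.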
        labels = []
        for i in range(len(detections.xyxy)):
            class_id = detections.class_id[i]
            confidence = detections.confidence[i]
            class_name = class_id_to_name.get(class_id, "Unknown")
            label = f"{class_name} {confidence:.2f}"

            if detections.tracker_id is not None:
                tracker_id = detections.tracker_id[i]
                label = f"ID {tracker_id} {label}"

            labels.append(label)
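
        # Annotate a copy of the frame with boxes and labels.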
        annotated_frame = frame.copy()
        annotated_frame = box_annotator.annotate(
            scene=annotated_frame,
            detections=detections
        )
        annotated_frame = label_annotator.annotate(
            scene=annotated_frame,
            detections=detections,
            labels=labels
        )
    else:
        # No detections on this frame; keep the original image.
        annotated_frame = frame.copy()
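
    # Write the frame to the output video.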
    out.write(annotated_frame)
    frame_count += 1
    print(f"Processed frame {frame_count}", end="\r")
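
# Release input and output handles.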
cap.release()
out.release()
print("\nInference complete. Video saved at", output_video_path)
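
# Example invocation (paths and slice settings below are illustrative, not prescribed):
#   python yolov8_video_inference.py yolov8n.pt input.mp4 output.mp4 512 512 0.2 0.2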