Spaces:
Paused
Paused
| # for Zero GPU Spaces compatibility | |
| import spaces | |
def dummy_gpu():
    """Do nothing; placeholder kept for Zero GPU Spaces compatibility.

    NOTE(review): placeholders of this kind are usually decorated with
    ``@spaces.GPU``; no decorator is present here -- confirm that is intended.
    """
| import gradio as gr | |
| import numpy as np | |
| import cv2 | |
| import torch | |
| import onnxruntime as ort | |
| from optimum.onnxruntime import ORTModel | |
| from ultralytics import YOLO | |
| import os | |
| from typing import Tuple, List | |
| import subprocess | |
def install_cuda_toolkit():
    """Download and install the CUDA 12.2 toolkit, then export its paths.

    The NVIDIA runfile installer is fetched into ``/tmp`` with ``wget``, made
    executable and run with ``--silent --toolkit``.  Afterwards ``CUDA_HOME``,
    ``PATH``, ``LD_LIBRARY_PATH`` and ``TORCH_CUDA_ARCH_LIST`` are set in
    ``os.environ`` for the current process.

    Raises:
        subprocess.CalledProcessError: If the download, ``chmod`` or the
            installer exits with a non-zero status.
    """
    print("Installing CUDA Toolkit.")
    # Previously pinned installer, kept for reference:
    # cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
    cuda_toolkit_url = (
        "https://developer.download.nvidia.com/compute/cuda/12.2.0/"
        "local_installers/cuda_12.2.0_535.54.03_linux.run"
    )
    cuda_toolkit_file = "/tmp/%s" % os.path.basename(cuda_toolkit_url)

    # Each step depends on the previous one, so stop at the first failure
    # instead of silently continuing with a missing or broken installer.
    commands = (
        ["wget", "-q", cuda_toolkit_url, "-O", cuda_toolkit_file],
        ["chmod", "+x", cuda_toolkit_file],
        [cuda_toolkit_file, "--silent", "--toolkit"],
    )
    for command in commands:
        subprocess.run(command, check=True)

    cuda_home = "/usr/local/cuda"
    os.environ["CUDA_HOME"] = cuda_home

    # Prepend the CUDA directories.  An unset/empty variable must not leave a
    # trailing ":" behind, because an empty entry means "current directory".
    current_path = os.environ.get("PATH", "")
    cuda_bin = f"{cuda_home}/bin"
    os.environ["PATH"] = f"{cuda_bin}:{current_path}" if current_path else cuda_bin

    # NOTE(review): runfile installs commonly place libraries under ``lib64``;
    # confirm that ``lib`` is the intended directory.
    current_ld_path = os.environ.get("LD_LIBRARY_PATH", "")
    cuda_lib = f"{cuda_home}/lib"
    os.environ["LD_LIBRARY_PATH"] = (
        f"{cuda_lib}:{current_ld_path}" if current_ld_path else cuda_lib
    )

    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
| #install_cuda_toolkit() | |
# ---------------------------------------------------------------------------
# Configuration - UPDATE THESE VALUES to match your own model
# ---------------------------------------------------------------------------

# Model files
MODEL_PT_PATH = "model.pt"      # trained PyTorch checkpoint to convert
MODEL_ONNX_PATH = "model.onnx"  # name of the ONNX file used for inference

# Pre-/post-processing
INPUT_SIZE = 640                    # square network input; must match training size
CLASS_NAMES = ["class0", "class1"]  # replace with the model's real class names
CONF_THRESHOLD = 0.5                # minimum class score for a detection to be kept
IOU_THRESHOLD = 0.45                # IoU threshold passed to NMS

# Execution device: use CUDA whenever PyTorch reports it as available.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# ONNX Runtime session options shared by the model loader.
# Severity level 0 is the most verbose logging level.
session_options = ort.SessionOptions()
session_options.log_severity_level = 0
def convert_pt_to_onnx():
    """Export the PyTorch checkpoint to ONNX unless the ONNX file exists.

    Loads ``MODEL_PT_PATH`` with Ultralytics ``YOLO`` and exports it at
    ``INPUT_SIZE`` (opset 12, simplified graph, static shapes, FP32).  The
    file written by the exporter is then moved to ``MODEL_ONNX_PATH`` if it
    was written under a different name.

    Raises:
        RuntimeError: If loading, exporting or moving the model fails, or if
            no file exists at ``MODEL_ONNX_PATH`` after the export.
    """
    print(f'Converting model on {"cuda" if torch.cuda.is_available() else "cpu"}')
    if os.path.exists(MODEL_ONNX_PATH):
        return

    print("Converting PyTorch model to ONNX...")
    try:
        # Load trained YOLO model
        model = YOLO(MODEL_PT_PATH)
        # export() returns the path of the file it wrote; use that instead of
        # guessing a default file name.
        exported_path = model.export(
            format="onnx",
            imgsz=INPUT_SIZE,
            opset=12,
            simplify=True,
            dynamic=False,
            half=False,  # Disable for maximum compatibility
        )
        if exported_path:
            exported_path = str(exported_path)
            already_in_place = (
                os.path.abspath(exported_path) == os.path.abspath(MODEL_ONNX_PATH)
            )
            if not already_in_place:
                os.replace(exported_path, MODEL_ONNX_PATH)
        # Only report success when the expected file is really there.
        if not os.path.exists(MODEL_ONNX_PATH):
            raise FileNotFoundError(
                f"export did not produce {MODEL_ONNX_PATH}"
            )
        print("ONNX conversion successful!")
    except Exception as e:
        raise RuntimeError(f"ONNX conversion failed: {str(e)}") from e
def load_onnx_model() -> ort.InferenceSession:
    """Create the ONNX Runtime session for ``MODEL_ONNX_PATH``.

    The session is opened through ``ORTModel.load_model`` with the shared
    ``session_options``.  The CUDA execution provider is requested unless
    ``DEVICE`` is ``"cpu"``.

    Returns:
        The loaded inference session.

    Raises:
        RuntimeError: If the session cannot be created; the original error
            is attached as the cause.
    """
    print(f'Loading model on {"cuda" if torch.cuda.is_available() else "cpu"}')
    if DEVICE != "cpu":
        provider = "CUDAExecutionProvider"
    else:
        provider = "CPUExecutionProvider"
    try:
        return ORTModel.load_model(
            MODEL_ONNX_PATH,
            provider=provider,
            session_options=session_options,
        )
    except Exception as e:
        raise RuntimeError(f"Failed to load ONNX model: {str(e)}") from e
# Initialize model: export to ONNX if needed, then open the inference session.
convert_pt_to_onnx()
ort_session = load_onnx_model()
# Use the public accessor rather than the private ``_providers`` attribute.
print("Available Providers: ", ort_session.get_providers())
# To require GPU execution, check that "CUDAExecutionProvider" is in
# ort_session.get_providers() and fail otherwise.
def letterbox_image(image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
    """Letterbox an image to ``INPUT_SIZE`` x ``INPUT_SIZE`` for the model.

    The image is resized with its aspect ratio preserved, centred on a
    114-gray square canvas, scaled to ``[0, 1]`` float32 and rearranged to a
    ``(1, 3, INPUT_SIZE, INPUT_SIZE)`` tensor.

    Args:
        image: HxWx3 uint8 image.

    Returns:
        - Processed image tensor
        - Scale factor that was applied to the original dimensions
        - Padding offsets (width, height) of the image inside the canvas

    Raises:
        ValueError: If the image has zero height or width.
    """
    # Get original dimensions
    h, w = image.shape[:2]
    if h == 0 or w == 0:
        raise ValueError(f"Cannot letterbox an empty image of shape {image.shape}")

    # Calculate scale and new dimensions.  Clamp to at least one pixel so an
    # extreme aspect ratio cannot produce a zero-sized resize target.
    scale = min(INPUT_SIZE / h, INPUT_SIZE / w)
    new_h = min(INPUT_SIZE, max(1, int(h * scale)))
    new_w = min(INPUT_SIZE, max(1, int(w * scale)))

    # Resize with area interpolation
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Create canvas with 114-gray background
    canvas = np.full((INPUT_SIZE, INPUT_SIZE, 3), 114, dtype=np.uint8)

    # Calculate padding offsets that centre the resized image
    pad_w = (INPUT_SIZE - new_w) // 2
    pad_h = (INPUT_SIZE - new_h) // 2

    # Paste resized image onto canvas
    canvas[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized

    # Convert to float32 and normalize
    processed = canvas.astype(np.float32) / 255.0

    # Transpose to CHW format and add batch dimension
    processed = processed.transpose(2, 0, 1)[None, ...]
    return processed, scale, (pad_w, pad_h)
def process_detections(
    outputs: np.ndarray,
    scale: float,
    padding: Tuple[int, int],
    orig_shape: Tuple[int, int]
) -> Tuple[List[List[int]], List[float], List[int]]:
    """Turn raw model outputs into final detections in original-image pixels.

    Predictions are filtered by ``CONF_THRESHOLD``, converted from
    centre/size to corner boxes, mapped back through the letterbox transform,
    clipped to the image and de-duplicated with NMS (``IOU_THRESHOLD``).

    Args:
        outputs: Model outputs; ``outputs[0]`` is read as
            ``(1, 4 + num_classes, num_candidates)``.
        scale: Scale factor returned by ``letterbox_image``.
        padding: ``(pad_w, pad_h)`` offsets returned by ``letterbox_image``.
        orig_shape: ``(height, width)`` of the original image.

    Returns:
        - List of bounding boxes [x1, y1, x2, y2]
        - List of confidence scores
        - List of class IDs
    """
    # Transpose and squeeze outputs -> one row per candidate
    predictions = np.squeeze(outputs[0]).T

    # Filter by confidence threshold (best class score per candidate)
    scores = np.max(predictions[:, 4:], axis=1)
    valid = scores > CONF_THRESHOLD
    predictions = predictions[valid]
    scores = scores[valid]
    if predictions.shape[0] == 0:
        return [], [], []

    # Extract boxes and classes
    boxes = predictions[:, :4].copy()
    class_ids = np.argmax(predictions[:, 4:], axis=1)

    # Convert from center to corner coordinates
    boxes[:, [0, 1]] = boxes[:, [0, 1]] - boxes[:, [2, 3]] / 2  # xy top-left
    boxes[:, [2, 3]] = boxes[:, [0, 1]] + boxes[:, [2, 3]]  # xy bottom-right

    # Adjust for letterbox padding and scale
    pad_w, pad_h = padding
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale

    # Clip coordinates to image dimensions
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, orig_shape[1])
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, orig_shape[0])

    # Convert to integer coordinates
    boxes = boxes.round().astype(int)

    # cv2.dnn.NMSBoxes expects [x, y, width, height]; passing corner boxes
    # would make it compute the overlap from wrong rectangles.
    nms_boxes = boxes.copy()
    nms_boxes[:, 2] -= nms_boxes[:, 0]
    nms_boxes[:, 3] -= nms_boxes[:, 1]

    # Apply NMS
    indices = cv2.dnn.NMSBoxes(
        nms_boxes.tolist(),
        scores.tolist(),
        CONF_THRESHOLD,
        IOU_THRESHOLD
    )
    if len(indices) == 0:
        return [], [], []

    # Some OpenCV versions return an (N, 1) array; flatten to plain indices.
    keep = np.asarray(indices).reshape(-1)

    # Return filtered results as plain lists, like the empty case does
    return boxes[keep].tolist(), scores[keep].tolist(), class_ids[keep].tolist()
def draw_detections(
    image: np.ndarray,
    boxes: List[List[int]],
    scores: List[float],
    class_ids: List[int]
) -> np.ndarray:
    """Draw bounding boxes and labels on a copy of the image.

    Args:
        image: Image to annotate; it is not modified.
        boxes: Bounding boxes as [x1, y1, x2, y2] in pixel coordinates.
        scores: Confidence score for each box.
        class_ids: Class index for each box; indices outside ``CLASS_NAMES``
            are labelled with the numeric id.

    Returns:
        The annotated copy of ``image``.
    """
    output = image.copy()
    color = (0, 255, 0)  # Green
    font = cv2.FONT_HERSHEY_SIMPLEX
    for box, score, class_id in zip(boxes, scores, class_ids):
        x1, y1, x2, y2 = (int(v) for v in box)

        # Draw bounding box
        cv2.rectangle(output, (x1, y1), (x2, y2), color, 2)

        # Create label; fall back to the raw id when CLASS_NAMES has no
        # entry for it (e.g. the placeholder list was not updated).
        class_id = int(class_id)
        if 0 <= class_id < len(CLASS_NAMES):
            name = CLASS_NAMES[class_id]
        else:
            name = str(class_id)
        label = f"{name}: {score:.2f}"

        # Get text size
        (tw, th), _ = cv2.getTextSize(label, font, 0.5, 1)

        # Put the label above the box, or just inside it when there is no
        # room above, so it never ends up outside the image.
        label_bottom = y1 if y1 - th - 4 >= 0 else y1 + th + 4

        # Draw text background
        cv2.rectangle(
            output,
            (x1, label_bottom - th - 4),
            (x1 + tw, label_bottom),
            color,
            -1
        )

        # Draw text
        cv2.putText(
            output,
            label,
            (x1, label_bottom - 4),
            font,
            0.5,
            (0, 0, 0),
            1,
            cv2.LINE_AA
        )
    return output
def inference_frame(frame: np.ndarray) -> np.ndarray:
    """Run the full detection pipeline on a single frame.

    The frame is letterboxed, passed through ``ort_session``, post-processed
    into detections and, if any were found, annotated.

    Args:
        frame: Image to process.  ``None`` is passed straight through so a
            stream that has not produced a frame yet does not raise.

    Returns:
        The annotated frame, or the input frame unchanged when nothing was
        detected (``None`` when ``frame`` is ``None``).
    """
    # A streaming source can fire before any image is available.
    if frame is None:
        return None

    # Preprocess
    input_tensor, scale, padding = letterbox_image(frame)

    # Inference
    input_name = ort_session.get_inputs()[0].name
    outputs = ort_session.run(None, {input_name: input_tensor})

    # Post-process
    boxes, scores, class_ids = process_detections(
        outputs,
        scale,
        padding,
        frame.shape[:2]
    )

    # Draw results
    if len(boxes) > 0:
        frame = draw_detections(frame, boxes, scores, class_ids)
    return frame
# Gradio interface: webcam input on the left, annotated detections on the right.
with gr.Blocks() as app:
    gr.Markdown("# Real-Time YOLOv8 Object Detection")

    with gr.Row():
        webcam = gr.Image(label="Webcam Input", sources=["webcam"], streaming=True)
        output = gr.Image(interactive=False, label="Detections")

    # Run the detector on every streamed webcam frame and show the result.
    webcam.stream(
        inference_frame,
        inputs=webcam,
        outputs=output,
        show_progress="hidden",
    )

# Only start the server when executed as a script.
if __name__ == "__main__":
    app.launch(show_error=True)
| # https://discuss.huggingface.co/t/failed-to-create-cudaexecutionprovider/26501 | |
| # https://stackoverflow.com/questions/75267445/why-does-onnxruntime-fail-to-create-cudaexecutionprovider-in-linuxubuntu-20 | |
| # https://github.com/microsoft/onnxruntime/issues/4292 | |
| # https://github.com/ultralytics/ultralytics/issues/664 |