Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>ONNX YOLO Segmentation Web Demo</title> | |
| <script src="https://cdn.tailwindcss.com"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script> | |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> | |
| <style> | |
| .detection-box { | |
| position: absolute; | |
| border: 2px solid #3B82F6; | |
| background-color: rgba(59, 130, 246, 0.2); | |
| display: flex; | |
| flex-direction: column; | |
| align-items: center; | |
| justify-content: flex-end; | |
| color: white; | |
| font-weight: bold; | |
| font-size: 12px; | |
| } | |
| .detection-label { | |
| background-color: #3B82F6; | |
| padding: 2px 5px; | |
| border-radius: 3px; | |
| margin-bottom: 2px; | |
| } | |
| .pulse { | |
| animation: pulse 2s infinite; | |
| } | |
| @keyframes pulse { | |
| 0% { | |
| box-shadow: 0 0 0 0 rgba(59, 130, 246, 0.7); | |
| } | |
| 70% { | |
| box-shadow: 0 0 0 10px rgba(59, 130, 246, 0); | |
| } | |
| 100% { | |
| box-shadow: 0 0 0 0 rgba(59, 130, 246, 0); | |
| } | |
| } | |
| #video-container { | |
| position: relative; | |
| width: 100%; | |
| max-width: 640px; | |
| margin: 0 auto; | |
| border-radius: 8px; | |
| overflow: hidden; | |
| box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05); | |
| } | |
| #video, #canvas { | |
| width: 100%; | |
| height: auto; | |
| display: block; | |
| } | |
| #canvas { | |
| position: absolute; | |
| top: 0; | |
| left: 0; | |
| z-index: 10; | |
| } | |
| #segmentation { | |
| position: absolute; | |
| top: 0; | |
| left: 0; | |
| z-index: 5; | |
| opacity: 0.5; | |
| } | |
| .dropzone { | |
| border: 2px dashed #4B5563; | |
| border-radius: 8px; | |
| padding: 20px; | |
| text-align: center; | |
| cursor: pointer; | |
| transition: all 0.3s; | |
| } | |
| .dropzone:hover { | |
| border-color: #3B82F6; | |
| background-color: rgba(59, 130, 246, 0.1); | |
| } | |
| .dropzone.active { | |
| border-color: #3B82F6; | |
| background-color: rgba(59, 130, 246, 0.2); | |
| } | |
| .status-badge { | |
| display: inline-flex; | |
| align-items: center; | |
| padding: 4px 8px; | |
| border-radius: 9999px; | |
| font-size: 12px; | |
| font-weight: 600; | |
| } | |
| .status-badge.ready { | |
| background-color: rgba(16, 185, 129, 0.2); | |
| color: #10B981; | |
| } | |
| .status-badge.loading { | |
| background-color: rgba(245, 158, 11, 0.2); | |
| color: #F59E0B; | |
| } | |
| .status-badge.error { | |
| background-color: rgba(239, 68, 68, 0.2); | |
| color: #EF4444; | |
| } | |
| .status-badge.disabled { | |
| background-color: rgba(75, 85, 99, 0.2); | |
| color: #4B5563; | |
| } | |
| .output-log { | |
| font-family: 'Courier New', Courier, monospace; | |
| background-color: rgba(31, 41, 55, 0.8); | |
| border-radius: 8px; | |
| padding: 16px; | |
| max-height: 200px; | |
| overflow-y: auto; | |
| } | |
| .legend { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 8px; | |
| margin-top: 8px; | |
| } | |
| .legend-item { | |
| display: flex; | |
| align-items: center; | |
| font-size: 12px; | |
| } | |
| .legend-color { | |
| width: 16px; | |
| height: 16px; | |
| border-radius: 3px; | |
| margin-right: 4px; | |
| } | |
| .confidence-bar { | |
| height: 4px; | |
| background-color: #4B5563; | |
| border-radius: 2px; | |
| margin-top: 2px; | |
| overflow: hidden; | |
| } | |
| .confidence-fill { | |
| height: 100%; | |
| background-color: #10B981; | |
| } | |
| .debug-output { | |
| font-family: 'Courier New', Courier, monospace; | |
| background-color: rgba(31, 41, 55, 0.8); | |
| border-radius: 8px; | |
| padding: 16px; | |
| max-height: 200px; | |
| overflow-y: auto; | |
| margin-top: 16px; | |
| font-size: 12px; | |
| white-space: pre-wrap; | |
| } | |
| </style> | |
| </head> | |
| <body class="bg-gray-900 text-gray-100 min-h-screen"> | |
| <div class="container mx-auto px-4 py-8"> | |
| <header class="text-center mb-8"> | |
| <h1 class="text-3xl md:text-4xl font-bold mb-2 text-blue-400"> | |
| <i class="fas fa-shapes mr-2"></i> YOLO Segmentation Web Demo | |
| </h1> | |
| <p class="text-gray-400 max-w-2xl mx-auto"> | |
| Real-time instance segmentation with YOLO ONNX models in your browser | |
| </p> | |
| </header> | |
| <div class="max-w-4xl mx-auto"> | |
| <div class="grid grid-cols-1 md:grid-cols-2 gap-8"> | |
| <!-- Left column - Controls --> | |
| <div class="space-y-6"> | |
| <!-- Model Selection --> | |
| <div class="bg-gray-800 rounded-lg p-6 shadow-lg"> | |
| <h2 class="text-xl font-bold mb-4 text-blue-400"> | |
| <i class="fas fa-file-export mr-2"></i> Model Selection | |
| </h2> | |
| <div id="dropzone" class="dropzone mb-4"> | |
| <div class="flex flex-col items-center justify-center py-4"> | |
| <i class="fas fa-file-upload text-4xl text-blue-400 mb-2"></i> | |
| <p class="text-gray-300">Drag &amp; drop your YOLO ONNX model file here</p> | |
| <p class="text-gray-400 text-sm mt-1">or click to browse</p> | |
| <input type="file" id="modelFile" accept=".onnx" class="hidden" /> | |
| </div> | |
| </div> | |
| <div class="flex items-center justify-between"> | |
| <div> | |
| <p id="modelStatusText" class="text-sm text-gray-400">No model selected</p> | |
| <p id="modelSizeText" class="text-xs text-gray-500"></p> | |
| </div> | |
| <span id="modelStatusBadge" class="status-badge disabled"> | |
| <i class="fas fa-times-circle mr-1"></i> Not Loaded | |
| </span> | |
| </div> | |
| </div> | |
| <!-- Detection Settings --> | |
| <div class="bg-gray-800 rounded-lg p-6 shadow-lg"> | |
| <h2 class="text-xl font-bold mb-4 text-blue-400"> | |
| <i class="fas fa-sliders-h mr-2"></i> Detection Settings | |
| </h2> | |
| <div class="space-y-4"> | |
| <div> | |
| <label for="confidenceThreshold" class="block text-sm font-medium text-gray-300 mb-1"> | |
| Confidence Threshold: <span id="confidenceValue">0.5</span> | |
| </label> | |
| <input type="range" id="confidenceThreshold" min="0" max="1" step="0.05" value="0.5" | |
| class="w-full h-2 bg-gray-700 rounded-lg appearance-none cursor-pointer"> | |
| </div> | |
| <div> | |
| <label for="iouThreshold" class="block text-sm font-medium text-gray-300 mb-1"> | |
| IOU Threshold: <span id="iouValue">0.45</span> | |
| </label> | |
| <input type="range" id="iouThreshold" min="0" max="1" step="0.05" value="0.45" | |
| class="w-full h-2 bg-gray-700 rounded-lg appearance-none cursor-pointer"> | |
| </div> | |
| <div class="flex items-center justify-between"> | |
| <label for="showMasks" class="text-sm font-medium text-gray-300"> | |
| Show Segmentation Masks | |
| </label> | |
| <label class="relative inline-flex items-center cursor-pointer"> | |
| <input type="checkbox" id="showMasks" class="sr-only peer" checked> | |
| <div class="w-11 h-6 bg-gray-700 peer-focus:outline-none rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-blue-600"></div> | |
| </label> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Webcam Controls --> | |
| <div class="bg-gray-800 rounded-lg p-6 shadow-lg"> | |
| <h2 class="text-xl font-bold mb-4 text-blue-400"> | |
| <i class="fas fa-video mr-2"></i> Webcam Controls | |
| </h2> | |
| <div class="flex flex-col space-y-4"> | |
| <button id="startBtn" class="bg-green-600 hover:bg-green-700 text-white font-bold py-3 px-6 rounded-lg flex items-center justify-center disabled:opacity-50 disabled:cursor-not-allowed" disabled> | |
| <i class="fas fa-play mr-2"></i> Start Detection | |
| </button> | |
| <div class="flex items-center justify-between"> | |
| <div> | |
| <p class="text-sm text-gray-400">Webcam Status</p> | |
| </div> | |
| <span id="webcamStatusBadge" class="status-badge disabled"> | |
| <i class="fas fa-times-circle mr-1"></i> Inactive | |
| </span> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Performance Stats --> | |
| <div class="bg-gray-800 rounded-lg p-6 shadow-lg"> | |
| <h2 class="text-xl font-bold mb-4 text-blue-400"> | |
| <i class="fas fa-tachometer-alt mr-2"></i> Performance | |
| </h2> | |
| <div class="grid grid-cols-2 gap-4"> | |
| <div class="bg-gray-700 p-4 rounded-lg text-center"> | |
| <div class="text-2xl font-bold text-blue-400" id="fpsCounter">-</div> | |
| <div class="text-gray-300 text-sm">FPS</div> | |
| </div> | |
| <div class="bg-gray-700 p-4 rounded-lg text-center"> | |
| <div class="text-2xl font-bold text-green-400" id="inferenceTime">-</div> | |
| <div class="text-gray-300 text-sm">ms/inference</div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Right column - Output --> | |
| <div class="space-y-6"> | |
| <!-- Video Feed --> | |
| <div class="bg-gray-800 rounded-lg p-6 shadow-lg"> | |
| <h2 class="text-xl font-bold mb-4 text-blue-400"> | |
| <i class="fas fa-eye mr-2"></i> Live Detection | |
| </h2> | |
| <div id="video-container" class="relative"> | |
| <div id="videoPlaceholder" class="bg-gray-700 rounded-lg flex items-center justify-center aspect-square"> | |
| <div class="text-center p-8"> | |
| <i class="fas fa-camera text-4xl text-gray-500 mb-4"></i> | |
| <p class="text-gray-400">Webcam feed will appear here</p> | |
| </div> | |
| </div> | |
| <video id="video" autoplay playsinline muted class="hidden"></video> | |
| <canvas id="segmentation" class="hidden"></canvas> | |
| <canvas id="canvas" class="hidden"></canvas> | |
| </div> | |
| <div id="detectionLegend" class="legend mt-4 hidden"> | |
| <!-- Legend items will be added dynamically --> | |
| </div> | |
| </div> | |
| <!-- Output Log --> | |
| <div class="bg-gray-800 rounded-lg p-6 shadow-lg"> | |
| <h2 class="text-xl font-bold mb-4 text-blue-400"> | |
| <i class="fas fa-terminal mr-2"></i> Output Log | |
| </h2> | |
| <div class="output-log text-sm" id="log"> | |
| <p class="text-gray-400">Waiting for model to load...</p> | |
| </div> | |
| </div> | |
| <!-- Debug Output --> | |
| <div class="bg-gray-800 rounded-lg p-6 shadow-lg"> | |
| <h2 class="text-xl font-bold mb-4 text-blue-400"> | |
| <i class="fas fa-bug mr-2"></i> Debug Output | |
| </h2> | |
| <div class="debug-output" id="debugOutput"> | |
| <p class="text-gray-400">Raw tensor output will appear here</p> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <footer class="mt-12 text-center text-gray-500 text-sm"> | |
| <p>Powered by ONNX Runtime Web - All processing happens in your browser</p> | |
| </footer> | |
| </div> | |
| <script> | |
// ---------------------------------------------------------------------------
// DOM references, resolved once when the script runs.
// ---------------------------------------------------------------------------

// Video feed and the two overlay canvases with their 2D contexts.
const video = document.querySelector('#video');
const canvas = document.querySelector('#canvas');
const segmentationCanvas = document.querySelector('#segmentation');
const ctx = canvas.getContext('2d');
const segCtx = segmentationCanvas.getContext('2d');

// Controls and text output panes.
const startBtn = document.querySelector('#startBtn');
const logElement = document.querySelector('#log');
const debugOutput = document.querySelector('#debugOutput');

// Model picker widgets and their status indicators.
const modelFileInput = document.querySelector('#modelFile');
const dropzone = document.querySelector('#dropzone');
const modelStatusText = document.querySelector('#modelStatusText');
const modelSizeText = document.querySelector('#modelSizeText');
const modelStatusBadge = document.querySelector('#modelStatusBadge');
const webcamStatusBadge = document.querySelector('#webcamStatusBadge');

// Performance read-outs.
const fpsCounter = document.querySelector('#fpsCounter');
const inferenceTime = document.querySelector('#inferenceTime');

// Video area wrappers.
const videoPlaceholder = document.querySelector('#videoPlaceholder');
const videoContainer = document.querySelector('#video-container');

// Detection settings (sliders, their value labels, mask toggle) and legend.
const confidenceThreshold = document.querySelector('#confidenceThreshold');
const iouThreshold = document.querySelector('#iouThreshold');
const confidenceValue = document.querySelector('#confidenceValue');
const iouValue = document.querySelector('#iouValue');
const showMasks = document.querySelector('#showMasks');
const detectionLegend = document.querySelector('#detectionLegend');

// ---------------------------------------------------------------------------
// Mutable application state shared by the functions below.
// ---------------------------------------------------------------------------
let session = null;        // ONNX inference session, null until a model loads
let modelBuffer = null;    // raw bytes of the selected model file
let isRunning = false;     // true while the detection loop should keep going
let frameCount = 0;        // frames processed since the last FPS refresh
let lastFpsUpdate = 0;     // performance.now() timestamp of the last FPS refresh
let fps = 0;               // most recently computed frames per second
let lastInferenceTime = 0; // duration of the latest session.run call, in ms
let classColors = {};      // class id -> CSS colour string
let classNames = {};       // class id -> label, filled in by initClassNames()
/**
 * Append a timestamped entry to the on-page output log and scroll it into view.
 *
 * The message is inserted as plain text rather than HTML: callers pass file
 * names and error messages, which must never be interpreted as markup.
 * The log is capped so that a long-running detection session cannot grow the
 * DOM without bound; the oldest entries are discarded first.
 *
 * @param {string} message - Text to display after the timestamp.
 */
function log(message) {
    const MAX_LOG_ENTRIES = 200;

    const entry = document.createElement('p');

    const stamp = document.createElement('span');
    stamp.className = 'text-gray-500';
    stamp.textContent = `[${new Date().toLocaleTimeString()}]`;

    entry.appendChild(stamp);
    entry.appendChild(document.createTextNode(` ${message}`));
    logElement.appendChild(entry);

    // Trim from the top once the cap is exceeded.
    while (logElement.childElementCount > MAX_LOG_ENTRIES) {
        logElement.removeChild(logElement.firstElementChild);
    }

    logElement.scrollTop = logElement.scrollHeight;
}
/**
 * Add one line of plain text to the debug pane and keep the pane scrolled
 * to its newest entry.
 *
 * @param {string} message - Text to show; inserted via textContent.
 */
function debugLog(message) {
    const line = document.createElement('div');
    line.textContent = message;
    debugOutput.appendChild(line);

    // Follow the tail of the output.
    debugOutput.scrollTop = debugOutput.scrollHeight;
}
/**
 * Build a palette with one colour per class id.
 *
 * Hues are spread evenly around the colour wheel (not random), with fixed
 * 80% saturation and 60% lightness.
 *
 * @param {number} count - Number of classes to generate colours for.
 * @returns {Object<number, string>} Map from class id to an `hsl(...)` string.
 */
function generateClassColors(count) {
    const palette = {};
    let classId = 0;
    while (classId < count) {
        const degrees = (classId * 360 / count) % 360;
        palette[classId] = `hsl(${degrees}, 80%, 60%)`;
        classId += 1;
    }
    return palette;
}
// Mirror each threshold slider's current value into the label next to it
// whenever the user moves the slider.
for (const [slider, readout] of [
    [confidenceThreshold, confidenceValue],
    [iouThreshold, iouValue]
]) {
    slider.addEventListener('input', () => {
        readout.textContent = slider.value;
    });
}
// --- Dropzone behaviour -----------------------------------------------------

/** Cancel the browser's default handling of an event and stop it bubbling. */
function preventDefaults(e) {
    e.preventDefault();
    e.stopPropagation();
}

/** Mark the dropzone as an active drop target. */
function highlight() {
    dropzone.classList.add('active');
}

/** Clear the active-drop-target styling from the dropzone. */
function unhighlight() {
    dropzone.classList.remove('active');
}

// Clicking anywhere in the dropzone opens the hidden file picker.
dropzone.addEventListener('click', () => {
    modelFileInput.click();
});

// Handlers per drag event, listed in the order they must run:
// default suppression first, then styling, then (for drop) the file handler.
for (const [eventName, handlers] of [
    ['dragenter', [preventDefaults, highlight]],
    ['dragover', [preventDefaults, highlight]],
    ['dragleave', [preventDefaults, unhighlight]],
    ['drop', [preventDefaults, unhighlight, handleDrop]]
]) {
    for (const handler of handlers) {
        dropzone.addEventListener(eventName, handler);
    }
}
/**
 * Handle a file dropped onto the dropzone.
 *
 * Only the first dropped file is considered. The `.onnx` extension check is
 * case-insensitive so that files such as `MODEL.ONNX` are accepted, and a
 * rejected drop is reported in the log instead of being silently ignored.
 *
 * @param {DragEvent} e - The `drop` event.
 */
function handleDrop(e) {
    const files = e.dataTransfer ? e.dataTransfer.files : null;
    if (!files || files.length === 0) {
        return;
    }

    const file = files[0];
    if (!file.name.toLowerCase().endsWith('.onnx')) {
        log('Ignored dropped file: only .onnx model files are supported');
        return;
    }

    handleModelFile(file);
}
// File picker selection: load the chosen model.
// The extension check is case-insensitive, and the input is cleared afterwards
// so that choosing the same file again still fires a `change` event.
modelFileInput.addEventListener('change', (e) => {
    const files = e.target.files;
    const file = files && files.length > 0 ? files[0] : null;

    // The File object stays usable after the input's value is reset.
    e.target.value = '';

    if (!file) {
        return;
    }
    if (!file.name.toLowerCase().endsWith('.onnx')) {
        log('Ignored selected file: only .onnx model files are supported');
        return;
    }

    handleModelFile(file);
});
/**
 * Read the selected model file and create an ONNX Runtime inference session.
 *
 * Updates the model status text/badge throughout, stores the raw bytes in
 * `modelBuffer`, and assigns `session` only after a session was created
 * successfully. On any failure the badge switches to the error state; a
 * previously loaded session (if any) is left in place so a running detection
 * can still be stopped.
 *
 * Session creation is attempted with the WebGL + WASM providers first and
 * retried with WASM only if that attempt throws.
 *
 * @param {File} file - The `.onnx` file chosen by the user.
 * @returns {Promise<void>} Resolves when loading has finished or failed;
 *     failures are reported in the UI rather than thrown.
 */
async function handleModelFile(file) {
    const setBadge = (state, iconClasses, label) => {
        modelStatusBadge.className = `status-badge ${state}`;
        modelStatusBadge.innerHTML = `<i class="fas ${iconClasses} mr-1"></i> ${label}`;
    };

    const fail = (statusText, logMessage, error) => {
        modelStatusText.textContent = statusText;
        setBadge('error', 'fa-exclamation-circle', 'Error');
        log(logMessage);
        console.error(error);
        // Keep the button usable only if an earlier model is still loaded.
        startBtn.disabled = !session;
    };

    modelStatusText.textContent = `Loading ${file.name}...`;
    modelSizeText.textContent = `(${(file.size / 1e6).toFixed(1)} MB)`;
    setBadge('loading', 'fa-spinner fa-spin', 'Loading');
    startBtn.disabled = true;

    // Read the file. Awaiting the promise keeps read errors inside this
    // function's control flow instead of a detached FileReader callback.
    let buffer;
    try {
        buffer = await file.arrayBuffer();
    } catch (error) {
        fail('Error reading file', `File read error: ${error.message}`, error);
        return;
    }
    modelBuffer = buffer;

    log(`Initializing ONNX session for ${file.name}`);
    let created;
    try {
        const sessionOptions = {
            executionProviders: ['webgl', 'wasm'],
            graphOptimizationLevel: 'all'
        };
        try {
            created = await ort.InferenceSession.create(buffer, sessionOptions);
        } catch (webglError) {
            log(`WebGL backend failed, falling back to WASM: ${webglError.message}`);
            sessionOptions.executionProviders = ['wasm'];
            created = await ort.InferenceSession.create(buffer, sessionOptions);
        }
    } catch (error) {
        fail(`Failed to load ${file.name}`, `Model initialization failed: ${error.message}`, error);
        return;
    }

    session = created;

    // Generate colors for classes (assuming 80 classes for YOLO).
    classColors = generateClassColors(80);

    modelStatusText.textContent = `Loaded: ${file.name}`;
    setBadge('ready', 'fa-check-circle', 'Ready');
    startBtn.disabled = false;
    log(`Model loaded successfully with ${session.inputNames.length} inputs and ${session.outputNames.length} outputs`);

    // NOTE(review): input metadata is presumably only exposed by newer
    // onnxruntime-web builds — confirm against the version loaded from the CDN.
    // The lookup is guarded so its absence cannot break loading.
    const inputMeta = Array.isArray(session.inputMetadata) ? session.inputMetadata[0] : undefined;
    if (inputMeta && inputMeta.shape) {
        log(`Input shape: ${JSON.stringify(inputMeta.shape)}`);
    }

    // processYoloOutput reads masks from a second output, so more than one
    // output is treated as a segmentation model; the name check is kept too.
    const isSegmentation = session.outputNames.length > 1 ||
        session.outputNames.some(name => name.includes('mask'));
    log(`Model type: ${isSegmentation ? 'Segmentation' : 'Detection'}`);
}
// Start/stop button: toggles webcam capture and the detection loop.
//
// Stopping releases the camera (all tracks are stopped and the stream is
// detached from the <video>), clears both overlays and restores the
// placeholder. Starting requests the camera, sizes the canvases to the video,
// resets the FPS counters and kicks off detectionLoop(). If start-up fails
// after the camera was granted, the stream is released again.
startBtn.addEventListener('click', async () => {
    const releaseCamera = () => {
        const stream = video.srcObject;
        if (stream) {
            stream.getTracks().forEach(track => track.stop());
            video.srcObject = null;
        }
    };

    if (isRunning) {
        // Stop detection
        isRunning = false;
        releaseCamera();

        ctx.clearRect(0, 0, canvas.width, canvas.height);
        segCtx.clearRect(0, 0, segmentationCanvas.width, segmentationCanvas.height);
        video.classList.add('hidden');
        canvas.classList.add('hidden');
        segmentationCanvas.classList.add('hidden');
        detectionLegend.classList.add('hidden');
        videoContainer.style.aspectRatio = '';
        videoPlaceholder.classList.remove('hidden');

        startBtn.innerHTML = '<i class="fas fa-play mr-2"></i> Start Detection';
        startBtn.classList.remove('bg-red-600', 'hover:bg-red-700');
        startBtn.classList.add('bg-green-600', 'hover:bg-green-700');
        webcamStatusBadge.className = 'status-badge disabled';
        webcamStatusBadge.innerHTML = '<i class="fas fa-times-circle mr-1"></i> Inactive';
        log('Detection stopped');
        return;
    }

    if (!session) {
        log('Load a model before starting detection');
        return;
    }

    try {
        // Get webcam access
        log('Requesting webcam access...');
        const stream = await navigator.mediaDevices.getUserMedia({
            video: {
                width: { ideal: 640 },
                height: { ideal: 640 },
                facingMode: 'environment'
            },
            audio: false
        });

        // Set up video element
        video.srcObject = stream;
        await video.play();

        // Poll until the video reports its real dimensions.
        await new Promise(resolve => {
            const checkDimensions = () => {
                if (video.videoWidth > 0 && video.videoHeight > 0) {
                    resolve();
                } else {
                    setTimeout(checkDimensions, 50);
                }
            };
            checkDimensions();
        });

        // Set canvas dimensions to match video
        const videoWidth = video.videoWidth;
        const videoHeight = video.videoHeight;
        canvas.width = videoWidth;
        canvas.height = videoHeight;
        segmentationCanvas.width = videoWidth;
        segmentationCanvas.height = videoHeight;

        // Adjust container aspect ratio
        videoContainer.style.aspectRatio = `${videoWidth}/${videoHeight}`;

        // Show video and canvas
        videoPlaceholder.classList.add('hidden');
        video.classList.remove('hidden');
        canvas.classList.remove('hidden');
        segmentationCanvas.classList.remove('hidden');
        detectionLegend.classList.remove('hidden');

        // Start the FPS window now so the first reading covers a real interval.
        frameCount = 0;
        lastFpsUpdate = performance.now();

        // Update UI
        isRunning = true;
        startBtn.innerHTML = '<i class="fas fa-stop mr-2"></i> Stop Detection';
        startBtn.classList.remove('bg-green-600', 'hover:bg-green-700');
        startBtn.classList.add('bg-red-600', 'hover:bg-red-700');
        webcamStatusBadge.className = 'status-badge ready';
        webcamStatusBadge.innerHTML = '<i class="fas fa-check-circle mr-1"></i> Active';
        log(`Webcam started (${videoWidth}x${videoHeight}) - beginning detection`);

        // Start detection loop
        detectionLoop();
    } catch (error) {
        // Do not leave the camera running if start-up failed part-way.
        releaseCamera();
        log(`Error accessing webcam: ${error.message}`);
        console.error(error);
        webcamStatusBadge.className = 'status-badge error';
        webcamStatusBadge.innerHTML = '<i class="fas fa-exclamation-circle mr-1"></i> Error';
    }
});
/**
 * Greedy non-maximum suppression.
 *
 * Boxes are visited in descending score order; each kept box suppresses every
 * lower-scored box whose intersection-over-union with it exceeds the
 * threshold. Suppression is class-agnostic (class ids are not consulted).
 *
 * @param {number[][]} boxes - Boxes as `[x1, y1, x2, y2]`.
 * @param {number[]} scores - One score per box, same order as `boxes`.
 * @param {number} iouThreshold - Boxes overlapping a kept box by more than
 *     this IoU are dropped.
 * @returns {number[]} Indices into `boxes` of the kept boxes, highest score first.
 */
function nonMaxSuppression(boxes, scores, iouThreshold) {
    // Degenerate (inverted) boxes are treated as having zero area.
    const areas = boxes.map(box =>
        Math.max(0, box[2] - box[0]) * Math.max(0, box[3] - box[1]));

    // Box indices sorted by score, best first.
    const order = scores
        .map((score, index) => index)
        .sort((a, b) => scores[b] - scores[a]);

    const suppressed = new Array(boxes.length).fill(false);
    const selectedIndices = [];

    for (let a = 0; a < order.length; a++) {
        const current = order[a];
        if (suppressed[current]) {
            continue;
        }
        selectedIndices.push(current);
        const currentBox = boxes[current];

        for (let b = a + 1; b < order.length; b++) {
            const other = order[b];
            if (suppressed[other]) {
                continue;
            }
            const otherBox = boxes[other];

            const ix1 = Math.max(currentBox[0], otherBox[0]);
            const iy1 = Math.max(currentBox[1], otherBox[1]);
            const ix2 = Math.min(currentBox[2], otherBox[2]);
            const iy2 = Math.min(currentBox[3], otherBox[3]);
            const intersection = Math.max(0, ix2 - ix1) * Math.max(0, iy2 - iy1);

            // Areas are looked up by box index (not by the box itself).
            const union = areas[current] + areas[other] - intersection;
            const iou = union > 0 ? intersection / union : 0;

            if (iou > iouThreshold) {
                suppressed[other] = true;
            }
        }
    }

    return selectedIndices;
}
/**
 * Convert the model's raw outputs into a list of detections.
 *
 * The detection tensor is read as `[batch, num_detections, features]` where
 * each row is `x1, y1, x2, y2, objectness, class scores...`, with box
 * coordinates multiplied by the image size. Rows whose
 * `objectness * best class score` exceeds the confidence slider value are
 * kept and then filtered with non-maximum suppression using the IoU slider.
 *
 * NOTE(review): this row layout is what the code indexes, not something the
 * model guarantees. Ultralytics YOLOv8 exports are commonly channel-first with
 * centre/size boxes and no objectness — confirm against the exported model.
 *
 * The debug pane is cleared and refilled on every call.
 *
 * @param {Object} output - Result map from `session.run`.
 * @param {number} imgWidth - Width used to scale box x coordinates.
 * @param {number} imgHeight - Height used to scale box y coordinates.
 * @returns {Array<{box: number[], score: number, classId: number, mask: ?Object}>}
 *     Detections surviving thresholding and NMS; `mask` is null when the
 *     model has no `output1`.
 */
function processYoloOutput(output, imgWidth, imgHeight) {
    const confThreshold = parseFloat(confidenceThreshold.value);
    const iouThresh = parseFloat(iouThreshold.value);

    // Index of the first class score within a row: 4 box values + objectness.
    const CLASS_OFFSET = 5;

    // Clear previous debug output
    debugOutput.innerHTML = '';

    // Prefer the conventional 'output0' name, otherwise take the first output.
    const outputTensor = output.output0 || Object.values(output)[0];
    if (!outputTensor || !outputTensor.dims || outputTensor.dims.length < 3) {
        debugLog('Unexpected model output: no rank-3 detection tensor found');
        return [];
    }
    const outputData = outputTensor.data;

    // Log raw tensor shape and first few values
    debugLog(`Output tensor shape: [${outputTensor.dims.join(', ')}]`);
    debugLog(`First 20 values: ${Array.from(outputData.slice(0, 20)).map(v => v.toFixed(2)).join(', ')}`);

    const numDetections = outputTensor.dims[1];
    const numFeatures = outputTensor.dims[2];
    debugLog(`Num detections: ${numDetections}, Num features: ${numFeatures}`);

    // Parallel arrays for the rows that pass the confidence threshold.
    const boxes = [];
    const scores = [];
    const classIds = [];
    const sourceRows = []; // row index in the model output, needed for masks

    for (let i = 0; i < numDetections; i++) {
        const offset = i * numFeatures;

        const x1 = outputData[offset];
        const y1 = outputData[offset + 1];
        const x2 = outputData[offset + 2];
        const y2 = outputData[offset + 3];
        const conf = outputData[offset + 4];

        // Best class. The scan starts after the objectness slot so that
        // objectness is never mistaken for a class score.
        let maxScore = -1;
        let classId = -1;
        for (let j = CLASS_OFFSET; j < numFeatures; j++) {
            const score = outputData[offset + j];
            if (score > maxScore) {
                maxScore = score;
                classId = j - CLASS_OFFSET;
            }
        }

        // Final score = objectness * class probability
        const finalScore = conf * maxScore;
        if (finalScore > confThreshold) {
            const scaledBox = [
                x1 * imgWidth,
                y1 * imgHeight,
                x2 * imgWidth,
                y2 * imgHeight
            ];
            boxes.push(scaledBox);
            scores.push(finalScore);
            classIds.push(classId);
            sourceRows.push(i);
            debugLog(`Detection ${i}: [${scaledBox.map(v => v.toFixed(1)).join(', ')}] score=${finalScore.toFixed(2)} class=${classId}`);
        }
    }

    // Apply non-max suppression
    const selectedIndices = nonMaxSuppression(boxes, scores, iouThresh);

    // Masks are looked up by the row's position in the model output, not by
    // its position in the thresholded list.
    // NOTE(review): assumes output1 is laid out per detection row — verify.
    const maskTensor = output.output1;
    return selectedIndices.map(idx => ({
        box: boxes[idx],
        score: scores[idx],
        classId: classIds[idx],
        mask: maskTensor
            ? getMaskForDetection(maskTensor.data, sourceRows[idx], maskTensor.dims)
            : null
    }));
}
/**
 * Build a per-pixel label map for one detection from the mask tensor data.
 *
 * For every pixel the channel holding the largest value wins (the first
 * channel wins ties) and its channel index is stored. Reads start at
 * `detectionIdx * channels * height * width` in `masksData`.
 *
 * NOTE(review): this is a channel arg-max, not a coefficient-weighted
 * prototype combination — confirm it matches the model's mask output.
 *
 * @param {ArrayLike<number>} masksData - Flat mask tensor data.
 * @param {number} detectionIdx - Which detection's block of data to read.
 * @param {number[]} maskShape - `[_, channels, height, width]`.
 * @returns {{data: number[], width: number, height: number}} Winning channel
 *     index per pixel in row-major order, plus the mask dimensions.
 */
function getMaskForDetection(masksData, detectionIdx, maskShape) {
    const channels = maskShape[1];
    const rows = maskShape[2];
    const cols = maskShape[3];

    const planeSize = rows * cols;
    const base = detectionIdx * channels * planeSize;
    const labels = new Array(planeSize).fill(0);

    // Walk pixels in row-major order; `pixel` equals y * cols + x.
    for (let pixel = 0; pixel < planeSize; pixel++) {
        let winner = 0;
        let winnerValue = -Infinity;
        let ch = 0;
        while (ch < channels) {
            const candidate = masksData[base + ch * planeSize + pixel];
            if (candidate > winnerValue) {
                winnerValue = candidate;
                winner = ch;
            }
            ch += 1;
        }
        labels[pixel] = winner;
    }

    return { data: labels, width: cols, height: rows };
}
/**
 * Render detections: masks on the segmentation canvas, boxes and labels on
 * the top overlay canvas, and one legend entry per detected class.
 *
 * The overlay canvas is left transparent apart from boxes and labels. The
 * live <video> element sits underneath both canvases, so painting the frame
 * onto the top canvas would hide the mask layer below it.
 *
 * @param {Array<{box: number[], score: number, classId: number, mask: ?Object}>} detections
 *     Detections with boxes in canvas pixel coordinates.
 * @param {number} imgWidth - Unused; kept for call compatibility.
 * @param {number} imgHeight - Unused; kept for call compatibility.
 */
function drawDetections(detections, imgWidth, imgHeight) {
    const LABEL_HEIGHT = 20;

    // Paint one detection's mask, stretched over its bounding box.
    const paintMask = (mask, color, left, top, boxWidth, boxHeight) => {
        const maskCanvas = document.createElement('canvas');
        maskCanvas.width = mask.width;
        maskCanvas.height = mask.height;
        const maskCtx = maskCanvas.getContext('2d');

        // Class colours may be hsl() strings. Assigning to fillStyle and
        // reading it back yields '#rrggbb' for opaque colours, which
        // hexToRgb can parse. Done once per mask, not once per pixel.
        maskCtx.fillStyle = color;
        const [r, g, b] = hexToRgb(String(maskCtx.fillStyle));

        const maskImageData = maskCtx.createImageData(mask.width, mask.height);
        const pixels = maskImageData.data;
        for (let i = 0; i < mask.data.length; i++) {
            if (mask.data[i] > 0) { // Only draw non-zero mask values
                const p = i * 4;
                pixels[p] = r;
                pixels[p + 1] = g;
                pixels[p + 2] = b;
                pixels[p + 3] = 150; // Alpha
            }
        }
        maskCtx.putImageData(maskImageData, 0, 0);

        segCtx.save();
        segCtx.translate(left, top);
        segCtx.scale(boxWidth / mask.width, boxHeight / mask.height);
        segCtx.drawImage(maskCanvas, 0, 0);
        segCtx.restore();
    };

    // Clear previous drawings
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    segCtx.clearRect(0, 0, segmentationCanvas.width, segmentationCanvas.height);

    // Clear legend and rebuild
    detectionLegend.innerHTML = '';
    const legendItems = new Set();

    // Set the font before any measureText call so label backgrounds are
    // sized for the font the text is actually drawn with.
    ctx.font = '12px Arial';
    ctx.lineWidth = 2;

    const masksEnabled = Boolean(showMasks && showMasks.checked);

    detections.forEach(det => {
        const [x1, y1, x2, y2] = det.box;
        const width = x2 - x1;
        const height = y2 - y1;
        const className = classNames[det.classId] || `Class ${det.classId}`;
        const color = classColors[det.classId] || '#3B82F6';

        // Add to legend (first detection of each class only)
        if (!legendItems.has(det.classId)) {
            legendItems.add(det.classId);
            const legendItem = document.createElement('div');
            legendItem.className = 'legend-item';
            legendItem.innerHTML = `
<div class="legend-color" style="background-color: ${color};"></div>
<span>${className}</span>
<div class="confidence-bar">
<div class="confidence-fill" style="width: ${det.score * 100}%;"></div>
</div>
`;
            detectionLegend.appendChild(legendItem);
        }

        // Draw mask if available and enabled
        if (det.mask && masksEnabled) {
            paintMask(det.mask, color, x1, y1, width, height);
        }

        // Draw bounding box
        ctx.strokeStyle = color;
        ctx.strokeRect(x1, y1, width, height);

        // Label sits above the box, clamped so it stays on the canvas.
        const label = `${className} ${(det.score * 100).toFixed(1)}%`;
        const textWidth = ctx.measureText(label).width;
        const labelTop = Math.max(0, y1 - LABEL_HEIGHT);
        ctx.fillStyle = color;
        ctx.fillRect(x1 - 2, labelTop, textWidth + 4, LABEL_HEIGHT);
        ctx.fillStyle = 'white';
        ctx.fillText(label, x1, labelTop + 15);
    });
}
/**
 * Convert a CSS colour string to `[r, g, b]` with components in 0-255.
 *
 * Accepts six-digit hex (`#rrggbb`, the `#` is optional) and, because the
 * class palette in this file is generated as `hsl(h, s%, l%)`, that
 * comma-separated hsl form as well. Anything else yields black.
 *
 * @param {string} hex - Colour string to convert.
 * @returns {number[]} `[r, g, b]`, or `[0, 0, 0]` if the string is not recognised.
 */
function hexToRgb(hex) {
    const text = String(hex).trim();

    const hexMatch = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(text);
    if (hexMatch) {
        return [
            parseInt(hexMatch[1], 16),
            parseInt(hexMatch[2], 16),
            parseInt(hexMatch[3], 16)
        ];
    }

    const hslMatch = /^hsl\(\s*(-?\d+(?:\.\d+)?)(?:deg)?\s*,\s*(\d+(?:\.\d+)?)%\s*,\s*(\d+(?:\.\d+)?)%\s*\)$/i.exec(text);
    if (hslMatch) {
        // Normalise hue into [0, 360) and clamp saturation/lightness to [0, 1].
        const hue = ((parseFloat(hslMatch[1]) % 360) + 360) % 360;
        const sat = Math.min(1, parseFloat(hslMatch[2]) / 100);
        const light = Math.min(1, parseFloat(hslMatch[3]) / 100);

        // Standard HSL -> RGB conversion.
        const amplitude = sat * Math.min(light, 1 - light);
        const channel = (n) => {
            const k = (n + hue / 30) % 12;
            const value = light - amplitude * Math.max(-1, Math.min(k - 3, 9 - k, 1));
            return Math.round(value * 255);
        };
        return [channel(0), channel(8), channel(4)];
    }

    return [0, 0, 0];
}
/**
 * One iteration of the detection loop: grab a frame, run inference, draw the
 * results, update the performance counters and schedule the next iteration.
 *
 * The FPS / inference-time read-outs and the detection summary in the log are
 * refreshed at most once per second; logging on every frame would append DOM
 * nodes without bound. Repeated errors are throttled the same way.
 *
 * @returns {Promise<void>} Resolves after this iteration; errors are logged,
 *     not thrown, so a bad frame does not end the loop.
 */
async function detectionLoop() {
    if (!isRunning) return;

    try {
        if (!session) {
            throw new Error('No model session is loaded');
        }

        // Preprocess frame
        const inputTensor = await preprocessFrame(video);

        // Run inference
        const feeds = { [session.inputNames[0]]: inputTensor };
        const inferenceStart = performance.now();
        const output = await session.run(feeds);
        lastInferenceTime = performance.now() - inferenceStart;

        // Process and draw
        const detections = processYoloOutput(output, video.videoWidth, video.videoHeight);
        drawDetections(detections, video.videoWidth, video.videoHeight);

        // Update performance counters
        frameCount++;
        const now = performance.now();
        if (now - lastFpsUpdate >= 1000) {
            fps = frameCount * 1000 / (now - lastFpsUpdate);
            frameCount = 0;
            lastFpsUpdate = now;

            fpsCounter.textContent = Math.round(fps);
            inferenceTime.textContent = lastInferenceTime.toFixed(1);

            // Summarise the latest frame alongside the counter refresh.
            if (detections.length > 0) {
                const topDetection = detections[0];
                const className = classNames[topDetection.classId] || `Class ${topDetection.classId}`;
                log(`Detected ${detections.length} objects (top: ${className} @ ${(topDetection.score * 100).toFixed(1)}%)`);
            }
        }
    } catch (error) {
        // A persistent failure would otherwise be reported on every frame.
        const now = performance.now();
        const lastReported = detectionLoop.lastErrorLogAt || -Infinity;
        if (now - lastReported >= 1000) {
            detectionLoop.lastErrorLogAt = now;
            log(`Detection error: ${error.message}`);
            console.error(error);
        }
    }

    // Schedule next frame
    requestAnimationFrame(detectionLoop);
}
/**
 * Turn the current video frame into a float32 input tensor.
 *
 * The frame is drawn to an off-screen canvas, alpha is dropped, channel
 * values are scaled to [0, 1] and the data is laid out channels-first, giving
 * a tensor of shape `[1, 3, height, width]`.
 *
 * The scratch canvas is created once and reused across calls, and the
 * RGBA -> planar RGB conversion is done in a single pass.
 *
 * @param {HTMLVideoElement} videoElement - Source of the frame.
 * @param {number} [targetWidth] - Tensor width; defaults to the video width.
 * @param {number} [targetHeight] - Tensor height; defaults to the video height.
 *     When a target size differs from the video size the frame is stretched
 *     to fit (aspect ratio is not preserved).
 * @returns {Promise<ort.Tensor>} The input tensor.
 */
async function preprocessFrame(videoElement, targetWidth, targetHeight) {
    const width = targetWidth || videoElement.videoWidth;
    const height = targetHeight || videoElement.videoHeight;

    // Lazily create the scratch canvas and keep it for later frames.
    let scratch = preprocessFrame.scratchCanvas;
    if (!scratch) {
        scratch = document.createElement('canvas');
        preprocessFrame.scratchCanvas = scratch;
        preprocessFrame.scratchCtx = scratch.getContext('2d', { willReadFrequently: true });
    }
    const scratchCtx = preprocessFrame.scratchCtx;

    // Assigning canvas dimensions resets the canvas, so only do it on change.
    if (scratch.width !== width || scratch.height !== height) {
        scratch.width = width;
        scratch.height = height;
    }

    // Draw video frame to canvas and read the pixels back
    scratchCtx.drawImage(videoElement, 0, 0, width, height);
    const rgba = scratchCtx.getImageData(0, 0, width, height).data;

    // RGBA (interleaved) -> RGB (planar), normalised to [0, 1]
    const channelSize = width * height;
    const chwData = new Float32Array(channelSize * 3);
    for (let pixel = 0, src = 0; pixel < channelSize; pixel++, src += 4) {
        chwData[pixel] = rgba[src] / 255.0;                       // R plane
        chwData[channelSize + pixel] = rgba[src + 1] / 255.0;     // G plane
        chwData[2 * channelSize + pixel] = rgba[src + 2] / 255.0; // B plane
    }

    // Create tensor with shape [1, 3, height, width]
    return new ort.Tensor('float32', chwData, [1, 3, height, width]);
}
/**
 * Fill the global `classNames` table with 80 labels keyed by class id 0-79.
 *
 * The labels are listed in id order; a label's position in the list is its id.
 */
function initClassNames() {
    const labelsInIdOrder = [
        // 0-9
        'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        'bus', 'train', 'truck', 'boat', 'traffic light',
        // 10-19
        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
        'cat', 'dog', 'horse', 'sheep', 'cow',
        // 20-29
        'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
        // 30-39
        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
        'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
        // 40-49
        'wine glass', 'cup', 'fork', 'knife', 'spoon',
        'bowl', 'banana', 'apple', 'sandwich', 'orange',
        // 50-59
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
        'cake', 'chair', 'couch', 'potted plant', 'bed',
        // 60-69
        'dining table', 'toilet', 'tv', 'laptop', 'mouse',
        'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
        // 70-79
        'toaster', 'sink', 'refrigerator', 'book', 'clock',
        'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ];

    const table = {};
    labelsInIdOrder.forEach((label, id) => {
        table[id] = label;
    });
    classNames = table;
}
// Once the document has been parsed, populate the class-name table.
window.addEventListener('DOMContentLoaded', () => {
    initClassNames();
});

// When the page is about to unload, stop every track of the webcam stream
// attached to the <video> element, if there is one. The ONNX session gets no
// explicit teardown here.
window.addEventListener('beforeunload', () => {
    const stream = video.srcObject;
    if (!stream) {
        return;
    }
    for (const track of stream.getTracks()) {
        track.stop();
    }
});
| </script> | |
| <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=MaxLeft/yolo-detection-app" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body> | |
| </html> |