<!--
  yolo-detection-app / index.html
  MaxLeft · Add 3 files · 91f934f verified
  (repository header preserved as a comment so no stray text precedes the doctype)
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ONNX YOLO Segmentation Web Demo</title>
<!-- CDN dependencies: Tailwind (runtime JIT), ONNX Runtime Web, Font Awesome.
     NOTE(review): no SRI integrity hashes, and onnxruntime-web is unpinned —
     consider pinning versions for reproducible behavior. -->
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
/* Bounding-box overlay element, positioned absolutely over the video feed. */
.detection-box {
position: absolute;
border: 2px solid #3B82F6;
background-color: rgba(59, 130, 246, 0.2);
display: flex;
flex-direction: column;
align-items: center;
justify-content: flex-end;
color: white;
font-weight: bold;
font-size: 12px;
}
/* Class-name pill rendered inside a detection box. */
.detection-label {
background-color: #3B82F6;
padding: 2px 5px;
border-radius: 3px;
margin-bottom: 2px;
}
/* Attention-drawing blue glow (2 s loop). */
.pulse {
animation: pulse 2s infinite;
}
@keyframes pulse {
0% {
box-shadow: 0 0 0 0 rgba(59, 130, 246, 0.7);
}
70% {
box-shadow: 0 0 0 10px rgba(59, 130, 246, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(59, 130, 246, 0);
}
}
/* Wrapper establishing the positioning context for the video + overlays. */
#video-container {
position: relative;
width: 100%;
max-width: 640px;
margin: 0 auto;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
}
#video, #canvas {
width: 100%;
height: auto;
display: block;
}
/* Stacking order: video (base) < #segmentation (z 5) < #canvas (z 10). */
#canvas {
position: absolute;
top: 0;
left: 0;
z-index: 10;
}
/* Semi-transparent mask layer drawn beneath the box/label canvas. */
#segmentation {
position: absolute;
top: 0;
left: 0;
z-index: 5;
opacity: 0.5;
}
/* Model-file drop target; .active is toggled by JS during drag-over. */
.dropzone {
border: 2px dashed #4B5563;
border-radius: 8px;
padding: 20px;
text-align: center;
cursor: pointer;
transition: all 0.3s;
}
.dropzone:hover {
border-color: #3B82F6;
background-color: rgba(59, 130, 246, 0.1);
}
.dropzone.active {
border-color: #3B82F6;
background-color: rgba(59, 130, 246, 0.2);
}
/* Status pills; modifier classes (ready/loading/error/disabled) are set by JS. */
.status-badge {
display: inline-flex;
align-items: center;
padding: 4px 8px;
border-radius: 9999px;
font-size: 12px;
font-weight: 600;
}
.status-badge.ready {
background-color: rgba(16, 185, 129, 0.2);
color: #10B981;
}
.status-badge.loading {
background-color: rgba(245, 158, 11, 0.2);
color: #F59E0B;
}
.status-badge.error {
background-color: rgba(239, 68, 68, 0.2);
color: #EF4444;
}
.status-badge.disabled {
background-color: rgba(75, 85, 99, 0.2);
color: #4B5563;
}
/* Scrolling monospace panel for timestamped log lines. */
.output-log {
font-family: 'Courier New', Courier, monospace;
background-color: rgba(31, 41, 55, 0.8);
border-radius: 8px;
padding: 16px;
max-height: 200px;
overflow-y: auto;
}
/* Per-class color legend built dynamically under the video. */
.legend {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 8px;
}
.legend-item {
display: flex;
align-items: center;
font-size: 12px;
}
.legend-color {
width: 16px;
height: 16px;
border-radius: 3px;
margin-right: 4px;
}
/* Thin bar whose fill width encodes detection confidence (0-100%). */
.confidence-bar {
height: 4px;
background-color: #4B5563;
border-radius: 2px;
margin-top: 2px;
overflow: hidden;
}
.confidence-fill {
height: 100%;
background-color: #10B981;
}
/* Raw tensor dump panel; pre-wrap preserves the debug formatting. */
.debug-output {
font-family: 'Courier New', Courier, monospace;
background-color: rgba(31, 41, 55, 0.8);
border-radius: 8px;
padding: 16px;
max-height: 200px;
overflow-y: auto;
margin-top: 16px;
font-size: 12px;
white-space: pre-wrap;
}
</style>
</head>
<body class="bg-gray-900 text-gray-100 min-h-screen">
<div class="container mx-auto px-4 py-8">
<header class="text-center mb-8">
<h1 class="text-3xl md:text-4xl font-bold mb-2 text-blue-400">
<i class="fas fa-shapes mr-2"></i> YOLO Segmentation Web Demo
</h1>
<p class="text-gray-400 max-w-2xl mx-auto">
Real-time instance segmentation with YOLO ONNX models in your browser
</p>
</header>
<div class="max-w-4xl mx-auto">
<div class="grid grid-cols-1 md:grid-cols-2 gap-8">
<!-- Left column - Controls -->
<div class="space-y-6">
<!-- Model Selection: dropzone + hidden file input; status text/badge updated by JS -->
<div class="bg-gray-800 rounded-lg p-6 shadow-lg">
<h2 class="text-xl font-bold mb-4 text-blue-400">
<i class="fas fa-file-export mr-2"></i> Model Selection
</h2>
<div id="dropzone" class="dropzone mb-4">
<div class="flex flex-col items-center justify-center py-4">
<i class="fas fa-file-upload text-4xl text-blue-400 mb-2"></i>
<p class="text-gray-300">Drag & drop your YOLO ONNX model file here</p>
<p class="text-gray-400 text-sm mt-1">or click to browse</p>
<input type="file" id="modelFile" accept=".onnx" class="hidden" />
</div>
</div>
<div class="flex items-center justify-between">
<div>
<p id="modelStatusText" class="text-sm text-gray-400">No model selected</p>
<p id="modelSizeText" class="text-xs text-gray-500"></p>
</div>
<span id="modelStatusBadge" class="status-badge disabled">
<i class="fas fa-times-circle mr-1"></i> Not Loaded
</span>
</div>
</div>
<!-- Detection Settings: threshold sliders and mask toggle read live by the JS loop -->
<div class="bg-gray-800 rounded-lg p-6 shadow-lg">
<h2 class="text-xl font-bold mb-4 text-blue-400">
<i class="fas fa-sliders-h mr-2"></i> Detection Settings
</h2>
<div class="space-y-4">
<div>
<label for="confidenceThreshold" class="block text-sm font-medium text-gray-300 mb-1">
Confidence Threshold: <span id="confidenceValue">0.5</span>
</label>
<input type="range" id="confidenceThreshold" min="0" max="1" step="0.05" value="0.5"
class="w-full h-2 bg-gray-700 rounded-lg appearance-none cursor-pointer">
</div>
<div>
<label for="iouThreshold" class="block text-sm font-medium text-gray-300 mb-1">
IOU Threshold: <span id="iouValue">0.45</span>
</label>
<input type="range" id="iouThreshold" min="0" max="1" step="0.05" value="0.45"
class="w-full h-2 bg-gray-700 rounded-lg appearance-none cursor-pointer">
</div>
<div class="flex items-center justify-between">
<label for="showMasks" class="text-sm font-medium text-gray-300">
Show Segmentation Masks
</label>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" id="showMasks" class="sr-only peer" checked>
<div class="w-11 h-6 bg-gray-700 peer-focus:outline-none rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-blue-600"></div>
</label>
</div>
</div>
</div>
<!-- Webcam Controls: start button stays disabled until a model loads -->
<div class="bg-gray-800 rounded-lg p-6 shadow-lg">
<h2 class="text-xl font-bold mb-4 text-blue-400">
<i class="fas fa-video mr-2"></i> Webcam Controls
</h2>
<div class="flex flex-col space-y-4">
<button id="startBtn" class="bg-green-600 hover:bg-green-700 text-white font-bold py-3 px-6 rounded-lg flex items-center justify-center disabled:opacity-50 disabled:cursor-not-allowed" disabled>
<i class="fas fa-play mr-2"></i> Start Detection
</button>
<div class="flex items-center justify-between">
<div>
<p class="text-sm text-gray-400">Webcam Status</p>
</div>
<span id="webcamStatusBadge" class="status-badge disabled">
<i class="fas fa-times-circle mr-1"></i> Inactive
</span>
</div>
</div>
</div>
<!-- Performance Stats: counters refreshed once per second by detectionLoop -->
<div class="bg-gray-800 rounded-lg p-6 shadow-lg">
<h2 class="text-xl font-bold mb-4 text-blue-400">
<i class="fas fa-tachometer-alt mr-2"></i> Performance
</h2>
<div class="grid grid-cols-2 gap-4">
<div class="bg-gray-700 p-4 rounded-lg text-center">
<div class="text-2xl font-bold text-blue-400" id="fpsCounter">-</div>
<div class="text-gray-300 text-sm">FPS</div>
</div>
<div class="bg-gray-700 p-4 rounded-lg text-center">
<div class="text-2xl font-bold text-green-400" id="inferenceTime">-</div>
<div class="text-gray-300 text-sm">ms/inference</div>
</div>
</div>
</div>
</div>
<!-- Right column - Output -->
<div class="space-y-6">
<!-- Video Feed: placeholder is swapped for video + two stacked canvases
     (segmentation masks at z-index 5, boxes/labels at z-index 10) -->
<div class="bg-gray-800 rounded-lg p-6 shadow-lg">
<h2 class="text-xl font-bold mb-4 text-blue-400">
<i class="fas fa-eye mr-2"></i> Live Detection
</h2>
<div id="video-container" class="relative">
<div id="videoPlaceholder" class="bg-gray-700 rounded-lg flex items-center justify-center aspect-square">
<div class="text-center p-8">
<i class="fas fa-camera text-4xl text-gray-500 mb-4"></i>
<p class="text-gray-400">Webcam feed will appear here</p>
</div>
</div>
<video id="video" autoplay playsinline muted class="hidden"></video>
<canvas id="segmentation" class="hidden"></canvas>
<canvas id="canvas" class="hidden"></canvas>
</div>
<div id="detectionLegend" class="legend mt-4 hidden">
<!-- Legend items will be added dynamically -->
</div>
</div>
<!-- Output Log: timestamped status lines appended by log() -->
<div class="bg-gray-800 rounded-lg p-6 shadow-lg">
<h2 class="text-xl font-bold mb-4 text-blue-400">
<i class="fas fa-terminal mr-2"></i> Output Log
</h2>
<div class="output-log text-sm" id="log">
<p class="text-gray-400">Waiting for model to load...</p>
</div>
</div>
<!-- Debug Output: raw tensor shape/values written by debugLog() -->
<div class="bg-gray-800 rounded-lg p-6 shadow-lg">
<h2 class="text-xl font-bold mb-4 text-blue-400">
<i class="fas fa-bug mr-2"></i> Debug Output
</h2>
<div class="debug-output" id="debugOutput">
<p class="text-gray-400">Raw tensor output will appear here</p>
</div>
</div>
</div>
</div>
</div>
<footer class="mt-12 text-center text-gray-500 text-sm">
<p>Powered by ONNX Runtime Web - All processing happens in your browser</p>
</footer>
</div>
<script>
// ---- DOM handles (this script sits at the end of <body>, so every element
// referenced below already exists when these run) ----
const video = document.getElementById('video');
const canvas = document.getElementById('canvas');
const segmentationCanvas = document.getElementById('segmentation');
const ctx = canvas.getContext('2d');
const segCtx = segmentationCanvas.getContext('2d');
const startBtn = document.getElementById('startBtn');
const logElement = document.getElementById('log');
const debugOutput = document.getElementById('debugOutput');
const modelFileInput = document.getElementById('modelFile');
const dropzone = document.getElementById('dropzone');
const modelStatusText = document.getElementById('modelStatusText');
const modelSizeText = document.getElementById('modelSizeText');
const modelStatusBadge = document.getElementById('modelStatusBadge');
const webcamStatusBadge = document.getElementById('webcamStatusBadge');
const fpsCounter = document.getElementById('fpsCounter');
const inferenceTime = document.getElementById('inferenceTime');
const videoPlaceholder = document.getElementById('videoPlaceholder');
const videoContainer = document.getElementById('video-container');
const confidenceThreshold = document.getElementById('confidenceThreshold');
const iouThreshold = document.getElementById('iouThreshold');
const confidenceValue = document.getElementById('confidenceValue');
const iouValue = document.getElementById('iouValue');
const showMasks = document.getElementById('showMasks');
const detectionLegend = document.getElementById('detectionLegend');
// ---- App state ----
let session = null; // ort.InferenceSession once a model has been loaded
let modelBuffer = null; // raw ArrayBuffer of the uploaded .onnx file
let isRunning = false; // true while the detection loop is active
let frameCount = 0; // frames rendered since the last FPS sample
let lastFpsUpdate = 0; // performance.now() timestamp of the last FPS sample
let fps = 0; // most recent FPS estimate
let lastInferenceTime = 0; // duration (ms) of the most recent session.run()
let classColors = {}; // classId -> CSS color used for boxes/masks/legend
let classNames = {}; // classId -> label; filled by initClassNames()
// Append a timestamped message to the on-page output log and keep the
// panel scrolled to the newest entry. `message` may contain inline HTML.
function log(message) {
  const entry = document.createElement('p');
  const stamp = new Date().toLocaleTimeString();
  entry.innerHTML = `<span class="text-gray-500">[${stamp}]</span> ${message}`;
  logElement.appendChild(entry);
  logElement.scrollTop = logElement.scrollHeight;
}
// Write one plain-text line into the debug panel and auto-scroll to it.
function debugLog(message) {
  const line = document.createElement('div');
  line.textContent = message;
  debugOutput.appendChild(line);
  debugOutput.scrollTop = debugOutput.scrollHeight;
}
// Build a palette of `count` evenly-spaced HSL colors keyed by class index
// (0..count-1); hues are spread around the full color wheel.
function generateClassColors(count) {
  return Object.fromEntries(
    Array.from({ length: count }, (_, i) => {
      const hue = (i * 360 / count) % 360;
      return [i, `hsl(${hue}, 80%, 60%)`];
    })
  );
}
// Keep the numeric readout next to each threshold slider in sync with it.
const bindSliderReadout = (slider, readout) => {
  slider.addEventListener('input', () => {
    readout.textContent = slider.value;
  });
};
bindSliderReadout(confidenceThreshold, confidenceValue);
bindSliderReadout(iouThreshold, iouValue);
// Wire up the model dropzone: click-to-browse plus drag & drop visuals.
dropzone.addEventListener('click', () => modelFileInput.click());

// Suppress the browser's default drag/drop navigation behavior.
function preventDefaults(e) {
  e.preventDefault();
  e.stopPropagation();
}
// Toggle the highlighted drop-target style.
function highlight() {
  dropzone.classList.add('active');
}
function unhighlight() {
  dropzone.classList.remove('active');
}

// Table of drag events -> handlers, registered in the same order the
// original inline calls used (handleDrop is a hoisted declaration below).
const dragHandlers = {
  dragenter: [preventDefaults, highlight],
  dragover: [preventDefaults, highlight],
  dragleave: [preventDefaults, unhighlight],
  drop: [preventDefaults, unhighlight, handleDrop]
};
for (const [eventName, handlers] of Object.entries(dragHandlers)) {
  for (const handler of handlers) {
    dropzone.addEventListener(eventName, handler, false);
  }
}
// Handle model file selection via drag & drop.
function handleDrop(e) {
  loadIfOnnx(e.dataTransfer.files);
}
// ...and via the hidden <input type="file"> (click-to-browse).
modelFileInput.addEventListener('change', (e) => loadIfOnnx(e.target.files));

// Shared guard: forward the first selected file to the model loader only
// when its name ends in ".onnx".
function loadIfOnnx(files) {
  if (files.length > 0 && files[0].name.endsWith('.onnx')) {
    handleModelFile(files[0]);
  }
}
// Read a user-supplied .onnx file and initialize an ONNX Runtime Web
// session from it. Updates the status badge/labels throughout and enables
// the Start button only on success.
async function handleModelFile(file) {
  try {
    // Reflect the "loading" state in the UI while the file is read.
    modelStatusText.textContent = `Loading ${file.name}...`;
    modelSizeText.textContent = `(${(file.size/1e6).toFixed(1)} MB)`;
    modelStatusBadge.className = 'status-badge loading';
    modelStatusBadge.innerHTML = '<i class="fas fa-spinner fa-spin mr-1"></i> Loading';
    startBtn.disabled = true;
    // Read the file into an ArrayBuffer, then initialize asynchronously.
    const reader = new FileReader();
    reader.onload = async (ev) => {
      modelBuffer = ev.target.result;
      log(`Initializing ONNX session for ${file.name}`);
      try {
        // Prefer the WebGL execution provider; fall back to WASM.
        const sessionOptions = {
          executionProviders: ['webgl', 'wasm'],
          graphOptimizationLevel: 'all'
        };
        try {
          session = await ort.InferenceSession.create(modelBuffer, sessionOptions);
        } catch (webglError) {
          log(`WebGL backend failed, falling back to WASM: ${webglError.message}`);
          sessionOptions.executionProviders = ['wasm'];
          session = await ort.InferenceSession.create(modelBuffer, sessionOptions);
        }
        // Palette for drawing (COCO-style models expose 80 classes).
        classColors = generateClassColors(80);
        // Success path.
        modelStatusText.textContent = `Loaded: ${file.name}`;
        modelStatusBadge.className = 'status-badge ready';
        modelStatusBadge.innerHTML = '<i class="fas fa-check-circle mr-1"></i> Ready';
        startBtn.disabled = false;
        log(`Model loaded successfully with ${session.inputNames.length} inputs and ${session.outputNames.length} outputs`);
        // BUG FIX: ort-web's InferenceSession exposes only inputNames /
        // outputNames — the old `session.inputs[0].dims` access threw a
        // TypeError here and dumped every successful load into the error
        // path. Log the names instead.
        log(`Inputs: ${session.inputNames.join(', ')} | Outputs: ${session.outputNames.join(', ')}`);
        // Heuristic: segmentation exports carry a mask output.
        const isSegmentation = session.outputNames.some(name => name.includes('mask'));
        log(`Model type: ${isSegmentation ? 'Segmentation' : 'Detection'}`);
      } catch (error) {
        // BUG FIX: the old catch displayed a "Ready" badge and re-created
        // the session with an unguarded await (unhandled rejection). Report
        // the failure honestly and keep the Start button disabled.
        session = null;
        modelStatusText.textContent = `Failed to load ${file.name}`;
        modelStatusBadge.className = 'status-badge error';
        modelStatusBadge.innerHTML = '<i class="fas fa-exclamation-circle mr-1"></i> Error';
        startBtn.disabled = true;
        log(`Model initialization failed: ${error.message}`);
        console.error('Model initialization failed:', error);
      }
    };
    reader.onerror = (event) => {
      modelStatusText.textContent = `Error reading file`;
      modelStatusBadge.className = 'status-badge error';
      modelStatusBadge.innerHTML = '<i class="fas fa-exclamation-circle mr-1"></i> Error';
      log(`File read error: ${event.target.error}`);
    };
    reader.readAsArrayBuffer(file);
  } catch (error) {
    log(`Error handling model file: ${error.message}`);
    console.error(error);
  }
}
// Toggle webcam detection. First click: request the camera, size the
// canvases to the actual stream dimensions, and start the loop. Second
// click: stop the loop AND release the camera.
startBtn.addEventListener('click', async () => {
  if (isRunning) {
    // Stop detection.
    isRunning = false;
    // BUG FIX: the old stop path left the MediaStream running, so the
    // camera (and its indicator light) stayed on after "Stop".
    if (video.srcObject) {
      video.srcObject.getTracks().forEach(track => track.stop());
      video.srcObject = null;
    }
    startBtn.innerHTML = '<i class="fas fa-play mr-2"></i> Start Detection';
    startBtn.classList.remove('bg-red-600', 'hover:bg-red-700');
    startBtn.classList.add('bg-green-600', 'hover:bg-green-700');
    webcamStatusBadge.className = 'status-badge disabled';
    webcamStatusBadge.innerHTML = '<i class="fas fa-times-circle mr-1"></i> Inactive';
    log('Detection stopped');
    return;
  }
  try {
    // Request the camera; 640x640 "ideal" matches typical YOLO inputs but
    // the browser may deliver other dimensions.
    log('Requesting webcam access...');
    const stream = await navigator.mediaDevices.getUserMedia({
      video: {
        width: { ideal: 640 },
        height: { ideal: 640 },
        facingMode: 'environment'
      },
      audio: false
    });
    video.srcObject = stream;
    await video.play();
    // Poll until the video reports real dimensions before sizing canvases.
    await new Promise(resolve => {
      const checkDimensions = () => {
        if (video.videoWidth > 0 && video.videoHeight > 0) {
          resolve();
        } else {
          setTimeout(checkDimensions, 50);
        }
      };
      checkDimensions();
    });
    // Match both overlay canvases to the actual stream size.
    const videoWidth = video.videoWidth;
    const videoHeight = video.videoHeight;
    canvas.width = videoWidth;
    canvas.height = videoHeight;
    segmentationCanvas.width = videoWidth;
    segmentationCanvas.height = videoHeight;
    videoContainer.style.aspectRatio = `${videoWidth}/${videoHeight}`;
    // Swap the placeholder for the live layers.
    videoPlaceholder.classList.add('hidden');
    video.classList.remove('hidden');
    canvas.classList.remove('hidden');
    segmentationCanvas.classList.remove('hidden');
    detectionLegend.classList.remove('hidden');
    // Flip the button/badge into the "running" state and start the loop.
    isRunning = true;
    startBtn.innerHTML = '<i class="fas fa-stop mr-2"></i> Stop Detection';
    startBtn.classList.remove('bg-green-600', 'hover:bg-green-700');
    startBtn.classList.add('bg-red-600', 'hover:bg-red-700');
    webcamStatusBadge.className = 'status-badge ready';
    webcamStatusBadge.innerHTML = '<i class="fas fa-check-circle mr-1"></i> Active';
    log(`Webcam started (${videoWidth}x${videoHeight}) - beginning detection`);
    detectionLoop();
  } catch (error) {
    log(`Error accessing webcam: ${error.message}`);
    console.error(error);
    webcamStatusBadge.className = 'status-badge error';
    webcamStatusBadge.innerHTML = '<i class="fas fa-exclamation-circle mr-1"></i> Error';
  }
});
// Greedy non-maximum suppression over axis-aligned boxes.
// boxes: array of [x1, y1, x2, y2]; scores: parallel confidence array.
// Returns the indices of the kept boxes, highest score first.
function nonMaxSuppression(boxes, scores, iouThreshold) {
  const selectedIndices = [];
  const areas = boxes.map(box => (box[2] - box[0]) * (box[3] - box[1]));
  // Candidate indices sorted by score, descending.
  const scoreIndices = scores
    .map((score, index) => ({score, index}))
    .sort((a, b) => b.score - a.score)
    .map(obj => obj.index);
  while (scoreIndices.length > 0) {
    // Keep the best remaining candidate.
    const current = scoreIndices.shift();
    selectedIndices.push(current);
    const currentBox = boxes[current];
    // IoU between the kept box and each remaining candidate.
    const ious = scoreIndices.map(i => {
      const box = boxes[i];
      const x1 = Math.max(currentBox[0], box[0]);
      const y1 = Math.max(currentBox[1], box[1]);
      const x2 = Math.min(currentBox[2], box[2]);
      const y2 = Math.min(currentBox[3], box[3]);
      const intersection = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
      // BUG FIX: the union must use the candidate's area (areas[i]); the
      // old code indexed `areas[box]` with the box *array* itself, which
      // produced NaN and disabled suppression entirely.
      const union = areas[current] + areas[i] - intersection;
      return intersection / union;
    });
    // Drop candidates that overlap the kept box too much
    // (iterate backwards so splice indices stay valid).
    for (let i = ious.length - 1; i >= 0; i--) {
      if (ious[i] > iouThreshold) {
        scoreIndices.splice(i, 1);
      }
    }
  }
  return selectedIndices;
}
// Decode one raw model output into final detections (confidence filter +
// NMS), reading the slider thresholds live from the DOM. Returns an array
// of { box: [x1,y1,x2,y2] in display pixels, score, classId, mask|null }.
//
// NOTE(review): this decoder assumes the detection tensor is laid out as
// [batch, num_detections, 4 box + 1 objectness + num_classes] with
// normalized (x1, y1, x2, y2) corners. Standard YOLOv8 exports instead emit
// [batch, 4 + num_classes, num_anchors] (transposed, no objectness column,
// and cx/cy/w/h in input-pixel units) — confirm against the actual model,
// otherwise boxes will be misplaced or empty.
function processYoloOutput(output, imgWidth, imgHeight) {
const confThreshold = parseFloat(confidenceThreshold.value);
const iouThresh = parseFloat(iouThreshold.value);
// Detection head tensor; 'output0' is the conventional ort-web export name.
const outputTensor = output.output0;
const outputData = outputTensor.data;
// Clear previous debug output
debugOutput.innerHTML = '';
// Log raw tensor shape and first few values for inspection.
debugLog(`Output tensor shape: [${outputTensor.dims.join(', ')}]`);
debugLog(`First 20 values: ${Array.from(outputData.slice(0, 20)).map(v => v.toFixed(2)).join(', ')}`);
// Assumed layout: dims[1] = detections, dims[2] = per-detection features.
const numDetections = outputTensor.dims[1];
const numFeatures = outputTensor.dims[2];
debugLog(`Num detections: ${numDetections}, Num features: ${numFeatures}`);
// Extract boxes, scores, and class IDs
const boxes = [];
const scores = [];
const classIds = [];
for (let i = 0; i < numDetections; i++) {
const offset = i * numFeatures;
// Box corners, assumed already normalized to [0,1] — TODO confirm.
const x1 = outputData[offset];
const y1 = outputData[offset + 1];
const x2 = outputData[offset + 2];
const y2 = outputData[offset + 3];
// Value at offset+4 treated as the objectness confidence.
const conf = outputData[offset + 4];
// Find class with maximum probability
let maxScore = -1;
let classId = -1;
// NOTE(review): j starts at 4, so the objectness value at offset+4 is also
// included in the class argmax as "class 0" — if there really is an
// objectness column this should start at 5 (and classId = j - 5); confirm.
for (let j = 4; j < numFeatures; j++) {
const score = outputData[offset + j];
if (score > maxScore) {
maxScore = score;
classId = j - 4; // Subtract 4 because first 4 elements are box coordinates
}
}
// Final score = objectness * best class probability.
const finalScore = conf * maxScore;
// Filter by confidence threshold
if (finalScore > confThreshold) {
// Scale normalized box coordinates up to display-image pixels.
const scaledBox = [
x1 * imgWidth,
y1 * imgHeight,
x2 * imgWidth,
y2 * imgHeight
];
boxes.push(scaledBox);
scores.push(finalScore);
classIds.push(classId);
// Log detection details
debugLog(`Detection ${i}: [${scaledBox.map(v => v.toFixed(1)).join(', ')}] score=${finalScore.toFixed(2)} class=${classId}`);
}
}
// Apply non-max suppression
const selectedIndices = nonMaxSuppression(boxes, scores, iouThresh);
// Assemble kept detections; 'output1' (if present) is assumed to carry
// mask data — presumably prototype masks; verify shape against the model.
const detections = selectedIndices.map(idx => ({
box: boxes[idx],
score: scores[idx],
classId: classIds[idx],
mask: output.output1 ? getMaskForDetection(output.output1.data, idx, output.output1.dims) : null
}));
return detections;
}
// Build a per-pixel channel-argmax mask for one detection from a raw mask
// tensor laid out as [batch, channels, height, width] (flat array).
// Returns { data, width, height } where data[y * width + x] is the index of
// the channel holding the largest value at that pixel.
function getMaskForDetection(masksData, detectionIdx, maskShape) {
  const [, channels, rows, cols] = maskShape;
  const planeSize = rows * cols;
  // Offset of this detection's block of channel planes.
  const base = detectionIdx * channels * planeSize;
  const mask = new Array(planeSize).fill(0);
  for (let pixel = 0; pixel < planeSize; pixel++) {
    let bestChannel = 0;
    let bestValue = -Infinity;
    for (let c = 0; c < channels; c++) {
      const value = masksData[base + c * planeSize + pixel];
      if (value > bestValue) {
        bestValue = value;
        bestChannel = c;
      }
    }
    mask[pixel] = bestChannel;
  }
  return {
    data: mask,
    width: cols,
    height: rows
  };
}
// Render one frame's detections: the video frame plus boxes/labels on the
// main canvas, optional per-detection masks on the segmentation overlay,
// and a per-class legend beneath the video.
function drawDetections(detections, imgWidth, imgHeight) {
  // Clear both overlay layers.
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  segCtx.clearRect(0, 0, segmentationCanvas.width, segmentationCanvas.height);
  // Paint the current video frame under the box overlay.
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
  // BUG FIX: set the label font BEFORE measuring text — the old code
  // measured with the default 10px font and only set 12px Arial afterwards,
  // so label backgrounds were sized too small.
  ctx.font = '12px Arial';
  // Rebuild the legend from scratch each frame.
  detectionLegend.innerHTML = '';
  const legendItems = new Set();
  detections.forEach(det => {
    const [x1, y1, x2, y2] = det.box;
    const width = x2 - x1;
    const height = y2 - y1;
    const className = classNames[det.classId] || `Class ${det.classId}`;
    const color = classColors[det.classId] || '#3B82F6';
    // One legend entry per class present in this frame.
    if (!legendItems.has(det.classId)) {
      legendItems.add(det.classId);
      const legendItem = document.createElement('div');
      legendItem.className = 'legend-item';
      legendItem.innerHTML = `
        <div class="legend-color" style="background-color: ${color};"></div>
        <span>${className}</span>
        <div class="confidence-bar">
          <div class="confidence-fill" style="width: ${det.score * 100}%;"></div>
        </div>
      `;
      detectionLegend.appendChild(legendItem);
    }
    // Draw the mask if available and enabled.
    if (det.mask && showMasks && showMasks.checked) {
      const mask = det.mask;
      const scaleX = width / mask.width;
      const scaleY = height / mask.height;
      // Off-screen canvas holding the mask at its native resolution.
      const maskCanvas = document.createElement('canvas');
      maskCanvas.width = mask.width;
      maskCanvas.height = mask.height;
      const maskCtx = maskCanvas.getContext('2d');
      // BUG FIX: class colors are hsl() strings, which hexToRgb could not
      // parse (masks rendered black). Resolve ANY CSS color to RGB by
      // painting one pixel and reading it back.
      maskCtx.fillStyle = color;
      maskCtx.fillRect(0, 0, 1, 1);
      const [r, g, b] = maskCtx.getImageData(0, 0, 1, 1).data;
      const maskImageData = maskCtx.createImageData(mask.width, mask.height);
      for (let i = 0; i < mask.data.length; i++) {
        if (mask.data[i] > 0) { // only paint non-background pixels
          const idx = i * 4;
          maskImageData.data[idx] = r;
          maskImageData.data[idx + 1] = g;
          maskImageData.data[idx + 2] = b;
          maskImageData.data[idx + 3] = 150; // semi-transparent overlay
        }
      }
      maskCtx.putImageData(maskImageData, 0, 0);
      // Scale the mask into the detection's box on the segmentation layer.
      segCtx.save();
      segCtx.translate(x1, y1);
      segCtx.scale(scaleX, scaleY);
      segCtx.drawImage(maskCanvas, 0, 0);
      segCtx.restore();
    }
    // Bounding box.
    ctx.strokeStyle = color;
    ctx.lineWidth = 2;
    ctx.strokeRect(x1, y1, width, height);
    // Label background sized to the measured text, then the text itself.
    const label = `${className} ${(det.score * 100).toFixed(1)}%`;
    const textWidth = ctx.measureText(label).width;
    ctx.fillStyle = color;
    ctx.fillRect(x1 - 2, y1 - 20, textWidth + 4, 20);
    ctx.fillStyle = 'white';
    ctx.fillText(label, x1, y1 - 5);
  });
}
// Convert a CSS color string to [r, g, b]. Accepts "#rrggbb" (with or
// without the leading '#') and — generalized, since generateClassColors
// produces HSL strings — "hsl(h, s%, l%)". Unrecognized input yields
// [0, 0, 0], matching the old fallback.
function hexToRgb(color) {
  const hex = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(color);
  if (hex) {
    return [
      parseInt(hex[1], 16),
      parseInt(hex[2], 16),
      parseInt(hex[3], 16)
    ];
  }
  const hsl = /^hsl\(\s*([\d.]+)\s*,\s*([\d.]+)%\s*,\s*([\d.]+)%\s*\)$/i.exec(color);
  if (hsl) {
    const h = parseFloat(hsl[1]);
    const s = parseFloat(hsl[2]) / 100;
    const l = parseFloat(hsl[3]) / 100;
    // Standard HSL -> RGB conversion (chroma / intermediate / offset).
    const c = (1 - Math.abs(2 * l - 1)) * s;
    const x = c * (1 - Math.abs(((h / 60) % 2) - 1));
    const m = l - c / 2;
    let rgb;
    if (h < 60) rgb = [c, x, 0];
    else if (h < 120) rgb = [x, c, 0];
    else if (h < 180) rgb = [0, c, x];
    else if (h < 240) rgb = [0, x, c];
    else if (h < 300) rgb = [x, 0, c];
    else rgb = [c, 0, x];
    return rgb.map(v => Math.round((v + m) * 255));
  }
  return [0, 0, 0];
}
// Main per-frame loop: preprocess -> session.run -> decode -> draw.
// Reschedules itself via requestAnimationFrame until isRunning is cleared.
async function detectionLoop() {
if (!isRunning) return;
const startTime = performance.now(); // NOTE(review): unused — candidate for removal
try {
// Convert the current video frame into the model's input tensor.
const inputTensor = await preprocessFrame(video);
// Feed keyed by the model's first input name.
const feeds = { [session.inputNames[0]]: inputTensor };
const inferenceStart = performance.now();
const output = await session.run(feeds);
lastInferenceTime = performance.now() - inferenceStart;
// Decode the raw output into filtered, NMS'd detections.
const detections = processYoloOutput(output, video.videoWidth, video.videoHeight);
// Draw detections
drawDetections(detections, video.videoWidth, video.videoHeight);
// detections[0] is the highest-scoring one (NMS emits indices in
// descending score order).
if (detections.length > 0) {
const topDetection = detections[0];
const className = classNames[topDetection.classId] || `Class ${topDetection.classId}`;
log(`Detected ${detections.length} objects (top: ${className} @ ${(topDetection.score * 100).toFixed(1)}%)`);
}
// Update performance counters once per second.
frameCount++;
const now = performance.now();
if (now - lastFpsUpdate >= 1000) {
fps = frameCount * 1000 / (now - lastFpsUpdate);
frameCount = 0;
lastFpsUpdate = now;
// NOTE(review): the ms/inference readout also refreshes only inside this
// once-per-second branch, so it shows a sampled value, not an average.
fpsCounter.textContent = Math.round(fps);
inferenceTime.textContent = lastInferenceTime.toFixed(1);
}
} catch (error) {
// Errors are logged but the loop keeps running — the next frame may succeed.
log(`Detection error: ${error.message}`);
console.error(error);
}
// Schedule next frame
requestAnimationFrame(detectionLoop);
}
// Convert the current video frame into an ort.Tensor of shape
// [1, 3, height, width]: RGB, float32, values scaled to [0, 1], CHW layout.
//
// NOTE(review): there is no resize/letterbox step — the tensor takes the
// camera's native dimensions. Models exported with a fixed input size
// (commonly 640x640) will reject frames of any other size; confirm the
// model accepts dynamic spatial dims or add a resize here.
async function preprocessFrame(videoElement) {
// Stage the frame on an off-screen canvas to get at its pixels.
const tempCanvas = document.createElement('canvas');
tempCanvas.width = videoElement.videoWidth;
tempCanvas.height = videoElement.videoHeight;
const tempCtx = tempCanvas.getContext('2d');
// Draw video frame to canvas
tempCtx.drawImage(videoElement, 0, 0, tempCanvas.width, tempCanvas.height);
// Get image data
const imageData = tempCtx.getImageData(0, 0, tempCanvas.width, tempCanvas.height);
// 3 floats per pixel (alpha is dropped below).
const float32Data = new Float32Array(tempCanvas.width * tempCanvas.height * 3);
// Drop the alpha channel and scale 8-bit RGB to [0, 1].
for (let i = 0, j = 0; i < imageData.data.length; i += 4) {
float32Data[j++] = imageData.data[i] / 255.0; // R
float32Data[j++] = imageData.data[i + 1] / 255.0; // G
float32Data[j++] = imageData.data[i + 2] / 255.0; // B
}
// Convert from HWC (interleaved) to CHW (planar, channels-first).
const chwData = new Float32Array(float32Data.length);
const channelSize = tempCanvas.width * tempCanvas.height;
for (let c = 0; c < 3; ++c) {
for (let i = 0; i < channelSize; ++i) {
chwData[c * channelSize + i] = float32Data[i * 3 + c];
}
}
// Create tensor with shape [1, 3, height, width]
return new ort.Tensor('float32', chwData, [1, 3, tempCanvas.height, tempCanvas.width]);
}
// Populate the global classNames lookup with the 80 COCO class labels,
// keyed by class index (0-79) in the standard COCO order.
function initClassNames() {
  const cocoLabels = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
    'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush'
  ];
  classNames = Object.fromEntries(cocoLabels.map((label, i) => [i, label]));
}
// Page lifecycle: seed the class-name table once the DOM is ready, and
// release the webcam when the page is torn down.
window.addEventListener('DOMContentLoaded', initClassNames);
window.addEventListener('beforeunload', () => {
  // No explicit ort session disposal is performed here; just stop the camera.
  const stream = video.srcObject;
  if (stream) {
    for (const track of stream.getTracks()) {
      track.stop();
    }
  }
});
</script>
<!-- Attribution badge. FIX: added rel="noopener noreferrer" to the
     target="_blank" links to prevent reverse tabnabbing. -->
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=MaxLeft/yolo-detection-app" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">Remix</a></p></body>
</html>