Spaces:

azizmeer40
/

VoiceVision-AI

Sleeping

App Files Files Community

VoiceVision-AI / utils /object_detection.py

azizmeer40

Update utils/object_detection.py

10d7e0b verified 10 days ago

raw

history blame contribute delete

2.22 kB

	import cv2
	import numpy as np
	from ultralytics import YOLO

	# Load the YOLOv8 "n" (nano) weights once, at import time, so that every
	# call to detect_objects() reuses the same model instance. The small
	# variant was chosen for faster real-time performance.
	model = YOLO('yolov8n.pt')

	def detect_objects(frame, confidence=0.45):
	    """Detect objects in a frame and estimate how far away each one is.

	    Runs the module-level YOLO ``model`` on ``frame`` (with ``imgsz=320`` to
	    keep inference fast) and turns every bounding box into a plain dict.
	    Distance is estimated from the box width as
	    ``real_width * focal_length / pixel_width``.

	    Args:
	        frame: Image to analyse, in any form ``model.predict`` accepts
	            (presumably a BGR numpy array from OpenCV -- confirm with caller).
	        confidence: Minimum confidence score, passed to the model as ``conf``.

	    Returns:
	        A list of at most five dicts, nearest first. Each dict has the keys
	        ``label``, ``x`` and ``y`` (top-left corner of the box), ``w`` and
	        ``h`` (box size) and ``distance`` (estimated metres, rounded to two
	        decimals). An empty list is returned when ``frame`` is ``None`` or
	        contains no pixels.
	    """
	    # A missing or empty frame has nothing to detect; return early instead
	    # of handing it to the model.
	    if frame is None or getattr(frame, "size", 1) == 0:
	        return []

	    # Approximate real-world object widths in metres, keyed by class label.
	    # The distance estimate is only as accurate as these values.
	    REAL_WIDTHS = {
	        "person": 0.50,
	        "cell phone": 0.08,
	        "laptop": 0.35,
	        "bottle": 0.07,
	        "cup": 0.10,
	        "chair": 0.55,
	        "tv": 0.80,
	        "keyboard": 0.40,
	        "mouse": 0.06,
	    }
	    # Width assumed for labels that are not listed above.
	    DEFAULT_WIDTH = 0.25
	    # Focal length (in pixels) used by the distance formula. The original
	    # author tuned it for standard webcams; recalibrate for other cameras.
	    FOCAL_LENGTH = 650
	    # Only the nearest few objects are returned so the JS front end is not
	    # overloaded.
	    MAX_RESULTS = 5

	    results = model.predict(
	        frame,
	        conf=confidence,
	        imgsz=320,
	        verbose=False,
	        half=False,  # False for CPU; set to True when running on a GPU
	    )

	    detections = []
	    for r in results:
	        for box in r.boxes:
	            # Corner coordinates of the box, truncated to whole pixels.
	            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
	            w_px = x2 - x1
	            h_px = y2 - y1

	            # A box without positive width cannot yield a distance. Reporting
	            # it as 0 m would sort it first and push real detections out of
	            # the top results, so it is dropped instead.
	            if w_px <= 0:
	                continue

	            label = model.names[int(box.cls[0])]
	            real_w = REAL_WIDTHS.get(label, DEFAULT_WIDTH)
	            distance = (real_w * FOCAL_LENGTH) / w_px

	            detections.append({
	                "label": label,
	                "x": x1,
	                "y": y1,
	                "w": w_px,
	                "h": h_px,
	                "distance": round(distance, 2),
	            })

	    # Nearest objects first; the sort is stable, so ties keep detection order.
	    detections.sort(key=lambda det: det["distance"])
	    return detections[:MAX_RESULTS]