VoiceVision-AI / utils /object_detection.py
azizmeer40's picture
Update utils/object_detection.py
10d7e0b verified
import cv2
import numpy as np
from ultralytics import YOLO
# Load the YOLOv8 "n" checkpoint once at import time; detect_objects() reuses
# this module-level model for every frame. It is the small, fast variant,
# chosen for real-time performance.
model = YOLO('yolov8n.pt')
def detect_objects(frame, confidence=0.45, focal_length=650, max_results=5):
    """Detect objects in a frame and estimate the distance to each one.

    Distance is derived from the pinhole-camera relation
    ``distance = real_width * focal_length / pixel_width``, so it is only as
    accurate as the assumed real-world widths and the focal length.

    Args:
        frame: Image to run detection on (presumably an OpenCV/numpy frame --
            confirm against the caller). ``None`` or an empty array, e.g.
            from a failed camera read, yields an empty list instead of an
            error.
        confidence: Minimum detection confidence, passed to the model as
            ``conf``.
        focal_length: Camera focal length in pixels. The default of 650 is
            the value the original code used for standard webcams;
            recalibrate it for other cameras or resolutions.
        max_results: Maximum number of detections to return, nearest first.
            ``None`` returns every detection.

    Returns:
        A list of dicts sorted by ascending distance, each with the keys
        ``label``, ``x``, ``y`` (top-left corner), ``w``, ``h`` (box size in
        pixels) and ``distance`` (meters, rounded to two decimals).
    """
    # Nothing to detect on a missing or empty frame; bail out before the
    # model is invoked.
    if frame is None or getattr(frame, "size", None) == 0:
        return []

    # imgsz=320 keeps inference fast for smooth video.
    results = model.predict(
        frame,
        conf=confidence,
        imgsz=320,
        verbose=False,
        half=False,  # False for CPU; set to True when running on a GPU
    )

    # Calibration: realistic object widths in meters. The distance estimate
    # is directly proportional to these, so keep them accurate.
    real_widths = {
        "person": 0.50,
        "cell phone": 0.08,
        "laptop": 0.35,
        "bottle": 0.07,
        "cup": 0.10,
        "chair": 0.55,
        "tv": 0.80,
        "keyboard": 0.40,
        "mouse": 0.06,
    }
    default_width = 0.25  # assumed width (m) for labels without calibration

    detections = []
    for result in results:
        for box in result.boxes:
            # Corner coordinates of the bounding box, truncated to whole
            # pixels.
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            w_px = x2 - x1
            h_px = y2 - y1

            # A box with no width cannot yield a distance. Reporting it as
            # 0 m would rank it as the nearest object, so drop it instead.
            if w_px <= 0:
                continue

            label = model.names[int(box.cls[0])]
            real_w = real_widths.get(label, default_width)
            distance = (real_w * focal_length) / w_px

            detections.append({
                "label": label,
                "x": x1,
                "y": y1,
                "w": w_px,
                "h": h_px,
                "distance": round(distance, 2),
            })

    # Return only the nearest objects so the JavaScript consumer is not
    # overloaded.
    detections.sort(key=lambda det: det["distance"])
    return detections[:max_results]