# Streamlit app: streams webcam video over WebRTC, detects objects with YOLO
# and annotates every detection with a distance derived from the UniDepth V2
# 3D point map.

import cv2
import numpy as np
import streamlit as st
import torch
from streamlit_webrtc import VideoTransformerBase, webrtc_streamer
from ultralytics import YOLO

# Import local UniDepth
from unidepth.models import UniDepthV2

# -----------------------------
# Streamlit setup
# -----------------------------
st.set_page_config(page_title="YOLO + UniDepth Streaming", layout="wide")
st.title("📡 YOLO + UniDepth V2 — Real 3D Distance (CPU Edition)")

# -----------------------------
# Drawing constants
# -----------------------------
BOX_COLOR = (0, 255, 0)
BOX_THICKNESS = 2
LABEL_COLOR = (0, 255, 255)
LABEL_FONT = cv2.FONT_HERSHEY_SIMPLEX
LABEL_SCALE = 0.7
LABEL_THICKNESS = 2
LABEL_GAP = 10  # pixels between the top of the box and the text baseline


# -----------------------------
# Load models (cached)
# -----------------------------
@st.cache_resource
def load_depth_model():
    """Load the pretrained UniDepth V2 model in eval mode.

    Returns:
        A ``(model, device)`` tuple; ``device`` is CUDA when
        ``torch.cuda.is_available()`` and CPU otherwise.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = UniDepthV2.from_pretrained("lpiccinelli/unidepth-v2-vitl14").to(device)
    model.eval()
    return model, device


@st.cache_resource
def load_yolo():
    """Load the YOLO detector from the ``yolov8n.pt`` weights."""
    return YOLO("yolov8n.pt")


depth_model, device = load_depth_model()
yolo_model = load_yolo()


# -----------------------------
# Helper functions
# -----------------------------
def get_depth_points(rgb: np.ndarray) -> np.ndarray:
    """Run UniDepth on one RGB frame and return its 3D point map.

    Args:
        rgb: Image array in height/width/channel order (it is permuted to
            channel-first before inference).

    Returns:
        The first batch entry of ``pred["points"]`` as a NumPy array.
        Callers index it as ``[:, row, col]`` to obtain ``(X, Y, Z)``;
        presumably the map has the same resolution as ``rgb`` -- confirm
        against the UniDepth documentation.
    """
    img_t = (
        torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0).to(device) / 255.0
    )
    with torch.no_grad():
        pred = depth_model.infer(img_t)
    return pred["points"][0].cpu().numpy()


def process_frame(frame_bgr: np.ndarray, conf_threshold: float = 0.5) -> np.ndarray:
    """Detect objects in a BGR frame and draw boxes labelled with distance.

    The distance of a detection is the Euclidean norm of the 3D point found
    at the centre pixel of its bounding box.

    Args:
        frame_bgr: Frame in BGR channel order; it is annotated in place.
        conf_threshold: Detections whose confidence is below this value are
            skipped.

    Returns:
        The same ``frame_bgr`` array with rectangles and labels drawn on it.
    """
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    points_3d = get_depth_points(frame_rgb)
    _, map_h, map_w = points_3d.shape

    results = yolo_model(frame_bgr, verbose=False)[0]
    for box in results.boxes:
        if box.conf.item() < conf_threshold:
            continue

        label = yolo_model.names[int(box.cls.item())]
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())

        # Clamp the centre pixel to the point map: an unclamped negative index
        # would silently wrap around, and one equal to the map size would
        # raise IndexError.
        cx = min(max((x1 + x2) // 2, 0), map_w - 1)
        cy = min(max((y1 + y2) // 2, 0), map_h - 1)
        distance = float(np.linalg.norm(points_3d[:, cy, cx]))

        text = f"{label} {distance:.2f}m"
        # Keep the label inside the frame when the box touches the top edge.
        (_, text_h), _ = cv2.getTextSize(text, LABEL_FONT, LABEL_SCALE, LABEL_THICKNESS)
        text_y = max(y1 - LABEL_GAP, text_h + 2)

        cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), BOX_COLOR, BOX_THICKNESS)
        cv2.putText(
            frame_bgr,
            text,
            (x1, text_y),
            LABEL_FONT,
            LABEL_SCALE,
            LABEL_COLOR,
            LABEL_THICKNESS,
        )
    return frame_bgr


# -----------------------------
# WebRTC Video Processor
# -----------------------------
class YOLODepthProcessor(VideoTransformerBase):
    """Video transformer that annotates each incoming frame."""

    def transform(self, frame):
        """Convert ``frame`` to a BGR array and return the annotated image."""
        img = frame.to_ndarray(format="bgr24")
        return process_frame(img)


# -----------------------------
# WebRTC Streamlit Widget
# -----------------------------
st.warning("This app runs in the cloud — your webcam stays private on your device.")

webrtc_streamer(
    key="yolo-depth",
    video_transformer_factory=YOLODepthProcessor,
    async_processing=True,
    media_stream_constraints={"video": True, "audio": False},
    rtc_configuration={
        "iceServers": [
            {"urls": ["stun:stun.l.google.com:19302"]},
            {
                "urls": ["turn:openrelay.metered.ca:80"],
                "username": "openrelayproject",
                "credential": "openrelayproject",
            },
        ]
    },
)