# monocular-depth / app.py
# Author: kushaal457-lang — commit 298865c
# "Add UniDepth package locally and update requirements"
import streamlit as st
import cv2
import torch
import numpy as np
from ultralytics import YOLO
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase
# Import local UniDepth
from unidepth.models import UniDepthV2
# -----------------------------
# Streamlit setup
# -----------------------------
st.set_page_config(page_title="YOLO + UniDepth Streaming", layout="wide")
# Restored UTF-8 in the title (original text was mojibake: "πŸ“‘", "β€”").
st.title("📡 YOLO + UniDepth V2 — Real 3D Distance (CPU Edition)")
# -----------------------------
# Load models (cached)
# -----------------------------
@st.cache_resource
def load_depth_model():
    """Load UniDepthV2 once per session and move it to the best device.

    Returns:
        (model, device): the depth model in eval mode and the torch device
        it was placed on (CUDA when available, otherwise CPU).
    """
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    depth = UniDepthV2.from_pretrained("lpiccinelli/unidepth-v2-vitl14")
    depth = depth.to(dev)
    depth.eval()
    return depth, dev
@st.cache_resource
def load_yolo():
    """Load the YOLOv8-nano detector once per Streamlit session."""
    detector = YOLO("yolov8n.pt")
    return detector
# Instantiate both cached models at import time so the first streamed
# frame does not pay the model-download/load cost.
depth_model, device = load_depth_model()
yolo_model = load_yolo()
# -----------------------------
# Helper functions
# -----------------------------
def get_depth_points(rgb):
    """Run UniDepth on an RGB frame and return its per-pixel 3D points.

    Args:
        rgb: HxWx3 uint8 RGB image.

    Returns:
        numpy array of camera-space points — the "points" output of
        UniDepthV2.infer for the single batch item.
    """
    tensor = torch.from_numpy(rgb).float()
    tensor = tensor.permute(2, 0, 1).unsqueeze(0)
    tensor = tensor.to(device) / 255.0  # normalize to [0, 1]
    with torch.no_grad():
        prediction = depth_model.infer(tensor)
    return prediction["points"][0].cpu().numpy()
def process_frame(frame_bgr):
    """Annotate one BGR frame with YOLO boxes and metric distances.

    Runs UniDepth to get per-pixel camera-space XYZ points, runs YOLO
    detection, and for each confident detection draws its bounding box
    plus the Euclidean distance of the box-centre point from the camera.

    Args:
        frame_bgr: HxWx3 uint8 BGR image (OpenCV convention). Annotated
            in place.

    Returns:
        The same frame array with annotations drawn on it.
    """
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    points_3d = get_depth_points(frame_rgb)
    # NOTE(review): assumes the depth map matches the input frame
    # resolution — confirm UniDepthV2.infer returns full-size points.
    _, depth_h, depth_w = points_3d.shape

    results = yolo_model(frame_bgr, verbose=False)[0]
    for box in results.boxes:
        conf = box.conf.item()
        if conf < 0.5:  # skip low-confidence detections
            continue
        cls_id = int(box.cls.item())
        label = yolo_model.names[cls_id]
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        # Clamp the box centre: boxes touching or slightly exceeding the
        # frame edge would otherwise index past the end of the depth map.
        cx = min(max((x1 + x2) // 2, 0), depth_w - 1)
        cy = min(max((y1 + y2) // 2, 0), depth_h - 1)
        X, Y, Z = points_3d[:, cy, cx]
        distance = np.sqrt(X**2 + Y**2 + Z**2)  # metric range to centre point
        cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame_bgr, f"{label} {distance:.2f}m", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    return frame_bgr
# -----------------------------
# WebRTC Video Processor
# -----------------------------
class YOLODepthProcessor(VideoTransformerBase):
    """streamlit-webrtc processor that annotates every incoming frame."""

    def transform(self, frame):
        # Convert the incoming av.VideoFrame to an OpenCV BGR ndarray,
        # then hand it to the shared annotation pipeline.
        bgr = frame.to_ndarray(format="bgr24")
        return process_frame(bgr)
# -----------------------------
# WebRTC Streamlit Widget
# -----------------------------
# Restored UTF-8 em-dash in the warning text (original was mojibake "β€”").
st.warning("This app runs in the cloud — your webcam stays private on your device.")
webrtc_streamer(
    key="yolo-depth",
    video_transformer_factory=YOLODepthProcessor,
    async_processing=True,  # process frames off the UI thread
    media_stream_constraints={"video": True, "audio": False},
    rtc_configuration={
        "iceServers": [
            # Google's public STUN server for NAT traversal discovery.
            {"urls": ["stun:stun.l.google.com:19302"]},
            # Open-relay TURN fallback for restrictive networks.
            {
                "urls": ["turn:openrelay.metered.ca:80"],
                "username": "openrelayproject",
                "credential": "openrelayproject",
            },
        ]
    },
)