"""Gradio playground: depth estimation and object-distance estimation.

This module holds the image-conversion helpers, the model caches and the two
task callbacks (`run_depth_task`, `run_object_distance_task`) that the Gradio
UI defined further down wires to its buttons.
"""

import functools
import json
from typing import Any, Dict, Optional, Tuple

import cv2
import gradio as gr
import numpy as np
import pandas as pd

from depth_estimation import (
    compute_depth_metrics,
    depth_metrics_table,
    depth_to_heatmap,
    load_midas,
    midas_depth,
    sgbm_depth,
)
from object_distance import (
    compute_evaluation_metrics,
    draw_detections,
    estimate_distances,
    estimate_focal_length,
    load_yolo,
    metrics_table,
    run_yolo,
)

MIDAS_MODELS = ["MiDaS_small", "DPT_Hybrid", "DPT_Large", "MiDaS"]
YOLO_MODELS = ["yolov5n", "yolov5s", "yolov5m", "yolov5l", "yolov5x"]

# Column order of the detections table; spelled out so that an empty
# detection list still yields a frame with the expected header.
_DETECTION_COLUMNS = [
    "label",
    "confidence",
    "pixel_height",
    "known_height_m",
    "bbox_depth_median",
    "dist_pinhole_m",
    "dist_midas_m",
    "final_distance_m",
    "method",
]

# Loaded YOLO models keyed by model name only (see `_get_yolo_model`).
_YOLO_CACHE: Dict[str, Any] = {}
_YOLO_CACHE_MAX = 8


def _ensure_bgr(img: Optional[np.ndarray]) -> np.ndarray:
    """Validate a Gradio image and return it converted from RGB to BGR.

    Raises:
        gr.Error: if no image was supplied, or the array is not (H, W, 3).
    """
    # Gradio passes images as RGB numpy arrays (H,W,3).
    if img is None:
        raise gr.Error("Please upload an image.")
    if img.ndim != 3 or img.shape[2] != 3:
        raise gr.Error("Expected an RGB image with 3 channels.")
    return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)


def _bgr_to_rgb(img: np.ndarray) -> np.ndarray:
    """Convert a BGR image back to RGB, the channel order Gradio displays."""
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


@functools.lru_cache(maxsize=4)
def _get_midas_bundle(model_type: str):
    """Return the cached result of `load_midas(model_type)`.

    Callers unpack the result as ``(model, transform, device)``.
    """
    return load_midas(model_type)


def _get_yolo_model(model_name: str, conf: float, iou: float):
    """Return a YOLO model for *model_name* with the given thresholds applied.

    The model is cached per model name. Thresholds are deliberately not part
    of the cache key: they are plain attributes on the model, so keying on
    them would reload (and keep in memory) the same weights once per slider
    combination. Instead they are re-applied on every fetch.

    NOTE(review): the returned object is shared between calls, so concurrent
    requests with different thresholds would race on `.conf`/`.iou` — confirm
    the app's event concurrency stays serialized.
    """
    model = _YOLO_CACHE.get(model_name)
    if model is None:
        if len(_YOLO_CACHE) >= _YOLO_CACHE_MAX:
            # Bound memory: drop the oldest-inserted model.
            _YOLO_CACHE.pop(next(iter(_YOLO_CACHE)))
        model = load_yolo(model_name, conf_thresh=conf, iou_thresh=iou)
        _YOLO_CACHE[model_name] = model
    # Ensure thresholds match the current UI even when the model was cached.
    model.conf = float(conf)
    model.iou = float(iou)
    return model


def _distance_sort_key(det: dict) -> float:
    """Sort key: the detection's distance, with missing distances last."""
    distance = det.get("distance")
    return float("inf") if distance is None else distance


def _detections_df(detections: list) -> pd.DataFrame:
    """Build the detections table, nearest object first.

    Each detection dict must carry ``label`` and ``conf``; every other field
    is optional and shows up as ``None`` when absent. An empty input yields
    an empty frame that still has all columns.
    """
    rows = [
        {
            "label": det["label"],
            "confidence": float(det["conf"]),
            "pixel_height": det.get("pixel_height"),
            "known_height_m": det.get("known_height_m"),
            "bbox_depth_median": det.get("bbox_depth_median"),
            "dist_pinhole_m": det.get("dist_pinhole"),
            "dist_midas_m": det.get("dist_midas"),
            "final_distance_m": det.get("distance"),
            "method": det.get("method"),
        }
        for det in sorted(detections, key=_distance_sort_key)
    ]
    return pd.DataFrame(rows, columns=_DETECTION_COLUMNS)


def run_depth_task(
    image_rgb: np.ndarray,
    midas_model_type: str,
    baseline_shift_pct: float,
    block_size: int,
    uniqueness_ratio: int,
    speckle_window_size: int,
    speckle_range: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, pd.DataFrame]:
    """Run SGBM and MiDaS depth estimation on one uploaded image.

    Slider values are cast to ``float``/``int`` before use.

    Returns:
        ``(classical_heatmap, midas_heatmap, stereo_pair, metrics_df)`` where
        the three images are RGB and ``stereo_pair`` is the left and right
        images from `sgbm_depth` concatenated side by side.

    Raises:
        gr.Error: if the input image is missing or not 3-channel.
    """
    img_bgr = _ensure_bgr(image_rgb)

    # Normalise the UI values once; they feed both the SGBM call and the
    # metrics table.
    sgbm_params = {
        "baseline_shift_pct": float(baseline_shift_pct),
        "block_size": int(block_size),
        "uniqueness_ratio": int(uniqueness_ratio),
        "speckle_window_size": int(speckle_window_size),
        "speckle_range": int(speckle_range),
    }
    depth_cl, left_img, right_img = sgbm_depth(img_bgr, **sgbm_params)

    midas_model, midas_transform, midas_device = _get_midas_bundle(midas_model_type)
    depth_ml = midas_depth(img_bgr, midas_model, midas_transform, midas_device)

    classical_heatmap = depth_to_heatmap(depth_cl)
    midas_heatmap = depth_to_heatmap(depth_ml)

    metrics = compute_depth_metrics(img_bgr, depth_cl, depth_ml)
    # Record the run configuration alongside the computed metrics.
    metrics.update({"midas_model": midas_model_type, **sgbm_params})
    metrics_df = pd.DataFrame(depth_metrics_table(metrics), columns=["metric", "value"])

    stereo_pair = np.concatenate([left_img, right_img], axis=1)
    return (
        _bgr_to_rgb(classical_heatmap),
        _bgr_to_rgb(midas_heatmap),
        _bgr_to_rgb(stereo_pair),
        metrics_df,
    )


def run_object_distance_task(
    image_rgb: np.ndarray,
    yolo_model_name: str,
    conf_thresh: float,
    iou_thresh: float,
    midas_model_type: str,
    focal_mode: str,
    fov_deg: float,
    focal_px: float,
    inner_ratio: float,
    min_depth_value: float,
    blend_weight_pinhole: float,
) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.DataFrame]:
    """Detect objects with YOLO and estimate their distances.

    The focal length is derived from the image width and ``fov_deg`` when
    ``focal_mode`` is ``"Estimate from FOV"``; any other mode uses
    ``focal_px`` as given.

    Returns:
        ``(annotated_rgb, depth_heatmap_rgb, detections_df, metrics_df)``.

    Raises:
        gr.Error: if the input image is missing or not 3-channel, or if YOLO
            returns no detections.
    """
    img_bgr = _ensure_bgr(image_rgb)
    conf_value = float(conf_thresh)
    iou_value = float(iou_thresh)

    if focal_mode == "Estimate from FOV":
        # shape[1] is the image width of the (H, W, 3) array.
        focal_length = float(estimate_focal_length(img_bgr.shape[1], fov_deg=float(fov_deg)))
    else:
        focal_length = float(focal_px)

    # The getter applies the current thresholds to the (possibly cached) model.
    yolo_model = _get_yolo_model(yolo_model_name, conf_value, iou_value)

    detections = run_yolo(yolo_model, img_bgr, conf_thresh=conf_value)
    if not detections:
        raise gr.Error("No objects detected. Try lowering the confidence threshold.")

    midas_model, midas_transform, midas_device = _get_midas_bundle(midas_model_type)
    depth_map = midas_depth(img_bgr, midas_model, midas_transform, midas_device)

    detections, eval_context = estimate_distances(
        detections,
        depth_map,
        focal_length=focal_length,
        inner_ratio=float(inner_ratio),
        min_depth_value=float(min_depth_value),
        blend_weight_pinhole=float(blend_weight_pinhole),
    )
    metrics = compute_evaluation_metrics(detections, focal_length, eval_context)

    annotated = draw_detections(img_bgr, detections)
    depth_heatmap = depth_to_heatmap(depth_map)
    det_df = _detections_df(detections)

    # Copy before adding the run configuration so the object returned by
    # compute_evaluation_metrics is left untouched.
    metrics = dict(metrics)
    metrics.update(
        {
            "yolo_model": yolo_model_name,
            "midas_model": midas_model_type,
            "confidence_threshold": conf_value,
            "iou_threshold": iou_value,
            "focal_length_px": float(focal_length),
        }
    )
    metrics_df = pd.DataFrame(metrics_table(metrics), columns=["metric", "value"])

    return _bgr_to_rgb(annotated), _bgr_to_rgb(depth_heatmap), det_df, metrics_df


DESCRIPTION = """
Upload an image and run:
- **Depth Estimation**: Classical SGBM (synthetic stereo) + MiDaS
- **Object Distance**: YOLOv5 detection + metric distance estimation (pinhole + calibrated MiDaS)

Note: first run may download model weights (torch.hub).
"""

# Keep Blocks constructor minimal for compatibility across Gradio versions.
# UI definition: two tabs (depth estimation, object distance) plus an export
# note. Each tab collects an image and hyperparameters, then calls the
# matching task callback on button click.
# NOTE(review): the Row/Accordion/button nesting below was reconstructed from
# a whitespace-collapsed source — confirm it matches the intended layout.
with gr.Blocks(title="CV Project Playground", analytics_enabled=False) as demo:
    gr.Markdown("## CV Project Playground")
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        with gr.Tab("Depth Estimation"):
            with gr.Row():
                img_in_1 = gr.Image(label="Input image", type="numpy")
                with gr.Accordion("Hyperparameters", open=True):
                    with gr.Row():
                        midas_model_1 = gr.Dropdown(MIDAS_MODELS, value="MiDaS_small", label="MiDaS model")
                        baseline_shift = gr.Slider(
                            0.01,
                            0.12,
                            value=0.03,
                            step=0.01,
                            label="Stereo baseline shift (fraction of width)",
                        )
                    with gr.Row():
                        # Starting at 3 with step 2 keeps the block size odd.
                        block_size = gr.Slider(3, 15, value=7, step=2, label="SGBM block size (odd)")
                        uniqueness = gr.Slider(1, 25, value=10, step=1, label="SGBM uniqueness ratio")
                    with gr.Row():
                        speckle_window = gr.Slider(0, 200, value=100, step=5, label="SGBM speckle window")
                        speckle_range = gr.Slider(0, 10, value=2, step=1, label="SGBM speckle range")

            run_btn_1 = gr.Button("Run Depth Estimation", variant="primary")

            with gr.Row():
                out_classical = gr.Image(label="Classical heatmap (SGBM)", type="numpy")
                out_midas = gr.Image(label="MiDaS heatmap", type="numpy")
            out_stereo = gr.Image(label="Synthetic stereo pair (left | right)", type="numpy")
            out_meta_1 = gr.Dataframe(label="Depth metrics (key)", wrap=True)

            # Input order must match run_depth_task's positional parameters.
            run_btn_1.click(
                fn=run_depth_task,
                inputs=[
                    img_in_1,
                    midas_model_1,
                    baseline_shift,
                    block_size,
                    uniqueness,
                    speckle_window,
                    speckle_range,
                ],
                outputs=[out_classical, out_midas, out_stereo, out_meta_1],
            )

        with gr.Tab("Object Distance"):
            with gr.Row():
                img_in_2 = gr.Image(label="Input image", type="numpy")
                with gr.Accordion("Hyperparameters", open=True):
                    with gr.Row():
                        yolo_model = gr.Dropdown(YOLO_MODELS, value="yolov5s", label="YOLO model")
                        conf = gr.Slider(0.05, 0.95, value=0.35, step=0.05, label="Confidence threshold")
                        iou = gr.Slider(0.10, 0.95, value=0.45, step=0.05, label="NMS IoU threshold")
                    with gr.Row():
                        midas_model_2 = gr.Dropdown(MIDAS_MODELS, value="MiDaS_small", label="MiDaS model")
                        focal_mode = gr.Radio(
                            ["Estimate from FOV", "Manual pixels"],
                            value="Estimate from FOV",
                            label="Focal length mode",
                        )
                    with gr.Row():
                        fov = gr.Slider(30, 120, value=60, step=1, label="Horizontal FOV (deg)")
                        focal_px = gr.Number(value=800.0, label="Focal length (px) — used when Manual pixels")
                    with gr.Row():
                        inner_ratio = gr.Slider(
                            0.10, 1.00, value=0.60, step=0.05, label="Depth sampling inner box ratio"
                        )
                        min_depth = gr.Slider(0.00, 0.20, value=0.02, step=0.01, label="Minimum valid MiDaS value")
                        blend_w = gr.Slider(0.0, 1.0, value=0.55, step=0.05, label="Blend weight (pinhole)")

            run_btn_2 = gr.Button("Run Object Distance", variant="primary")

            with gr.Row():
                out_annotated = gr.Image(label="Annotated detections (meters)", type="numpy")
                out_depth = gr.Image(label="MiDaS depth heatmap", type="numpy")
            out_table = gr.Dataframe(label="Detections table", wrap=True)
            out_metrics = gr.Dataframe(label="Evaluation metrics (key)", wrap=True)

            # Input order must match run_object_distance_task's positional
            # parameters.
            run_btn_2.click(
                fn=run_object_distance_task,
                inputs=[
                    img_in_2,
                    yolo_model,
                    conf,
                    iou,
                    midas_model_2,
                    focal_mode,
                    fov,
                    focal_px,
                    inner_ratio,
                    min_depth,
                    blend_w,
                ],
                outputs=[out_annotated, out_depth, out_table, out_metrics],
            )

    with gr.Accordion("Export", open=False):
        gr.Markdown(
            "For deployments, Hugging Face Spaces expects an `app.py` (this file) and `requirements.txt`."
        )
        gr.Markdown("Run locally:")
        gr.Code("python app.py")


if __name__ == "__main__":
    import inspect

    # Theme moved to launch() in Gradio 6.0+. Only pass it when this Gradio
    # build's launch() actually declares the parameter, so the app still
    # starts (with the default theme) on releases that do not.
    launch_kwargs = {}
    if "theme" in inspect.signature(demo.launch).parameters:
        launch_kwargs["theme"] = gr.themes.Soft()
    demo.launch(**launch_kwargs)