Spaces:
Sleeping
Sleeping
| import functools | |
| import json | |
| from typing import Any, Dict, Tuple | |
| import cv2 | |
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| from depth_estimation import ( | |
| compute_depth_metrics, | |
| depth_metrics_table, | |
| depth_to_heatmap, | |
| load_midas, | |
| midas_depth, | |
| sgbm_depth, | |
| ) | |
| from object_distance import ( | |
| compute_evaluation_metrics, | |
| draw_detections, | |
| estimate_distances, | |
| estimate_focal_length, | |
| load_yolo, | |
| metrics_table, | |
| run_yolo, | |
| ) | |
# MiDaS variants offered in the UI dropdowns; the selected name is forwarded
# unchanged to load_midas().
MIDAS_MODELS = [
    "MiDaS_small",
    "DPT_Hybrid",
    "DPT_Large",
    "MiDaS",
]

# YOLOv5 checkpoints offered in the UI dropdown; the selected name is forwarded
# unchanged to load_yolo().
YOLO_MODELS = [
    "yolov5n",
    "yolov5s",
    "yolov5m",
    "yolov5l",
    "yolov5x",
]
def _ensure_bgr(img: np.ndarray) -> np.ndarray:
    """Validate an uploaded image and return it in OpenCV's BGR channel order.

    Args:
        img: Image as delivered by the Gradio image component, or None when
            nothing was uploaded.

    Returns:
        The same picture converted from RGB to BGR.

    Raises:
        gr.Error: If no image was supplied, or the array is not a
            three-dimensional array with exactly 3 channels.
    """
    # Gradio passes images as RGB numpy arrays of shape (H, W, 3).
    if img is None:
        raise gr.Error("Please upload an image.")

    is_three_channel = img.ndim == 3 and img.shape[2] == 3
    if not is_three_channel:
        raise gr.Error("Expected an RGB image with 3 channels.")

    return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
def _bgr_to_rgb(img: np.ndarray) -> np.ndarray:
    """Convert an OpenCV BGR image to RGB so Gradio displays it correctly."""
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return rgb
def _get_midas_bundle(model_type: str):
    """Load the MiDaS bundle for ``model_type`` via ``load_midas``.

    Callers in this file unpack the result as ``(model, transform, device)``.

    NOTE(review): this wrapper does not memoize anything itself, although
    ``functools`` is imported at the top of the file. Whether ``load_midas``
    caches internally is not visible from here -- confirm before relying on
    repeated calls being cheap.
    """
    bundle = load_midas(model_type)
    return bundle
def _get_yolo_model(model_name: str, conf: float, iou: float):
    """Load a YOLO model via ``load_yolo`` with the given thresholds.

    Args:
        model_name: Checkpoint name, forwarded positionally to ``load_yolo``.
        conf: Confidence threshold, forwarded as ``conf_thresh``.
        iou: NMS IoU threshold, forwarded as ``iou_thresh``.

    NOTE(review): no caching happens in this wrapper; whether ``load_yolo``
    reuses previously loaded models is not visible from here.
    """
    model = load_yolo(model_name, conf_thresh=conf, iou_thresh=iou)
    return model
| def _detections_df(detections: list) -> pd.DataFrame: | |
| rows = [] | |
| for det in sorted(detections, key=lambda d: d["distance"] if d.get("distance") is not None else 1e9): | |
| rows.append( | |
| { | |
| "label": det["label"], | |
| "confidence": float(det["conf"]), | |
| "pixel_height": det.get("pixel_height"), | |
| "known_height_m": det.get("known_height_m"), | |
| "bbox_depth_median": det.get("bbox_depth_median"), | |
| "dist_pinhole_m": det.get("dist_pinhole"), | |
| "dist_midas_m": det.get("dist_midas"), | |
| "final_distance_m": det.get("distance"), | |
| "method": det.get("method"), | |
| } | |
| ) | |
| return pd.DataFrame(rows) | |
def run_depth_task(
    image_rgb: np.ndarray,
    midas_model_type: str,
    baseline_shift_pct: float,
    block_size: int,
    uniqueness_ratio: int,
    speckle_window_size: int,
    speckle_range: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, pd.DataFrame]:
    """Run SGBM and MiDaS depth estimation on one image for the Gradio UI.

    Args:
        image_rgb: Uploaded image as delivered by Gradio (RGB order).
        midas_model_type: Name of the MiDaS variant to load.
        baseline_shift_pct: Forwarded to ``sgbm_depth`` after ``float()``.
        block_size: Forwarded to ``sgbm_depth`` after ``int()``.
        uniqueness_ratio: Forwarded to ``sgbm_depth`` after ``int()``.
        speckle_window_size: Forwarded to ``sgbm_depth`` after ``int()``.
        speckle_range: Forwarded to ``sgbm_depth`` after ``int()``.

    Returns:
        ``(classical_heatmap, midas_heatmap, stereo_pair, metrics_df)`` where
        the three images have been passed through ``_bgr_to_rgb`` for display
        and ``stereo_pair`` is the left and right images joined side by side.

    Raises:
        gr.Error: If the image is missing or not 3-channel (via
            ``_ensure_bgr``).
    """
    img_bgr = _ensure_bgr(image_rgb)

    # Coerce the slider values once; the same settings are used for the SGBM
    # call and are echoed back in the metrics table.
    sgbm_settings = {
        "baseline_shift_pct": float(baseline_shift_pct),
        "block_size": int(block_size),
        "uniqueness_ratio": int(uniqueness_ratio),
        "speckle_window_size": int(speckle_window_size),
        "speckle_range": int(speckle_range),
    }

    # Classical branch.
    depth_cl, left_img, right_img = sgbm_depth(img_bgr, **sgbm_settings)

    # Learned branch.
    midas_model, midas_transform, midas_device = _get_midas_bundle(midas_model_type)
    depth_ml = midas_depth(img_bgr, midas_model, midas_transform, midas_device)

    classical_heatmap = depth_to_heatmap(depth_cl)
    midas_heatmap = depth_to_heatmap(depth_ml)

    # Record the run configuration alongside the computed metrics.
    metrics = compute_depth_metrics(img_bgr, depth_cl, depth_ml)
    metrics.update({"midas_model": midas_model_type, **sgbm_settings})
    metrics_df = pd.DataFrame(depth_metrics_table(metrics), columns=["metric", "value"])

    # axis=1 joins the two views horizontally (left | right).
    stereo_pair = np.concatenate([left_img, right_img], axis=1)

    return (
        _bgr_to_rgb(classical_heatmap),
        _bgr_to_rgb(midas_heatmap),
        _bgr_to_rgb(stereo_pair),
        metrics_df,
    )
def run_object_distance_task(
    image_rgb: np.ndarray,
    yolo_model_name: str,
    conf_thresh: float,
    iou_thresh: float,
    midas_model_type: str,
    focal_mode: str,
    fov_deg: float,
    focal_px: float,
    inner_ratio: float,
    min_depth_value: float,
    blend_weight_pinhole: float,
) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.DataFrame]:
    """Detect objects with YOLO and estimate their distances for the Gradio UI.

    Args:
        image_rgb: Uploaded image as delivered by Gradio (RGB order).
        yolo_model_name: Name of the YOLO checkpoint to load.
        conf_thresh: Detection confidence threshold.
        iou_thresh: NMS IoU threshold.
        midas_model_type: Name of the MiDaS variant to load.
        focal_mode: ``"Estimate from FOV"`` derives the focal length from
            ``fov_deg`` and the image width; any other value uses
            ``focal_px`` directly.
        fov_deg: Field of view in degrees, used in FOV mode only.
        focal_px: Focal length in pixels, used in manual mode only.
        inner_ratio: Forwarded to ``estimate_distances``.
        min_depth_value: Forwarded to ``estimate_distances``.
        blend_weight_pinhole: Forwarded to ``estimate_distances``.

    Returns:
        ``(annotated_image, depth_heatmap, detections_df, metrics_df)``; both
        images have been passed through ``_bgr_to_rgb`` for display.

    Raises:
        gr.Error: If the image is missing or not 3-channel, if the focal
            length is missing or not a positive finite number, or if no
            objects were detected.
    """
    img_bgr = _ensure_bgr(image_rgb)

    if focal_mode == "Estimate from FOV":
        # shape[1] is the image width of the (H, W, 3) array.
        focal_length = float(estimate_focal_length(img_bgr.shape[1], fov_deg=float(fov_deg)))
    else:
        # An emptied number field can arrive as None; report that clearly
        # instead of letting float(None) surface as a bare TypeError.
        if focal_px is None:
            raise gr.Error("Please enter a focal length in pixels.")
        focal_length = float(focal_px)

    # Rejects zero, negative, NaN and infinite values before they reach the
    # distance computation.
    if not 0.0 < focal_length < float("inf"):
        raise gr.Error("Focal length must be a positive, finite number of pixels.")

    yolo_model = _get_yolo_model(yolo_model_name, float(conf_thresh), float(iou_thresh))
    # Ensure thresholds match the current UI values even if the loader handed
    # back a previously configured model.
    yolo_model.conf = float(conf_thresh)
    yolo_model.iou = float(iou_thresh)

    detections = run_yolo(yolo_model, img_bgr, conf_thresh=float(conf_thresh))
    if not detections:
        raise gr.Error("No objects detected. Try lowering the confidence threshold.")

    midas_model, midas_transform, midas_device = _get_midas_bundle(midas_model_type)
    depth_map = midas_depth(img_bgr, midas_model, midas_transform, midas_device)

    detections, eval_context = estimate_distances(
        detections,
        depth_map,
        focal_length=focal_length,
        inner_ratio=float(inner_ratio),
        min_depth_value=float(min_depth_value),
        blend_weight_pinhole=float(blend_weight_pinhole),
    )
    metrics = compute_evaluation_metrics(detections, focal_length, eval_context)

    annotated = draw_detections(img_bgr, detections)
    depth_heatmap = depth_to_heatmap(depth_map)
    det_df = _detections_df(detections)

    # Work on a copy so the object returned by compute_evaluation_metrics is
    # not modified when the run configuration is added.
    metrics = dict(metrics)
    metrics.update(
        {
            "yolo_model": yolo_model_name,
            "midas_model": midas_model_type,
            "confidence_threshold": float(conf_thresh),
            "iou_threshold": float(iou_thresh),
            "focal_length_px": float(focal_length),
        }
    )
    metrics_df = pd.DataFrame(metrics_table(metrics), columns=["metric", "value"])

    return _bgr_to_rgb(annotated), _bgr_to_rgb(depth_heatmap), det_df, metrics_df
# Markdown blurb rendered under the page title.
DESCRIPTION = (
    "\n"
    "Upload an image and run:\n"
    "- **Depth Estimation**: Classical SGBM (synthetic stereo) + MiDaS\n"
    "- **Object Distance**: YOLOv5 detection + metric distance estimation (pinhole + calibrated MiDaS)\n"
    "Note: first run may download model weights (torch.hub).\n"
)
# Keep Blocks constructor minimal for compatibility across Gradio versions.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a listing that had lost its indentation -- confirm the Row/Accordion
# placement against the intended page layout.
with gr.Blocks(title="CV Project Playground", analytics_enabled=False) as demo:
    gr.Markdown("## CV Project Playground")
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        # ------------------------------------------------------------------
        # Tab 1: SGBM vs. MiDaS depth estimation
        # ------------------------------------------------------------------
        with gr.Tab("Depth Estimation"):
            with gr.Row():
                img_in_1 = gr.Image(label="Input image", type="numpy")
                with gr.Accordion("Hyperparameters", open=True):
                    with gr.Row():
                        midas_model_1 = gr.Dropdown(
                            choices=MIDAS_MODELS,
                            value="MiDaS_small",
                            label="MiDaS model",
                        )
                        baseline_shift = gr.Slider(
                            minimum=0.01,
                            maximum=0.12,
                            value=0.03,
                            step=0.01,
                            label="Stereo baseline shift (fraction of width)",
                        )
                    with gr.Row():
                        # step=2 starting from 3 keeps the block size odd.
                        block_size = gr.Slider(
                            minimum=3,
                            maximum=15,
                            value=7,
                            step=2,
                            label="SGBM block size (odd)",
                        )
                        uniqueness = gr.Slider(
                            minimum=1,
                            maximum=25,
                            value=10,
                            step=1,
                            label="SGBM uniqueness ratio",
                        )
                    with gr.Row():
                        speckle_window = gr.Slider(
                            minimum=0,
                            maximum=200,
                            value=100,
                            step=5,
                            label="SGBM speckle window",
                        )
                        speckle_range = gr.Slider(
                            minimum=0,
                            maximum=10,
                            value=2,
                            step=1,
                            label="SGBM speckle range",
                        )

            run_btn_1 = gr.Button("Run Depth Estimation", variant="primary")

            with gr.Row():
                out_classical = gr.Image(label="Classical heatmap (SGBM)", type="numpy")
                out_midas = gr.Image(label="MiDaS heatmap", type="numpy")
            out_stereo = gr.Image(label="Synthetic stereo pair (left | right)", type="numpy")
            out_meta_1 = gr.Dataframe(label="Depth metrics (key)", wrap=True)

            # Input order must match the parameter order of run_depth_task.
            run_btn_1.click(
                fn=run_depth_task,
                inputs=[
                    img_in_1,
                    midas_model_1,
                    baseline_shift,
                    block_size,
                    uniqueness,
                    speckle_window,
                    speckle_range,
                ],
                outputs=[out_classical, out_midas, out_stereo, out_meta_1],
            )

        # ------------------------------------------------------------------
        # Tab 2: YOLO detection + distance estimation
        # ------------------------------------------------------------------
        with gr.Tab("Object Distance"):
            with gr.Row():
                img_in_2 = gr.Image(label="Input image", type="numpy")
                with gr.Accordion("Hyperparameters", open=True):
                    with gr.Row():
                        yolo_model = gr.Dropdown(
                            choices=YOLO_MODELS,
                            value="yolov5s",
                            label="YOLO model",
                        )
                        conf = gr.Slider(
                            minimum=0.05,
                            maximum=0.95,
                            value=0.35,
                            step=0.05,
                            label="Confidence threshold",
                        )
                        iou = gr.Slider(
                            minimum=0.10,
                            maximum=0.95,
                            value=0.45,
                            step=0.05,
                            label="NMS IoU threshold",
                        )
                    with gr.Row():
                        midas_model_2 = gr.Dropdown(
                            choices=MIDAS_MODELS,
                            value="MiDaS_small",
                            label="MiDaS model",
                        )
                        focal_mode = gr.Radio(
                            choices=["Estimate from FOV", "Manual pixels"],
                            value="Estimate from FOV",
                            label="Focal length mode",
                        )
                    with gr.Row():
                        fov = gr.Slider(
                            minimum=30,
                            maximum=120,
                            value=60,
                            step=1,
                            label="Horizontal FOV (deg)",
                        )
                        focal_px = gr.Number(
                            value=800.0,
                            label="Focal length (px) — used when Manual pixels",
                        )
                    with gr.Row():
                        inner_ratio = gr.Slider(
                            minimum=0.10,
                            maximum=1.00,
                            value=0.60,
                            step=0.05,
                            label="Depth sampling inner box ratio",
                        )
                        min_depth = gr.Slider(
                            minimum=0.00,
                            maximum=0.20,
                            value=0.02,
                            step=0.01,
                            label="Minimum valid MiDaS value",
                        )
                        blend_w = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.55,
                            step=0.05,
                            label="Blend weight (pinhole)",
                        )

            run_btn_2 = gr.Button("Run Object Distance", variant="primary")

            with gr.Row():
                out_annotated = gr.Image(label="Annotated detections (meters)", type="numpy")
                out_depth = gr.Image(label="MiDaS depth heatmap", type="numpy")
            out_table = gr.Dataframe(label="Detections table", wrap=True)
            out_metrics = gr.Dataframe(label="Evaluation metrics (key)", wrap=True)

            # Input order must match the parameter order of
            # run_object_distance_task.
            run_btn_2.click(
                fn=run_object_distance_task,
                inputs=[
                    img_in_2,
                    yolo_model,
                    conf,
                    iou,
                    midas_model_2,
                    focal_mode,
                    fov,
                    focal_px,
                    inner_ratio,
                    min_depth,
                    blend_w,
                ],
                outputs=[out_annotated, out_depth, out_table, out_metrics],
            )

    with gr.Accordion("Export", open=False):
        gr.Markdown(
            "For deployments, Hugging Face Spaces expects an `app.py` (this file) and `requirements.txt`."
        )
        gr.Markdown("Run locally:")
        gr.Code("python app.py")
if __name__ == "__main__":
    # In Gradio 6.0+ the theme is passed to launch() rather than to the
    # Blocks constructor.
    soft_theme = gr.themes.Soft()
    demo.launch(theme=soft_theme)