Spaces:

1javid
/

cv_project_2

Sleeping

File size: 10,732 Bytes

import functools
import json
from typing import Any, Dict, Tuple

import cv2
import gradio as gr
import numpy as np
import pandas as pd

from depth_estimation import (
    compute_depth_metrics,
    depth_metrics_table,
    depth_to_heatmap,
    load_midas,
    midas_depth,
    sgbm_depth,
)
from object_distance import (
    compute_evaluation_metrics,
    draw_detections,
    estimate_distances,
    estimate_focal_length,
    load_yolo,
    metrics_table,
    run_yolo,
)


# Model identifiers offered by the MiDaS and YOLO dropdowns in the UI below.
MIDAS_MODELS = [
    "MiDaS_small",
    "DPT_Hybrid",
    "DPT_Large",
    "MiDaS",
]
YOLO_MODELS = ["yolov5" + size_suffix for size_suffix in ("n", "s", "m", "l", "x")]


def _ensure_bgr(img: np.ndarray) -> np.ndarray:
    """Validate an uploaded image and return it with BGR channel order.

    Args:
        img: Image array as delivered by the Gradio image input, or None
            when nothing was uploaded.

    Returns:
        The image converted from RGB to BGR via OpenCV.

    Raises:
        gr.Error: If no image was supplied, or if the array is not
            three-dimensional with exactly three channels on the last axis.
    """
    if img is None:
        raise gr.Error("Please upload an image.")

    # Gradio passes images as RGB numpy arrays (H,W,3); anything else is rejected.
    has_three_channels = img.ndim == 3 and img.shape[2] == 3
    if not has_three_channels:
        raise gr.Error("Expected an RGB image with 3 channels.")

    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    return bgr


def _bgr_to_rgb(img: np.ndarray) -> np.ndarray:
    """Return *img* with its channel order converted from BGR to RGB."""
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return rgb


@functools.lru_cache(maxsize=4)
def _get_midas_bundle(model_type: str):
    """Return whatever ``load_midas`` yields for *model_type*, memoised.

    The result is cached per model type (up to four entries) so repeated
    runs with the same dropdown choice do not reload the model. Callers in
    this file unpack the result as ``(model, transform, device)``.
    """
    bundle = load_midas(model_type)
    return bundle


# Loaded YOLO models keyed by model name only. Thresholds are deliberately
# NOT part of the key: they are plain attributes on the model, so one loaded
# network can serve every slider setting. The key space is the small, fixed
# set of names in the YOLO dropdown, so the cache stays bounded.
_YOLO_MODEL_CACHE: Dict[str, Any] = {}


def _get_yolo_model(model_name: str, conf: float, iou: float):
    """Return the YOLO model for *model_name* with the given thresholds applied.

    The model is loaded through ``load_yolo`` the first time a name is
    requested and reused afterwards. Previously the cache key also included
    ``conf`` and ``iou``, so every new slider combination loaded another copy
    of the same network; now the thresholds are simply re-applied to the
    cached instance on each call.

    Args:
        model_name: Identifier passed to ``load_yolo``.
        conf: Confidence threshold to set on the returned model.
        iou: NMS IoU threshold to set on the returned model.

    Returns:
        The (shared) model object with ``conf`` and ``iou`` attributes set to
        the requested values.
    """
    model = _YOLO_MODEL_CACHE.get(model_name)
    if model is None:
        model = load_yolo(model_name, conf_thresh=conf, iou_thresh=iou)
        _YOLO_MODEL_CACHE[model_name] = model

    # Always sync thresholds: a cached instance may carry values from an
    # earlier request.
    model.conf = conf
    model.iou = iou
    return model


def _detections_df(detections: list) -> pd.DataFrame:
    rows = []
    for det in sorted(detections, key=lambda d: d["distance"] if d.get("distance") is not None else 1e9):
        rows.append(
            {
                "label": det["label"],
                "confidence": float(det["conf"]),
                "pixel_height": det.get("pixel_height"),
                "known_height_m": det.get("known_height_m"),
                "bbox_depth_median": det.get("bbox_depth_median"),
                "dist_pinhole_m": det.get("dist_pinhole"),
                "dist_midas_m": det.get("dist_midas"),
                "final_distance_m": det.get("distance"),
                "method": det.get("method"),
            }
        )
    return pd.DataFrame(rows)


def run_depth_task(
    image_rgb: np.ndarray,
    midas_model_type: str,
    baseline_shift_pct: float,
    block_size: int,
    uniqueness_ratio: int,
    speckle_window_size: int,
    speckle_range: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, pd.DataFrame]:
    """Run the SGBM and MiDaS depth pipelines for the "Depth Estimation" tab.

    Args:
        image_rgb: Uploaded image from the Gradio input.
        midas_model_type: MiDaS variant to load (cached per type).
        baseline_shift_pct: Forwarded to ``sgbm_depth`` as a float.
        block_size: Forwarded to ``sgbm_depth`` as an int.
        uniqueness_ratio: Forwarded to ``sgbm_depth`` as an int.
        speckle_window_size: Forwarded to ``sgbm_depth`` as an int.
        speckle_range: Forwarded to ``sgbm_depth`` as an int.

    Returns:
        A tuple of (SGBM heatmap, MiDaS heatmap, left and right images from
        ``sgbm_depth`` joined side by side, metrics DataFrame with
        ``metric``/``value`` columns). The three images are converted to RGB
        for display.

    Raises:
        gr.Error: If the input image is missing or not 3-channel.
    """
    bgr = _ensure_bgr(image_rgb)

    # Coerce the slider values once; the same names serve both as the
    # sgbm_depth keyword arguments and as the metric keys reported below.
    sgbm_params = {
        "baseline_shift_pct": float(baseline_shift_pct),
        "block_size": int(block_size),
        "uniqueness_ratio": int(uniqueness_ratio),
        "speckle_window_size": int(speckle_window_size),
        "speckle_range": int(speckle_range),
    }
    depth_cl, left_img, right_img = sgbm_depth(bgr, **sgbm_params)

    model, transform, device = _get_midas_bundle(midas_model_type)
    depth_ml = midas_depth(bgr, model, transform, device)

    heat_cl = depth_to_heatmap(depth_cl)
    heat_ml = depth_to_heatmap(depth_ml)

    metrics = compute_depth_metrics(bgr, depth_cl, depth_ml)
    # Record the run configuration alongside the computed metrics.
    metrics.update({"midas_model": midas_model_type, **sgbm_params})
    table_rows = depth_metrics_table(metrics)
    metrics_df = pd.DataFrame(table_rows, columns=["metric", "value"])

    classical_rgb = _bgr_to_rgb(heat_cl)
    midas_rgb = _bgr_to_rgb(heat_ml)
    stereo_pair = np.concatenate([left_img, right_img], axis=1)
    stereo_rgb = _bgr_to_rgb(stereo_pair)
    return classical_rgb, midas_rgb, stereo_rgb, metrics_df


def run_object_distance_task(
    image_rgb: np.ndarray,
    yolo_model_name: str,
    conf_thresh: float,
    iou_thresh: float,
    midas_model_type: str,
    focal_mode: str,
    fov_deg: float,
    focal_px: float,
    inner_ratio: float,
    min_depth_value: float,
    blend_weight_pinhole: float,
) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.DataFrame]:
    """Detect objects and estimate their distances for the "Object Distance" tab.

    Args:
        image_rgb: Uploaded image from the Gradio input.
        yolo_model_name: YOLO variant to load.
        conf_thresh: Detection confidence threshold.
        iou_thresh: NMS IoU threshold.
        midas_model_type: MiDaS variant to load (cached per type).
        focal_mode: ``"Estimate from FOV"`` derives the focal length from
            *fov_deg* and the image width; any other value uses *focal_px*.
        fov_deg: Field of view in degrees passed to ``estimate_focal_length``.
        focal_px: Manually entered focal length in pixels; may be ``None``
            when the number field was left empty.
        inner_ratio: Forwarded to ``estimate_distances``.
        min_depth_value: Forwarded to ``estimate_distances``.
        blend_weight_pinhole: Forwarded to ``estimate_distances``.

    Returns:
        A tuple of (annotated image, MiDaS depth heatmap, detections
        DataFrame, metrics DataFrame with ``metric``/``value`` columns). Both
        images are converted to RGB for display.

    Raises:
        gr.Error: If the image is missing or not 3-channel, if the focal
            length is missing, non-finite or not positive, or if no objects
            are detected.
    """
    img_bgr = _ensure_bgr(image_rgb)

    if focal_mode == "Estimate from FOV":
        focal_length = float(estimate_focal_length(img_bgr.shape[1], fov_deg=float(fov_deg)))
    else:
        # An emptied number field arrives as None; report that to the user
        # instead of failing with a raw TypeError from float(None).
        if focal_px is None:
            raise gr.Error("Please enter a focal length in pixels.")
        focal_length = float(focal_px)

    # A zero, negative or non-finite focal length cannot yield a meaningful
    # distance, so stop before running the models.
    if not np.isfinite(focal_length) or focal_length <= 0:
        raise gr.Error("Focal length must be a positive number of pixels.")

    conf_value = float(conf_thresh)
    iou_value = float(iou_thresh)

    yolo_model = _get_yolo_model(yolo_model_name, conf_value, iou_value)
    # Ensure thresholds match current UI even if cached model exists
    yolo_model.conf = conf_value
    yolo_model.iou = iou_value

    detections = run_yolo(yolo_model, img_bgr, conf_thresh=conf_value)
    if not detections:
        raise gr.Error("No objects detected. Try lowering the confidence threshold.")

    midas_model, midas_transform, midas_device = _get_midas_bundle(midas_model_type)
    depth_map = midas_depth(img_bgr, midas_model, midas_transform, midas_device)

    detections, eval_context = estimate_distances(
        detections,
        depth_map,
        focal_length=focal_length,
        inner_ratio=float(inner_ratio),
        min_depth_value=float(min_depth_value),
        blend_weight_pinhole=float(blend_weight_pinhole),
    )
    metrics = compute_evaluation_metrics(detections, focal_length, eval_context)

    annotated = draw_detections(img_bgr, detections)
    depth_heatmap = depth_to_heatmap(depth_map)
    det_df = _detections_df(detections)

    # Copy before adding the run configuration so the object returned by
    # compute_evaluation_metrics is not mutated.
    metrics = dict(metrics)
    metrics.update(
        {
            "yolo_model": yolo_model_name,
            "midas_model": midas_model_type,
            "confidence_threshold": conf_value,
            "iou_threshold": iou_value,
            "focal_length_px": focal_length,
        }
    )
    metrics_df = pd.DataFrame(metrics_table(metrics), columns=["metric", "value"])
    return _bgr_to_rgb(annotated), _bgr_to_rgb(depth_heatmap), det_df, metrics_df


# Markdown blurb rendered below the page heading via gr.Markdown(DESCRIPTION).
DESCRIPTION = """
Upload an image and run:
- **Depth Estimation**: Classical SGBM (synthetic stereo) + MiDaS
- **Object Distance**: YOLOv5 detection + metric distance estimation (pinhole + calibrated MiDaS)

Note: first run may download model weights (torch.hub).
"""


# Keep Blocks constructor minimal for compatibility across Gradio versions.
# The UI has two tabs, each wiring one button to one of the task functions
# defined above, followed by a collapsed "Export" section.
with gr.Blocks(title="CV Project Playground", analytics_enabled=False) as demo:
    gr.Markdown("## CV Project Playground")
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        # ---- Tab 1: depth estimation (run_depth_task) ----
        with gr.Tab("Depth Estimation"):
            with gr.Row():
                img_in_1 = gr.Image(label="Input image", type="numpy")

            with gr.Accordion("Hyperparameters", open=True):
                with gr.Row():
                    midas_model_1 = gr.Dropdown(MIDAS_MODELS, value="MiDaS_small", label="MiDaS model")
                    baseline_shift = gr.Slider(0.01, 0.12, value=0.03, step=0.01, label="Stereo baseline shift (fraction of width)")
                with gr.Row():
                    # step=2 starting from 3 keeps the block size odd, as the label states.
                    block_size = gr.Slider(3, 15, value=7, step=2, label="SGBM block size (odd)")
                    uniqueness = gr.Slider(1, 25, value=10, step=1, label="SGBM uniqueness ratio")
                with gr.Row():
                    speckle_window = gr.Slider(0, 200, value=100, step=5, label="SGBM speckle window")
                    speckle_range = gr.Slider(0, 10, value=2, step=1, label="SGBM speckle range")

            run_btn_1 = gr.Button("Run Depth Estimation", variant="primary")

            with gr.Row():
                out_classical = gr.Image(label="Classical heatmap (SGBM)", type="numpy")
                out_midas = gr.Image(label="MiDaS heatmap", type="numpy")

            out_stereo = gr.Image(label="Synthetic stereo pair (left | right)", type="numpy")
            out_meta_1 = gr.Dataframe(label="Depth metrics (key)", wrap=True)

            # Input order must match run_depth_task's positional parameters;
            # output order must match its returned 4-tuple.
            run_btn_1.click(
                fn=run_depth_task,
                inputs=[img_in_1, midas_model_1, baseline_shift, block_size, uniqueness, speckle_window, speckle_range],
                outputs=[out_classical, out_midas, out_stereo, out_meta_1],
            )

        # ---- Tab 2: object detection + distance (run_object_distance_task) ----
        with gr.Tab("Object Distance"):
            with gr.Row():
                img_in_2 = gr.Image(label="Input image", type="numpy")

            with gr.Accordion("Hyperparameters", open=True):
                with gr.Row():
                    yolo_model = gr.Dropdown(YOLO_MODELS, value="yolov5s", label="YOLO model")
                    conf = gr.Slider(0.05, 0.95, value=0.35, step=0.05, label="Confidence threshold")
                    iou = gr.Slider(0.10, 0.95, value=0.45, step=0.05, label="NMS IoU threshold")

                with gr.Row():
                    midas_model_2 = gr.Dropdown(MIDAS_MODELS, value="MiDaS_small", label="MiDaS model")

                # The radio choice strings are compared verbatim inside
                # run_object_distance_task ("Estimate from FOV" selects the FOV path).
                focal_mode = gr.Radio(["Estimate from FOV", "Manual pixels"], value="Estimate from FOV", label="Focal length mode")
                with gr.Row():
                    fov = gr.Slider(30, 120, value=60, step=1, label="Horizontal FOV (deg)")
                    focal_px = gr.Number(value=800.0, label="Focal length (px) — used when Manual pixels")

                with gr.Row():
                    inner_ratio = gr.Slider(0.10, 1.00, value=0.60, step=0.05, label="Depth sampling inner box ratio")
                    min_depth = gr.Slider(0.00, 0.20, value=0.02, step=0.01, label="Minimum valid MiDaS value")
                    blend_w = gr.Slider(0.0, 1.0, value=0.55, step=0.05, label="Blend weight (pinhole)")

            run_btn_2 = gr.Button("Run Object Distance", variant="primary")

            with gr.Row():
                out_annotated = gr.Image(label="Annotated detections (meters)", type="numpy")
                out_depth = gr.Image(label="MiDaS depth heatmap", type="numpy")

            out_table = gr.Dataframe(label="Detections table", wrap=True)
            out_metrics = gr.Dataframe(label="Evaluation metrics (key)", wrap=True)

            # Input order must match run_object_distance_task's positional
            # parameters; output order must match its returned 4-tuple.
            run_btn_2.click(
                fn=run_object_distance_task,
                inputs=[
                    img_in_2,
                    yolo_model,
                    conf,
                    iou,
                    midas_model_2,
                    focal_mode,
                    fov,
                    focal_px,
                    inner_ratio,
                    min_depth,
                    blend_w,
                ],
                outputs=[out_annotated, out_depth, out_table, out_metrics],
            )

    # Static deployment / local-run hints; no callbacks are attached here.
    with gr.Accordion("Export", open=False):
        gr.Markdown(
            "For deployments, Hugging Face Spaces expects an `app.py` (this file) and `requirements.txt`."
        )
        gr.Markdown("Run locally:")
        gr.Code("python app.py")


if __name__ == "__main__":
    import inspect

    # Theme moved to launch() in Gradio 6.0+. A launch() that does not declare
    # a `theme` parameter would raise TypeError on the keyword, so only pass
    # it when the installed Gradio supports it; otherwise start with the
    # default theme rather than failing to start at all.
    launch_kwargs = {}
    if "theme" in inspect.signature(demo.launch).parameters:
        launch_kwargs["theme"] = gr.themes.Soft()
    demo.launch(**launch_kwargs)