# cv_project_2 / app.py
# NOTE: the three lines below are residue from the hosting page, not program text.
#   1javid's picture
#   Upload 3 files
#   f0640c4 verified
import functools
import json
from typing import Any, Dict, Tuple
import cv2
import gradio as gr
import numpy as np
import pandas as pd
from depth_estimation import (
compute_depth_metrics,
depth_metrics_table,
depth_to_heatmap,
load_midas,
midas_depth,
sgbm_depth,
)
from object_distance import (
compute_evaluation_metrics,
draw_detections,
estimate_distances,
estimate_focal_length,
load_yolo,
metrics_table,
run_yolo,
)
# Choices offered by the MiDaS model dropdowns in both tabs.
MIDAS_MODELS = [
    "MiDaS_small",
    "DPT_Hybrid",
    "DPT_Large",
    "MiDaS",
]

# Choices offered by the YOLO model dropdown: one entry per size suffix.
YOLO_MODELS = [f"yolov5{size}" for size in "nsmlx"]
def _ensure_bgr(img: np.ndarray) -> np.ndarray:
    """Validate an uploaded image and convert it from RGB to BGR.

    The image components in this app are declared with ``type="numpy"``, and
    the rest of the pipeline works on BGR arrays, so every task entry point
    passes its input through here first.

    Args:
        img: Image array expected to have shape (H, W, 3), or ``None`` when
            nothing was uploaded.

    Returns:
        The same image with its channel order converted by OpenCV.

    Raises:
        gr.Error: If no image was provided, or the array is not 3-dimensional
            with exactly 3 channels.
    """
    if img is None:
        raise gr.Error("Please upload an image.")

    has_three_channels = img.ndim == 3 and img.shape[2] == 3
    if not has_three_channels:
        raise gr.Error("Expected an RGB image with 3 channels.")

    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    return bgr
def _bgr_to_rgb(img: np.ndarray) -> np.ndarray:
    """Convert a BGR image to RGB channel order for display in the UI.

    Args:
        img: Image array in BGR channel order.

    Returns:
        The image converted by OpenCV to RGB channel order.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return rgb
@functools.lru_cache(maxsize=4)
def _get_midas_bundle(model_type: str):
    """Return the loaded MiDaS bundle for ``model_type``, memoized per type.

    Up to four distinct model types are kept, so repeated runs with the same
    dropdown choice do not call ``load_midas`` again.

    Args:
        model_type: MiDaS variant name as selected in the UI.

    Returns:
        Whatever ``load_midas`` returns; callers in this file unpack it as
        ``(model, transform, device)``.
    """
    bundle = load_midas(model_type)
    return bundle
# Loaded YOLO models keyed by model name only. Keying on the thresholds as
# well would reload (and keep in memory) a separate copy of the same weights
# for every confidence/IoU slider combination.
_YOLO_MODEL_CACHE: Dict[str, Any] = {}


def _get_yolo_model(model_name: str, conf: float, iou: float):
    """Return a YOLO model for ``model_name`` configured with the thresholds.

    The model is loaded once per name and reused afterwards. On a cache hit
    the thresholds are written onto the existing model through its ``conf``
    and ``iou`` attributes, the same attributes the object-distance task sets.

    Args:
        model_name: YOLO variant name as selected in the UI.
        conf: Confidence threshold to apply.
        iou: NMS IoU threshold to apply.

    Returns:
        The model object returned by ``load_yolo``. The same instance is
        shared by every call that uses the same ``model_name``.
    """
    model = _YOLO_MODEL_CACHE.get(model_name)
    if model is None:
        model = load_yolo(model_name, conf_thresh=conf, iou_thresh=iou)
        _YOLO_MODEL_CACHE[model_name] = model
    else:
        # Reuse the loaded weights; only the thresholds change between calls.
        model.conf = conf
        model.iou = iou
    return model
# Column order of the detections table. Passed explicitly to the DataFrame so
# the table keeps its headers even when there are no detections.
_DETECTION_COLUMNS = [
    "label",
    "confidence",
    "pixel_height",
    "known_height_m",
    "bbox_depth_median",
    "dist_pinhole_m",
    "dist_midas_m",
    "final_distance_m",
    "method",
]


def _detections_df(detections: list) -> pd.DataFrame:
    """Build the detections table, nearest object first.

    Args:
        detections: Detection dicts. Each must contain ``"label"`` and
            ``"conf"``; the remaining fields are optional and show up as
            missing values when absent.

    Returns:
        A DataFrame with one row per detection and the columns listed in
        ``_DETECTION_COLUMNS``. Rows are ordered by ``"distance"`` ascending;
        detections without a distance sort as if it were 1e9. An empty input
        yields an empty frame that still has all the columns.

    Raises:
        KeyError: If a detection lacks ``"label"`` or ``"conf"``.
    """

    def _distance_key(det):
        distance = det.get("distance")
        return distance if distance is not None else 1e9

    rows = [
        {
            "label": det["label"],
            "confidence": float(det["conf"]),
            "pixel_height": det.get("pixel_height"),
            "known_height_m": det.get("known_height_m"),
            "bbox_depth_median": det.get("bbox_depth_median"),
            "dist_pinhole_m": det.get("dist_pinhole"),
            "dist_midas_m": det.get("dist_midas"),
            "final_distance_m": det.get("distance"),
            "method": det.get("method"),
        }
        for det in sorted(detections, key=_distance_key)
    ]
    return pd.DataFrame(rows, columns=_DETECTION_COLUMNS)
def run_depth_task(
    image_rgb: np.ndarray,
    midas_model_type: str,
    baseline_shift_pct: float,
    block_size: int,
    uniqueness_ratio: int,
    speckle_window_size: int,
    speckle_range: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, pd.DataFrame]:
    """Run classical (SGBM) and MiDaS depth estimation on one image.

    Args:
        image_rgb: Uploaded image as an RGB array.
        midas_model_type: MiDaS variant to load.
        baseline_shift_pct: Stereo baseline shift forwarded to ``sgbm_depth``.
        block_size: SGBM block size.
        uniqueness_ratio: SGBM uniqueness ratio.
        speckle_window_size: SGBM speckle window size.
        speckle_range: SGBM speckle range.

    Returns:
        A 4-tuple of RGB images and a table: the SGBM heatmap, the MiDaS
        heatmap, the left and right stereo images joined side by side, and a
        two-column ("metric", "value") DataFrame of metrics that also records
        the settings used.

    Raises:
        gr.Error: If the image is missing or is not a 3-channel image.
    """
    bgr = _ensure_bgr(image_rgb)

    # Coerce the UI values once; the same numbers feed the SGBM call and are
    # echoed into the metrics table below.
    sgbm_params = {
        "baseline_shift_pct": float(baseline_shift_pct),
        "block_size": int(block_size),
        "uniqueness_ratio": int(uniqueness_ratio),
        "speckle_window_size": int(speckle_window_size),
        "speckle_range": int(speckle_range),
    }
    classical_depth, left_view, right_view = sgbm_depth(bgr, **sgbm_params)

    model, transform, device = _get_midas_bundle(midas_model_type)
    learned_depth = midas_depth(bgr, model, transform, device)

    classical_heatmap = depth_to_heatmap(classical_depth)
    midas_heatmap = depth_to_heatmap(learned_depth)

    metrics = compute_depth_metrics(bgr, classical_depth, learned_depth)
    metrics.update({"midas_model": midas_model_type, **sgbm_params})
    metrics_df = pd.DataFrame(depth_metrics_table(metrics), columns=["metric", "value"])

    stereo_pair = np.concatenate([left_view, right_view], axis=1)
    return (
        _bgr_to_rgb(classical_heatmap),
        _bgr_to_rgb(midas_heatmap),
        _bgr_to_rgb(stereo_pair),
        metrics_df,
    )
def run_object_distance_task(
    image_rgb: np.ndarray,
    yolo_model_name: str,
    conf_thresh: float,
    iou_thresh: float,
    midas_model_type: str,
    focal_mode: str,
    fov_deg: float,
    focal_px: float,
    inner_ratio: float,
    min_depth_value: float,
    blend_weight_pinhole: float,
) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.DataFrame]:
    """Detect objects with YOLO and estimate their distances using MiDaS depth.

    Args:
        image_rgb: Uploaded image as an RGB array.
        yolo_model_name: YOLO variant to load.
        conf_thresh: Detection confidence threshold.
        iou_thresh: NMS IoU threshold.
        midas_model_type: MiDaS variant to load.
        focal_mode: ``"Estimate from FOV"`` derives the focal length from the
            image width and ``fov_deg``; any other value uses ``focal_px``.
        fov_deg: Horizontal field of view in degrees (FOV mode only).
        focal_px: Focal length in pixels (manual mode only).
        inner_ratio: Forwarded to ``estimate_distances``.
        min_depth_value: Forwarded to ``estimate_distances``.
        blend_weight_pinhole: Forwarded to ``estimate_distances``.

    Returns:
        A 4-tuple: the annotated image (RGB), the MiDaS depth heatmap (RGB),
        the per-detection table, and a two-column ("metric", "value")
        DataFrame of evaluation metrics that also records the settings used.

    Raises:
        gr.Error: If the image is missing or not 3-channel, if manual mode is
            selected without a positive finite focal length, or if YOLO finds
            no objects.
    """
    img_bgr = _ensure_bgr(image_rgb)

    if focal_mode == "Estimate from FOV":
        focal_length = float(estimate_focal_length(img_bgr.shape[1], fov_deg=float(fov_deg)))
    else:
        # A cleared number field arrives as None; report that to the user
        # instead of letting float(None) fail with a raw TypeError.
        if focal_px is None:
            raise gr.Error("Please enter a focal length in pixels.")
        focal_length = float(focal_px)
        # A zero, negative or non-finite focal length cannot give meaningful distances.
        if not np.isfinite(focal_length) or focal_length <= 0:
            raise gr.Error("Focal length must be a positive number of pixels.")

    yolo_model = _get_yolo_model(yolo_model_name, float(conf_thresh), float(iou_thresh))
    # Ensure thresholds match current UI even if cached model exists
    yolo_model.conf = float(conf_thresh)
    yolo_model.iou = float(iou_thresh)

    detections = run_yolo(yolo_model, img_bgr, conf_thresh=float(conf_thresh))
    if not detections:
        raise gr.Error("No objects detected. Try lowering the confidence threshold.")

    midas_model, midas_transform, midas_device = _get_midas_bundle(midas_model_type)
    depth_map = midas_depth(img_bgr, midas_model, midas_transform, midas_device)

    detections, eval_context = estimate_distances(
        detections,
        depth_map,
        focal_length=focal_length,
        inner_ratio=float(inner_ratio),
        min_depth_value=float(min_depth_value),
        blend_weight_pinhole=float(blend_weight_pinhole),
    )
    metrics = compute_evaluation_metrics(detections, focal_length, eval_context)

    annotated = draw_detections(img_bgr, detections)
    depth_heatmap = depth_to_heatmap(depth_map)
    det_df = _detections_df(detections)

    # Copy before adding the run settings so the object returned by
    # compute_evaluation_metrics is not mutated.
    metrics = dict(metrics)
    metrics.update(
        {
            "yolo_model": yolo_model_name,
            "midas_model": midas_model_type,
            "confidence_threshold": float(conf_thresh),
            "iou_threshold": float(iou_thresh),
            "focal_length_px": float(focal_length),
        }
    )
    metrics_df = pd.DataFrame(metrics_table(metrics), columns=["metric", "value"])

    return _bgr_to_rgb(annotated), _bgr_to_rgb(depth_heatmap), det_df, metrics_df
# Markdown shown under the page title. The blank line before "Note:" keeps the
# note as its own paragraph; without it Markdown treats the line as a
# continuation of the last bullet.
DESCRIPTION = """
Upload an image and run:
- **Depth Estimation**: Classical SGBM (synthetic stereo) + MiDaS
- **Object Distance**: YOLOv5 detection + metric distance estimation (pinhole + calibrated MiDaS)

Note: first run may download model weights (torch.hub).
"""
# Builds the Gradio UI: a page title and description, then two tabs
# ("Depth Estimation" and "Object Distance") plus an informational "Export"
# accordion. Each tab collects an image and hyperparameters and wires a button
# to the matching task function defined above.
# NOTE(review): indentation was lost in this copy of the file, so the nesting
# of the `with` layout blocks below cannot be read from here. Confirm the
# intended layout against the original file before re-indenting.
# Keep Blocks constructor minimal for compatibility across Gradio versions.
with gr.Blocks(title="CV Project Playground", analytics_enabled=False) as demo:
gr.Markdown("## CV Project Playground")
gr.Markdown(DESCRIPTION)
with gr.Tabs():
# --- Tab 1: SGBM + MiDaS depth estimation ---
with gr.Tab("Depth Estimation"):
with gr.Row():
img_in_1 = gr.Image(label="Input image", type="numpy")
with gr.Accordion("Hyperparameters", open=True):
with gr.Row():
midas_model_1 = gr.Dropdown(MIDAS_MODELS, value="MiDaS_small", label="MiDaS model")
baseline_shift = gr.Slider(0.01, 0.12, value=0.03, step=0.01, label="Stereo baseline shift (fraction of width)")
with gr.Row():
# Starting at 3 with step 2 restricts the slider to odd block sizes.
block_size = gr.Slider(3, 15, value=7, step=2, label="SGBM block size (odd)")
uniqueness = gr.Slider(1, 25, value=10, step=1, label="SGBM uniqueness ratio")
with gr.Row():
speckle_window = gr.Slider(0, 200, value=100, step=5, label="SGBM speckle window")
speckle_range = gr.Slider(0, 10, value=2, step=1, label="SGBM speckle range")
run_btn_1 = gr.Button("Run Depth Estimation", variant="primary")
with gr.Row():
out_classical = gr.Image(label="Classical heatmap (SGBM)", type="numpy")
out_midas = gr.Image(label="MiDaS heatmap", type="numpy")
out_stereo = gr.Image(label="Synthetic stereo pair (left | right)", type="numpy")
out_meta_1 = gr.Dataframe(label="Depth metrics (key)", wrap=True)
# Input order must match the parameter order of run_depth_task; output order
# must match its 4-tuple return value.
run_btn_1.click(
fn=run_depth_task,
inputs=[img_in_1, midas_model_1, baseline_shift, block_size, uniqueness, speckle_window, speckle_range],
outputs=[out_classical, out_midas, out_stereo, out_meta_1],
)
# --- Tab 2: YOLO detection + distance estimation ---
with gr.Tab("Object Distance"):
with gr.Row():
img_in_2 = gr.Image(label="Input image", type="numpy")
with gr.Accordion("Hyperparameters", open=True):
with gr.Row():
yolo_model = gr.Dropdown(YOLO_MODELS, value="yolov5s", label="YOLO model")
conf = gr.Slider(0.05, 0.95, value=0.35, step=0.05, label="Confidence threshold")
iou = gr.Slider(0.10, 0.95, value=0.45, step=0.05, label="NMS IoU threshold")
with gr.Row():
midas_model_2 = gr.Dropdown(MIDAS_MODELS, value="MiDaS_small", label="MiDaS model")
# run_object_distance_task compares this value against the exact string
# "Estimate from FOV"; any other choice uses the manual focal length.
focal_mode = gr.Radio(["Estimate from FOV", "Manual pixels"], value="Estimate from FOV", label="Focal length mode")
with gr.Row():
fov = gr.Slider(30, 120, value=60, step=1, label="Horizontal FOV (deg)")
focal_px = gr.Number(value=800.0, label="Focal length (px) — used when Manual pixels")
with gr.Row():
inner_ratio = gr.Slider(0.10, 1.00, value=0.60, step=0.05, label="Depth sampling inner box ratio")
min_depth = gr.Slider(0.00, 0.20, value=0.02, step=0.01, label="Minimum valid MiDaS value")
blend_w = gr.Slider(0.0, 1.0, value=0.55, step=0.05, label="Blend weight (pinhole)")
run_btn_2 = gr.Button("Run Object Distance", variant="primary")
with gr.Row():
out_annotated = gr.Image(label="Annotated detections (meters)", type="numpy")
out_depth = gr.Image(label="MiDaS depth heatmap", type="numpy")
out_table = gr.Dataframe(label="Detections table", wrap=True)
out_metrics = gr.Dataframe(label="Evaluation metrics (key)", wrap=True)
# Input order must match the parameter order of run_object_distance_task;
# output order must match its 4-tuple return value.
run_btn_2.click(
fn=run_object_distance_task,
inputs=[
img_in_2,
yolo_model,
conf,
iou,
midas_model_2,
focal_mode,
fov,
focal_px,
inner_ratio,
min_depth,
blend_w,
],
outputs=[out_annotated, out_depth, out_table, out_metrics],
)
# Informational only: no event handlers are attached to these components.
with gr.Accordion("Export", open=False):
gr.Markdown(
"For deployments, Hugging Face Spaces expects an `app.py` (this file) and `requirements.txt`."
)
gr.Markdown("Run locally:")
gr.Code("python app.py")
if __name__ == "__main__":
    # Theme moved to launch() in Gradio 6.0+. On releases whose launch() has
    # no `theme` parameter, passing it raises TypeError, so only pass it when
    # the installed version accepts it.
    import inspect

    launch_kwargs = {}
    if "theme" in inspect.signature(demo.launch).parameters:
        launch_kwargs["theme"] = gr.themes.Soft()
    demo.launch(**launch_kwargs)