1javid committed on
Commit
b6066e7
·
verified ·
1 Parent(s): 57f2b23

Upload 4 files

Browse files
Files changed (4) hide show
  1. midas_small.onnx +3 -0
  2. requirements.txt +19 -0
  3. streamlit_app.py +270 -0
  4. yolov5s.pt +3 -0
midas_small.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d8c6cb8f415229daf1eb041024208e2608c9f98e17c81cc7c6ecb449c56fd58
3
+ size 66764249
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy>=1.24.0
2
+ # Streamlit Cloud runs headless Linux; use headless OpenCV wheels.
3
+ # contrib is required for ximgproc (guided filter).
4
+ opencv-python-headless>=4.7.0
5
+ opencv-contrib-python-headless>=4.7.0
6
+ matplotlib>=3.7.0
7
+ scipy>=1.10.0
8
+ urllib3>=2.6.0
9
+ torch>=2.0.0
10
+ torchvision>=0.15.0
11
+ timm>=0.9.0
12
+ ultralytics>=8.0.0
13
+ pandas>=1.5.0
14
+ seaborn>=0.12.0
15
+ requests>=2.28.0
16
+ Pillow>=9.4.0
17
+ PyYAML>=6.0
18
+ tqdm>=4.64.0
19
+ streamlit>=1.35.0
streamlit_app.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import json
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import pandas as pd
7
+ import streamlit as st
8
+
9
+ from depth_estimation import (
10
+ depth_to_heatmap,
11
+ load_midas,
12
+ midas_depth,
13
+ sgbm_depth,
14
+ )
15
+ from object_distance import (
16
+ compute_evaluation_metrics,
17
+ draw_detections,
18
+ estimate_distances,
19
+ estimate_focal_length,
20
+ load_yolo,
21
+ run_yolo,
22
+ )
23
+
24
+
25
# Page-level settings; this is the first Streamlit call in the script.
st.set_page_config(page_title="CV Task Playground", layout="wide")

# Options offered by the "MiDaS model" select boxes; the chosen string is
# passed unchanged to get_midas_bundle().
MIDAS_MODELS = ["MiDaS_small", "DPT_Hybrid", "DPT_Large", "MiDaS"]
# Options offered by the "YOLO model" select box; the chosen string is
# passed unchanged to get_yolo_model().
YOLO_MODELS = ["yolov5n", "yolov5s", "yolov5m", "yolov5l", "yolov5x"]
29
+
30
+
31
@st.cache_resource(show_spinner=False)
def get_midas_bundle(model_type: str):
    """Load the MiDaS bundle for ``model_type``, cached per model type.

    Thin wrapper around ``load_midas``; the result is returned untouched.
    Callers in this file unpack it as ``(model, transform, device)``.
    """
    bundle = load_midas(model_type)
    return bundle
34
+
35
+
36
# The cache key is (model_name, conf_thresh, iou_thresh). Both thresholds come
# from sliders, so without a bound every new slider combination would keep one
# more full model resident. max_entries caps how many stay cached.
@st.cache_resource(show_spinner=False, max_entries=8)
def get_yolo_model(model_name: str, conf_thresh: float, iou_thresh: float):
    """Load a YOLO model configured with the given thresholds, cached.

    Args:
        model_name: Model identifier forwarded to ``load_yolo``.
        conf_thresh: Confidence threshold forwarded to ``load_yolo``.
        iou_thresh: NMS IoU threshold forwarded to ``load_yolo``.

    Returns:
        Whatever ``load_yolo`` returns for these arguments.
    """
    return load_yolo(model_name, conf_thresh=conf_thresh, iou_thresh=iou_thresh)
39
+
40
+
41
def decode_uploaded_image(uploaded_file) -> np.ndarray:
    """Decode an uploaded image file into an OpenCV colour array.

    Args:
        uploaded_file: File-like object holding the encoded image bytes.
            It must provide ``getvalue()`` (as ``io.BytesIO`` does) or
            ``read()``.

    Returns:
        The decoded image as returned by ``cv2.imdecode`` with
        ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If the upload is empty or cannot be decoded as an image.
    """
    # Prefer getvalue(): it returns the whole buffer regardless of the current
    # read position, so an already-consumed stream still decodes correctly.
    get_all = getattr(uploaded_file, "getvalue", None)
    raw = get_all() if callable(get_all) else uploaded_file.read()

    data = np.frombuffer(raw, dtype=np.uint8)
    # OpenCV rejects an empty buffer with a low-level assertion error instead
    # of returning None, so report that case with a readable message here.
    if data.size == 0:
        raise ValueError("The uploaded image is empty.")

    img = cv2.imdecode(data, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError("Could not decode the uploaded image.")
    return img
47
+
48
+
49
def bgr_to_rgb(img: np.ndarray) -> np.ndarray:
    """Return ``img`` converted from BGR to RGB channel order via OpenCV."""
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return rgb
51
+
52
+
53
def image_download_bytes(img: np.ndarray) -> bytes:
    """Encode ``img`` as PNG and return the raw bytes for a download button.

    Raises:
        ValueError: If OpenCV reports that encoding failed.
    """
    success, buffer = cv2.imencode(".png", img)
    if success:
        return buffer.tobytes()
    raise ValueError("Could not encode image for download.")
58
+
59
+
60
def detections_to_dataframe(detections: list[dict]) -> pd.DataFrame:
    """Build the per-object results table, nearest object first.

    Args:
        detections: Detection dicts. ``"label"`` and ``"conf"`` are required;
            all other fields are optional and become missing values when
            absent.

    Returns:
        A DataFrame with one row per detection, sorted by ascending
        ``"distance"``. Detections with no distance are placed last. The
        column set is fixed, so an empty input still yields a frame (and a
        CSV export) with the full header.
    """
    columns = [
        "label",
        "confidence",
        "pixel_height",
        "known_height_m",
        "bbox_depth_median",
        "dist_pinhole_m",
        "dist_midas_m",
        "final_distance_m",
        "method",
    ]

    def sort_key(det: dict) -> float:
        # Missing or None distances sort after every measured distance.
        distance = det.get("distance")
        return distance if distance is not None else float("inf")

    rows = [
        {
            "label": det["label"],
            "confidence": round(det["conf"], 4),
            "pixel_height": det.get("pixel_height"),
            "known_height_m": det.get("known_height_m"),
            "bbox_depth_median": det.get("bbox_depth_median"),
            "dist_pinhole_m": det.get("dist_pinhole"),
            "dist_midas_m": det.get("dist_midas"),
            "final_distance_m": det.get("distance"),
            "method": det.get("method"),
        }
        for det in sorted(detections, key=sort_key)
    ]
    # Passing columns explicitly keeps the schema stable when rows is empty.
    return pd.DataFrame(rows, columns=columns)
75
+
76
+
77
# Page header shown above all task output.
st.title("Computer Vision Task Playground")
st.write("Upload an image, switch between the two tasks, and tune the main hyperparameters interactively.")

# Sidebar controls shared by both tasks: the task selector and the uploader.
with st.sidebar:
    st.header("Controls")
    task = st.radio("Task", ["Depth Estimation", "Object Distance"], index=0)
    uploaded_file = st.file_uploader(
        "Upload an image",
        type=["png", "jpg", "jpeg", "bmp", "webp"],
    )

# Nothing to process until a file is provided; end this script run here.
if uploaded_file is None:
    st.info("Upload an image to begin.")
    st.stop()

# Show decode failures in the UI and end the run instead of raising.
try:
    img = decode_uploaded_image(uploaded_file)
except Exception as exc:
    st.error(str(exc))
    st.stop()

# Two equal-width columns: the left one previews the upload, and right_col is
# filled later with the selected task's "Run Summary".
left_col, right_col = st.columns([1, 1])
with left_col:
    st.subheader("Uploaded Image")
    # img is passed through bgr_to_rgb before display.
    st.image(bgr_to_rgb(img), use_container_width=True)
102
+
103
# NOTE(review): the listing this block came from had lost its indentation;
# the nesting below is reconstructed from syntax and data flow - confirm
# against the original file.
# NOTE(review): both result sections are gated on an st.button() value. A
# click on any download button presumably triggers a rerun in which that
# value is False, which would make the results disappear - confirm, and
# consider keeping results in st.session_state.
if task == "Depth Estimation":
    # Sidebar: parameters for the classical SGBM pass and the MiDaS model.
    with st.sidebar:
        st.subheader("Depth Parameters")
        # The slider is in percent; dividing by 100 yields the fraction
        # passed to sgbm_depth.
        baseline_shift_pct = st.slider("Stereo baseline shift (%)", 1, 12, 3) / 100.0
        # step=2 starting at 3 restricts the block size to odd values.
        block_size = st.slider("SGBM block size", 3, 15, 7, step=2)
        uniqueness_ratio = st.slider("SGBM uniqueness ratio", 1, 25, 10)
        speckle_window_size = st.slider("SGBM speckle window", 0, 200, 100)
        speckle_range = st.slider("SGBM speckle range", 0, 10, 2)
        midas_model_type = st.selectbox("MiDaS model", MIDAS_MODELS, index=0)
        run_depth = st.button("Run Depth Estimation", type="primary")

    if run_depth:
        with st.spinner("Running depth estimation..."):
            try:
                # Classical pass: returns a depth map plus the left/right
                # image pair displayed below.
                depth_cl, left_img, right_img = sgbm_depth(
                    img,
                    baseline_shift_pct=baseline_shift_pct,
                    block_size=block_size,
                    uniqueness_ratio=uniqueness_ratio,
                    speckle_window_size=speckle_window_size,
                    speckle_range=speckle_range,
                )
                # Learned pass: the cached MiDaS bundle is applied to the same image.
                midas_model, midas_transform, midas_device = get_midas_bundle(midas_model_type)
                depth_ml = midas_depth(img, midas_model, midas_transform, midas_device)
                classical_heatmap = depth_to_heatmap(depth_cl)
                midas_heatmap = depth_to_heatmap(depth_ml)
            except Exception as exc:
                # Report any pipeline failure in the UI and end this run.
                st.error(f"Depth estimation failed: {exc}")
                st.stop()

        # Echo the settings used, plus the mean of each depth map.
        with right_col:
            st.subheader("Run Summary")
            st.json({
                "midas_model": midas_model_type,
                "baseline_shift_pct": baseline_shift_pct,
                "block_size": block_size,
                "uniqueness_ratio": uniqueness_ratio,
                "speckle_window_size": speckle_window_size,
                "speckle_range": speckle_range,
                "classical_mean_depth": float(depth_cl.mean()),
                "midas_mean_depth": float(depth_ml.mean()),
            })

        # Side by side: the stereo pair fed to SGBM and both depth heatmaps.
        c1, c2 = st.columns(2)
        with c1:
            st.subheader("Classical Stereo Pair")
            st.image(bgr_to_rgb(left_img), caption="Left view", use_container_width=True)
            st.image(bgr_to_rgb(right_img), caption="Synthetic right view", use_container_width=True)
        with c2:
            st.subheader("Depth Heatmaps")
            st.image(bgr_to_rgb(classical_heatmap), caption="Classical SGBM", use_container_width=True)
            st.image(bgr_to_rgb(midas_heatmap), caption=f"MiDaS ({midas_model_type})", use_container_width=True)

        # PNG downloads of both heatmaps.
        dl1, dl2 = st.columns(2)
        with dl1:
            st.download_button(
                "Download classical heatmap",
                data=image_download_bytes(classical_heatmap),
                file_name="classical_heatmap.png",
                mime="image/png",
            )
        with dl2:
            st.download_button(
                "Download MiDaS heatmap",
                data=image_download_bytes(midas_heatmap),
                file_name="midas_heatmap.png",
                mime="image/png",
            )

else:
    # "Object Distance" task: YOLO detection, then distance estimation from
    # the MiDaS depth map and a focal length.
    with st.sidebar:
        st.subheader("Detection Parameters")
        yolo_model_name = st.selectbox("YOLO model", YOLO_MODELS, index=1)
        conf_thresh = st.slider("Confidence threshold", 0.05, 0.95, 0.35, step=0.05)
        iou_thresh = st.slider("NMS IoU threshold", 0.10, 0.95, 0.45, step=0.05)
        midas_model_type = st.selectbox("MiDaS model", MIDAS_MODELS, index=0)
        focal_mode = st.radio("Focal length mode", ["Estimate from FOV", "Manual pixels"], index=0)
        if focal_mode == "Estimate from FOV":
            fov_deg = st.slider("Horizontal FOV (deg)", 30, 120, 60)
            # img.shape[1] is the image width in pixels.
            focal_length = estimate_focal_length(img.shape[1], fov_deg=fov_deg)
        else:
            focal_length = st.number_input("Focal length (px)", min_value=50.0, value=800.0, step=10.0)
        depth_inner_ratio = st.slider("Depth sampling inner box", 0.10, 1.00, 0.60, step=0.05)
        min_depth_value = st.slider("Minimum valid MiDaS depth", 0.0, 0.2, 0.02, step=0.01)
        blend_weight_pinhole = st.slider("Blend weight: pinhole", 0.0, 1.0, 0.55, step=0.05)
        run_detection = st.button("Run Object Distance", type="primary")

    if run_detection:
        with st.spinner("Running detection and distance estimation..."):
            try:
                yolo_model = get_yolo_model(yolo_model_name, conf_thresh, iou_thresh)
                # Re-apply the thresholds on the (possibly cached) model object.
                yolo_model.conf = conf_thresh
                yolo_model.iou = iou_thresh
                detections = run_yolo(yolo_model, img, conf_thresh=conf_thresh)
                if not detections:
                    # NOTE(review): st.stop() is called inside this try;
                    # this relies on it not being caught by the
                    # `except Exception` below - confirm against Streamlit.
                    st.warning("No objects detected with the current settings.")
                    st.stop()

                midas_model, midas_transform, midas_device = get_midas_bundle(midas_model_type)
                depth_map = midas_depth(img, midas_model, midas_transform, midas_device)
                # estimate_distances returns the detections plus a context
                # object that is handed to compute_evaluation_metrics.
                detections, eval_context = estimate_distances(
                    detections,
                    depth_map,
                    focal_length=float(focal_length),
                    inner_ratio=depth_inner_ratio,
                    min_depth_value=min_depth_value,
                    blend_weight_pinhole=blend_weight_pinhole,
                )
                metrics = compute_evaluation_metrics(detections, float(focal_length), eval_context)
                annotated = draw_detections(img, detections)
                depth_heatmap = depth_to_heatmap(depth_map)
                det_df = detections_to_dataframe(detections)
            except Exception as exc:
                # Report any pipeline failure in the UI and end this run.
                st.error(f"Object-distance pipeline failed: {exc}")
                st.stop()

        # Echo the settings used and the number of detections.
        with right_col:
            st.subheader("Run Summary")
            st.json({
                "yolo_model": yolo_model_name,
                "midas_model": midas_model_type,
                "focal_length_px": float(focal_length),
                "confidence_threshold": conf_thresh,
                "iou_threshold": iou_thresh,
                "depth_inner_ratio": depth_inner_ratio,
                "min_depth_value": min_depth_value,
                "blend_weight_pinhole": blend_weight_pinhole,
                "detections": len(detections),
            })

        # Side by side: the annotated detections and the MiDaS depth heatmap.
        c1, c2 = st.columns(2)
        with c1:
            st.subheader("Annotated Output")
            st.image(bgr_to_rgb(annotated), use_container_width=True)
        with c2:
            st.subheader("MiDaS Depth")
            st.image(bgr_to_rgb(depth_heatmap), use_container_width=True)

        st.subheader("Detected Objects")
        st.dataframe(det_df, use_container_width=True)

        st.subheader("Evaluation Metrics")
        st.json(metrics)

        # Serialise the table and the metrics once for the download buttons.
        # NOTE(review): json.dumps runs outside the try above and assumes
        # metrics holds only JSON-serialisable values - confirm.
        csv_bytes = det_df.to_csv(index=False).encode("utf-8")
        metrics_bytes = json.dumps(metrics, indent=2).encode("utf-8")
        d1, d2, d3 = st.columns(3)
        with d1:
            st.download_button(
                "Download annotated image",
                data=image_download_bytes(annotated),
                file_name="detections_with_distance.png",
                mime="image/png",
            )
        with d2:
            st.download_button(
                "Download detections CSV",
                data=csv_bytes,
                file_name="detection_distances.csv",
                mime="text/csv",
            )
        with d3:
            st.download_button(
                "Download metrics JSON",
                data=metrics_bytes,
                file_name="metrics.json",
                mime="application/json",
            )
yolov5s.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b3b748c1e592ddd8868022e8732fde20025197328490623cc16c6f24d0782ee
3
+ size 14808437