Spaces:

dwellbot
/

dwellbot_stream3r

Configuration error

App Files Files Community

brian4dwell committed on Sep 19, 2025

Commit

79b8fec

1 Parent(s): 50cb28d

top_k selection

Browse files

Files changed (3) hide show

.vscode/launch.json +2 -2
app.py +181 -20
configs/stream_session.json +2 -1

.vscode/launch.json CHANGED Viewed

@@ -3,10 +3,10 @@
     "configurations": [
         {
-        "name": "Python: Current File (venv)",
         "type": "debugpy",
         "request": "launch",
-        "program": "${file}",
         "console": "integratedTerminal",
         "cwd": "${workspaceFolder}",
         "envFile": "${workspaceFolder}/.env",

     "configurations": [
         {
+        "name": "Python: UI (conda)",
         "type": "debugpy",
         "request": "launch",
+        "program": "${workspaceFolder}/app.py",
         "console": "integratedTerminal",
         "cwd": "${workspaceFolder}",
         "envFile": "${workspaceFolder}/.env",

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ import glob
 import gc
 import time
 import zipfile
 from typing import Any, Dict, Optional
 from stream3r.models.stream3r import STream3R
 from stream3r.stream_session import StreamSession
@@ -153,6 +154,21 @@ def _resolve_path(file_data) -> Optional[str]:
     return str(file_data)
 def load_session_settings(target_dir: str) -> Dict[str, Any]:
     settings_path = os.path.join(target_dir, "session_settings.json")
     if not os.path.exists(settings_path):
@@ -180,6 +196,130 @@ def sanitize_frame_filter_label(label: Optional[str]) -> str:
     return label.replace('.', '_').replace(':', '').replace(' ', '_')
 # -------------------------------------------------------------------------
 # 1) Core model inference
 # -------------------------------------------------------------------------
@@ -551,22 +691,22 @@ def localize_new_image(
     session.clear()
     try:
-        with torch.no_grad():
-            with torch.amp.autocast(dtype=image_tensor.dtype, device_type=image_tensor.device.type):
-                session.load_cache(kv_cache_path, device=image_tensor.device)
-                existing_predictions = session.get_all_predictions()
-                existing_frames = 0
-                for value in existing_predictions.values():
-                    if isinstance(value, torch.Tensor) and value.dim() >= 2:
-                        existing_frames = max(existing_frames, value.shape[1])
-                session.forward_stream(image_tensor)
-                localized_predictions = session.get_all_predictions()
     except Exception as exc:
         session.clear()
-        torch.cuda.empty_cache()
         return (f"Localization failed: {exc}", gr.update())
     def _extract_frame(tensor: torch.Tensor, index: int) -> np.ndarray:
@@ -706,7 +846,8 @@ def localize_new_image(
         summary_lines.append(f"Warning: failed to update GLB preview ({exc})")
     session.clear()
-    torch.cuda.empty_cache()
     return ("\n".join(summary_lines), localization_glb_path if localization_glb_path else gr.update())
@@ -740,6 +881,8 @@ def gradio_demo(
     # Prepare frame_filter dropdown
     target_dir_images = os.path.join(target_dir, "images")
     frame_filter_choices = build_frame_filter_choices(target_dir_images)
     print("Running run_model...")
     with torch.no_grad():
@@ -749,6 +892,20 @@ def gradio_demo(
     prediction_save_path = os.path.join(target_dir, "predictions.npz")
     np.savez(prediction_save_path, **predictions)
     frame_filter_value = frame_filter if frame_filter is not None else "All"
     session_settings = {
@@ -762,6 +919,9 @@ def gradio_demo(
         "mask_sky": bool(mask_sky),
         "prediction_mode": prediction_mode,
     }
     try:
         with open(os.path.join(target_dir, "session_settings.json"), "w", encoding="utf-8") as handle:
             json.dump(session_settings, handle, indent=2)
@@ -1039,6 +1199,14 @@ with gr.Blocks(
                 session_state_output = gr.File(label="Download Session State", interactive=False)
                 localization_output = gr.Textbox(label="Localization Result", lines=8, interactive=False)
             with gr.Row():
                 submit_btn = gr.Button("Reconstruct", scale=1, variant="primary")
                 clear_btn = gr.ClearButton(
@@ -1047,6 +1215,7 @@ with gr.Blocks(
                         input_images,
                         input_zip,
                         session_state_input,
                         reconstruction_output,
                         log_output,
                         target_dir_output,
@@ -1091,14 +1260,6 @@ with gr.Blocks(
                     mask_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
                     mask_white_bg = gr.Checkbox(label="Filter White Background", value=False)
-            with gr.Row():
-                localization_image_input = gr.File(
-                    label="Localize Single Image",
-                    file_types=[".png", ".jpg", ".jpeg", ".bmp", ".webp"],
-                    interactive=True,
-                )
-                localize_button = gr.Button("Localize Image", variant="secondary")
     # ---------------------- Examples section ----------------------
     def build_examples_from_folder():
         examples_root = "examples"

 import gc
 import time
 import zipfile
+import functools
 from typing import Any, Dict, Optional
 from stream3r.models.stream3r import STream3R
 from stream3r.stream_session import StreamSession
     return str(file_data)
+# Location of the stream-session config, resolved relative to this module's
+# directory (configs/stream_session.json).
+STREAM_SESSION_CONFIG_PATH = os.path.join(os.path.dirname(__file__), "configs", "stream_session.json")
+# The function takes no arguments, so lru_cache stores a single result: the
+# file is read at most once per process and later edits to it are not picked
+# up until restart. The same dict object is handed to every caller.
+@functools.lru_cache(maxsize=1)
+def load_stream_session_config() -> Dict[str, Any]:
+    """Load the stream-session configuration from STREAM_SESSION_CONFIG_PATH.
+
+    Returns:
+        The parsed JSON content when the file can be read and its top-level
+        value is a dict; otherwise an empty dict.
+
+    Read errors (OSError) and malformed JSON (json.JSONDecodeError) are
+    swallowed on purpose so a missing or broken config falls back to ``{}``.
+    Note that this fallback result is cached as well.
+    """
+    try:
+        with open(STREAM_SESSION_CONFIG_PATH, "r", encoding="utf-8") as handle:
+            data = json.load(handle)
+            # Only a JSON object is accepted; any other top-level value
+            # falls through to the empty-dict return below.
+            if isinstance(data, dict):
+                return data
+    except (OSError, json.JSONDecodeError):
+        # Best-effort load: treat an unreadable/invalid file as "no config".
+        pass
+    return {}
 def load_session_settings(target_dir: str) -> Dict[str, Any]:
     settings_path = os.path.join(target_dir, "session_settings.json")
     if not os.path.exists(settings_path):
     return label.replace('.', '_').replace(':', '').replace(' ', '_')
+def select_top_k_frames(predictions: Dict[str, np.ndarray], images_dir: str, top_k: int) -> list[Dict[str, Any]]:
+    """Pick up to ``top_k`` frames, ranked by confidence and spread by pose.
+
+    Frames are scored from the per-frame confidence map
+    (``world_points_conf``, falling back to ``depth_conf``) and then chosen
+    greedily in descending score order, preferring frames whose camera pose
+    differs from the frames already chosen. If the greedy pass yields fewer
+    than ``top_k`` frames, the remainder is filled in score order.
+
+    Args:
+        predictions: Prediction arrays; reads ``"extrinsic"`` (required) and
+            ``"world_points_conf"`` / ``"depth_conf"`` (optional). The first
+            axis of each is indexed by frame.
+        images_dir: Directory whose sorted, non-hidden entries are used to
+            map a frame index to a filename.
+        top_k: Maximum number of frames to return; clamped to the number of
+            frames in ``predictions["extrinsic"]``.
+
+    Returns:
+        A list of records sorted by ascending frame index, each with the keys
+        ``index``, ``filename``, ``score``, ``mean_confidence`` and
+        ``coverage_ratio``. Returns an empty list when ``top_k <= 0``,
+        ``images_dir`` is not a directory, ``"extrinsic"`` is missing, or
+        there are zero frames.
+    """
+    if top_k <= 0:
+        return []
+    if not os.path.isdir(images_dir):
+        return []
+    # Sorted listing of every non-hidden entry; position in this list is
+    # treated as the frame index when naming results below.
+    # NOTE(review): entries are not filtered by image extension — presumably
+    # the directory only holds the input frames; verify against caller.
+    image_files = sorted(
+        [fname for fname in os.listdir(images_dir) if not fname.startswith('.')]
+    )
+    extrinsics = predictions.get("extrinsic")
+    if extrinsics is None:
+        return []
+    num_frames = extrinsics.shape[0]
+    if num_frames == 0:
+        return []
+    top_k = min(top_k, num_frames)
+    def _camera_position(extr: np.ndarray) -> np.ndarray:
+        # Computes -R^T t from the first three columns (R) and fourth column (t).
+        # assumes extr is a world-to-camera [R|t] matrix, making this the
+        # camera centre in world coordinates — TODO confirm convention.
+        R = extr[:, :3]
+        t = extr[:, 3]
+        return (-R.T @ t).astype(np.float64)
+    positions = np.array([_camera_position(extrinsics[i]) for i in range(num_frames)])
+    # Third row of the rotation part is used as the viewing direction
+    # (presumably the camera z-axis in world coordinates — TODO confirm).
+    forward_vectors = np.array([extrinsics[i][2, :3] for i in range(num_frames)])
+    forward_norms = np.linalg.norm(forward_vectors, axis=1, keepdims=True)
+    # Normalise to unit length; rows with zero norm are left as zero vectors
+    # instead of producing NaNs.
+    forward_vectors = np.divide(forward_vectors, forward_norms, out=np.zeros_like(forward_vectors), where=forward_norms > 0)
+    conf_tensor = predictions.get("world_points_conf")
+    if conf_tensor is None:
+        conf_tensor = predictions.get("depth_conf")
+    quality_scores = np.zeros(num_frames, dtype=np.float64)
+    coverage_scores = np.zeros(num_frames, dtype=np.float64)
+    for idx in range(num_frames):
+        if conf_tensor is not None:
+            conf = conf_tensor[idx].reshape(-1)
+            if conf.size:
+                # Ignore NaN confidences (infinite values are not filtered).
+                conf = conf[~np.isnan(conf)]
+                if conf.size:
+                    quality_scores[idx] = float(np.mean(conf))
+                    # NOTE(review): the threshold is the frame's OWN 75th
+                    # percentile, so this ratio is roughly 0.25 for most
+                    # frames (higher only when many values tie at the
+                    # threshold). It likely does not measure coverage as
+                    # intended — consider a global threshold.
+                    high_thresh = np.percentile(conf, 75)
+                    coverage_scores[idx] = float(np.mean(conf >= high_thresh))
+                    continue
+            # Reached when the confidence map is empty or entirely NaN.
+            quality_scores[idx] = 0.0
+            coverage_scores[idx] = 0.0
+        else:
+            # No confidence data at all: every frame gets the same neutral
+            # score, so ordering falls back to frame order (stable sort).
+            quality_scores[idx] = 1.0
+            coverage_scores[idx] = 1.0
+    # Rescale coverage so the best frame has 1.0; if every value is zero,
+    # use all-ones so coverage does not affect the score.
+    max_cov = coverage_scores.max()
+    if max_cov > 0:
+        coverage_scores = coverage_scores / max_cov
+    else:
+        coverage_scores = np.ones_like(coverage_scores)
+    # Coverage modulates the mean confidence by a factor in [0.5, 1.0].
+    base_scores = quality_scores * (0.5 + 0.5 * coverage_scores)
+    # Candidate order: best score first.
+    indices = list(range(num_frames))
+    indices.sort(key=lambda idx: base_scores[idx], reverse=True)
+    # Position threshold scales with the diagonal of the bounding box of all
+    # camera positions, with a floor of 0.1 (in the extrinsics' units).
+    bbox_min = positions.min(axis=0)
+    bbox_max = positions.max(axis=0)
+    scene_scale = float(np.linalg.norm(bbox_max - bbox_min))
+    pos_threshold = max(0.1, 0.1 * scene_scale)
+    # Orientation threshold in degrees (angles below go through np.degrees).
+    ori_threshold = 15.0
+    selected = []
+    for idx in indices:
+        if not selected:
+            # The top-scoring frame is always taken.
+            selected.append(idx)
+        else:
+            accept = False
+            # Distance to the NEAREST already-selected camera.
+            min_dist = min(np.linalg.norm(positions[idx] - positions[j]) for j in selected)
+            # Angle to the MOST different already-selected viewing direction.
+            # NOTE(review): using max here means a candidate passes as soon
+            # as it differs from any one selected frame; a diversity test
+            # would usually use the minimum angle — confirm intent.
+            max_angle = max(
+                np.degrees(
+                    np.arccos(
+                        np.clip(np.dot(forward_vectors[idx], forward_vectors[j]), -1.0, 1.0)
+                    )
+                )
+                for j in selected
+            )
+            if min_dist >= pos_threshold or max_angle >= ori_threshold:
+                accept = True
+            elif len(selected) < max(1, top_k // 3):
+                # Roughly the first third of the budget is filled purely by
+                # score, regardless of pose diversity.
+                accept = True
+            if accept:
+                selected.append(idx)
+        if len(selected) >= top_k:
+            break
+    # Backfill: if the diversity filter rejected too many candidates, top up
+    # with the best remaining frames in score order.
+    if len(selected) < top_k:
+        for idx in indices:
+            if idx not in selected:
+                selected.append(idx)
+            if len(selected) >= top_k:
+                break
+    # Report frames in their original (index) order rather than by score.
+    selected = sorted(selected[:top_k])
+    records = []
+    for idx in selected:
+        # Fall back to a synthetic name when the directory has fewer entries
+        # than there are frames.
+        filename = image_files[idx] if idx < len(image_files) else f"frame_{idx:06d}"
+        records.append(
+            {
+                "index": int(idx),
+                "filename": filename,
+                "score": float(base_scores[idx]),
+                "mean_confidence": float(quality_scores[idx]),
+                "coverage_ratio": float(coverage_scores[idx]),
+            }
+        )
+    return records
 # -------------------------------------------------------------------------
 # 1) Core model inference
 # -------------------------------------------------------------------------
     session.clear()
     try:
+        session.load_cache(kv_cache_path, device=image_tensor.device)
+        existing_predictions = session.get_all_predictions()
+        existing_frames = 0
+        for value in existing_predictions.values():
+            if isinstance(value, torch.Tensor) and value.dim() >= 2:
+                existing_frames = max(existing_frames, value.shape[1])
+        with torch.no_grad():
+            session.forward_stream(image_tensor)
+        localized_predictions = session.get_all_predictions()
     except Exception as exc:
         session.clear()
+        if image_tensor.device.type == "cuda":
+            torch.cuda.empty_cache()
         return (f"Localization failed: {exc}", gr.update())
     def _extract_frame(tensor: torch.Tensor, index: int) -> np.ndarray:
         summary_lines.append(f"Warning: failed to update GLB preview ({exc})")
     session.clear()
+    if image_tensor.device.type == "cuda":
+        torch.cuda.empty_cache()
     return ("\n".join(summary_lines), localization_glb_path if localization_glb_path else gr.update())
     # Prepare frame_filter dropdown
     target_dir_images = os.path.join(target_dir, "images")
     frame_filter_choices = build_frame_filter_choices(target_dir_images)
+    config = load_stream_session_config()
+    top_k_frames = int(config.get("top_k_frames", 0) or 0)
     print("Running run_model...")
     with torch.no_grad():
     prediction_save_path = os.path.join(target_dir, "predictions.npz")
     np.savez(prediction_save_path, **predictions)
+    selected_frames = select_top_k_frames(predictions, target_dir_images, top_k_frames)
+    selected_frames_path = os.path.join(target_dir, "selected_frames.json")
+    if selected_frames:
+        try:
+            with open(selected_frames_path, "w", encoding="utf-8") as handle:
+                json.dump({"top_k": top_k_frames, "frames": selected_frames}, handle, indent=2)
+        except OSError as exc:
+            print(f"Failed to write selected frames: {exc}")
+    elif os.path.exists(selected_frames_path):
+        try:
+            os.remove(selected_frames_path)
+        except OSError:
+            pass
     frame_filter_value = frame_filter if frame_filter is not None else "All"
     session_settings = {
         "mask_sky": bool(mask_sky),
         "prediction_mode": prediction_mode,
     }
+    session_settings["top_k_frames"] = top_k_frames
+    if selected_frames:
+        session_settings["selected_frames"] = [frame["filename"] for frame in selected_frames]
     try:
         with open(os.path.join(target_dir, "session_settings.json"), "w", encoding="utf-8") as handle:
             json.dump(session_settings, handle, indent=2)
                 session_state_output = gr.File(label="Download Session State", interactive=False)
                 localization_output = gr.Textbox(label="Localization Result", lines=8, interactive=False)
+            with gr.Row():
+                localization_image_input = gr.File(
+                    label="Localize Single Image",
+                    file_types=[".png", ".jpg", ".jpeg", ".bmp", ".webp"],
+                    interactive=True,
+                )
+                localize_button = gr.Button("Localize Image", variant="secondary")
             with gr.Row():
                 submit_btn = gr.Button("Reconstruct", scale=1, variant="primary")
                 clear_btn = gr.ClearButton(
                         input_images,
                         input_zip,
                         session_state_input,
+                        localization_image_input,
                         reconstruction_output,
                         log_output,
                         target_dir_output,
                     mask_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
                     mask_white_bg = gr.Checkbox(label="Filter White Background", value=False)
     # ---------------------- Examples section ----------------------
     def build_examples_from_folder():
         examples_root = "examples"

configs/stream_session.json CHANGED Viewed

@@ -1,3 +1,4 @@
 {
-  "window_size": 25
 }

 {
+  "window_size": 5,
+  "top_k_frames": 18
 }