Insta360-Research committed
Commit 07df94d · verified · 1 Parent(s): 72731f5

Update app.py

Files changed (1):
  1. app.py +87 -93
app.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import absolute_import, division, print_function
 
-import os, sys
+import os
+import sys
 import cv2
 import yaml
 import numpy as np
@@ -9,12 +10,13 @@ from huggingface_hub import hf_hub_download
 
 # ================== `spaces` must be imported as early as possible ==================
 try:
-    import spaces
+    import spaces  # type: ignore
    gpu_decorator = spaces.GPU
 except Exception:
    gpu_decorator = lambda f: f
 
-# ================== Project paths ==================
+# ================== Project paths: make sure `networks` is importable ==================
+# Works no matter which directory `python app.py` is launched from
 PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(PROJECT_ROOT)
 
@@ -23,38 +25,23 @@ from networks.models import make  # noqa: E402
 # ================== HF model config ==================
 WEIGHTS_REPO = "Insta360-Research/DAP-weights"
 WEIGHTS_FILE = "model.pth"
-CONFIG_PATH = "config/infer.yaml"
+CONFIG_PATH = os.path.join(PROJECT_ROOT, "config", "infer.yaml")
 
 model = None
 device = "cpu"
 
-# ================== Adaptive visualization (matches the test script) ==================
+# ================== Fixed color mapping (consistent colors across images) ==================
 import matplotlib
 
-def colorize_depth_adaptive(
-    depth: np.ndarray,
-    cmap: str = "Spectral",
-    depth_truncation: float = 1.0
-) -> np.ndarray:
+def colorize_depth_fixed(depth_u8: np.ndarray, cmap: str = "Spectral") -> np.ndarray:
     """
-    depth: float32 depth map (H, W)
-    depth_truncation: truncation threshold after normalization (0~1); values beyond it are treated as farthest
+    depth_u8: uint8, 0~255
     return: RGB uint8
     """
-    if depth is None:
-        return None
-
-    dmin = float(np.min(depth))
-    dmax = float(np.max(depth))
-    denom = (dmax - dmin) + 1e-8
-
-    depth_normalized = (depth - dmin) / denom
-    depth_normalized = np.clip(depth_normalized, 0.0, float(depth_truncation))
-    depth_normalized = depth_normalized / (float(depth_truncation) + 1e-8)
-
-    colored = matplotlib.colormaps[cmap](depth_normalized)[..., :3]
-    colored = np.ascontiguousarray((colored.clip(0, 1) * 255).astype(np.uint8))
-    return colored
+    disp = depth_u8.astype(np.float32) / 255.0
+    colored = matplotlib.colormaps[cmap](disp)[..., :3]
+    colored = (colored * 255).astype(np.uint8)
+    return np.ascontiguousarray(colored)
 
 # ================== Model loading ==================
 def load_model(config_path: str):
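Note on the change above: `colorize_depth_adaptive` rescaled every image by its own min/max, so the same depth value could get different colors in different images; `colorize_depth_fixed` maps a fixed 0~255 input straight through the colormap. A minimal sketch of the new behavior (not part of the commit; assumes a matplotlib new enough to have the `matplotlib.colormaps` registry, 3.5+):

```python
import numpy as np
import matplotlib

def colorize_depth_fixed(depth_u8: np.ndarray, cmap: str = "Spectral") -> np.ndarray:
    disp = depth_u8.astype(np.float32) / 255.0           # 0~255 -> 0.0~1.0
    colored = matplotlib.colormaps[cmap](disp)[..., :3]  # drop the alpha channel
    return np.ascontiguousarray((colored * 255).astype(np.uint8))

# The same uint8 value always maps to the same color, independent of image content:
ramp = np.tile(np.arange(256, dtype=np.uint8), (8, 1))   # synthetic 8x256 gradient
print(colorize_depth_fixed(ramp).shape)                  # (8, 256, 3)
```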
@@ -68,7 +55,10 @@ def load_model(config_path: str):
        config = yaml.load(f, Loader=yaml.FullLoader)
 
    print(f"Downloading weights from HF: {WEIGHTS_REPO}/{WEIGHTS_FILE}")
-    model_path = hf_hub_download(repo_id=WEIGHTS_REPO, filename=WEIGHTS_FILE)
+    model_path = hf_hub_download(
+        repo_id=WEIGHTS_REPO,
+        filename=WEIGHTS_FILE
+    )
    print(f"✅ Weights downloaded to: {model_path}")
 
    state = torch.load(model_path, map_location=device)
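For context, `hf_hub_download` pulls the file from the Hub into the local cache on the first call and returns its absolute path; warm restarts reuse the cached copy. A standalone sketch with the same repo and filename (the printed path assumes the default cache location):

```python
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="Insta360-Research/DAP-weights",
    filename="model.pth",
)
# e.g. ~/.cache/huggingface/hub/models--Insta360-Research--DAP-weights/snapshots/<revision>/model.pth
print(model_path)
```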
@@ -79,20 +69,18 @@ def load_model(config_path: str):
    m = m.to(device)
    m_state = m.state_dict()
-    m.load_state_dict({k: v for k, v in state.items() if k in m_state}, strict=False)
+    m.load_state_dict(
+        {k: v for k, v in state.items() if k in m_state},
+        strict=False
+    )
    m.eval()
    print("✅ Model loaded.")
-    return m, config
+    return m
 
 # ================== Load the model once at startup ==================
-model, cfg = load_model(CONFIG_PATH)
+model = load_model(CONFIG_PATH)
 
-# ====== Read the inference size from the config (matches the test script defaults) ======
-infer_cfg = cfg.get("inference", {}) if isinstance(cfg, dict) else {}
-INFER_H = int(infer_cfg.get("height", 512))
-INFER_W = int(infer_cfg.get("width", 1024))
-
-# ================== Inference (fix: resize + enforce consistent dict/mask logic) ==================
+# ================== Inference ==================
 @gpu_decorator
 def infer_raw(img_rgb: np.ndarray):
    if img_rgb is None:
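The filtered `load_state_dict` above keeps only checkpoint entries whose keys exist in the model, and `strict=False` tolerates whatever is left over. A toy illustration of the pattern (hypothetical module and checkpoint, not the DAP network):

```python
import torch
import torch.nn as nn

net = nn.Linear(4, 2)  # stand-in for the real model

# Pretend checkpoint: two matching keys plus one stale key from another architecture
state = {
    "weight": torch.zeros(2, 4),
    "bias": torch.zeros(2),
    "old_head.weight": torch.ones(3),
}

m_state = net.state_dict()
result = net.load_state_dict({k: v for k, v in state.items() if k in m_state}, strict=False)
print(result)  # no missing or unexpected keys: the stale key was filtered out first
```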
@@ -100,48 +88,34 @@ def infer_raw(img_rgb: np.ndarray):
 
    import torch
 
-    # 1) resize to the fixed input size (matches the test script)
-    img_resized = cv2.resize(img_rgb, (INFER_W, INFER_H), interpolation=cv2.INTER_LINEAR)
-
-    # 2) normalize
-    img = img_resized.astype(np.float32) / 255.0
+    # Keep the original logic: feed the image in directly, without resizing
+    img = img_rgb.astype(np.float32) / 255.0
    tensor = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
 
    with torch.inference_mode():
        outputs = model(tensor)
 
-    # 3) force the dict + pred_mask/pred_depth path, so a fallback can't cause inconsistencies
-    if not (isinstance(outputs, dict) and ("pred_depth" in outputs)):
-        raise RuntimeError(
-            f"Model output format unexpected. Expect dict with key 'pred_depth', got: {type(outputs)}"
-        )
-
-    # mask handling (matches the test script)
-    if "pred_mask" in outputs:
-        outputs["pred_mask"] = 1 - outputs["pred_mask"]
-        outputs["pred_mask"] = outputs["pred_mask"] > 0.5
-        outputs["pred_depth"][~outputs["pred_mask"]] = 1
-
-    pred = outputs["pred_depth"][0].detach().cpu().squeeze().numpy()
+    if isinstance(outputs, dict) and "pred_depth" in outputs:
+        if "pred_mask" in outputs:
+            mask = 1 - outputs["pred_mask"]
+            mask = mask > 0.5
+            outputs["pred_depth"][~mask] = 1
+        pred = outputs["pred_depth"][0].cpu().squeeze().numpy()
+    else:
+        # Keep the original fallback logic
+        pred = outputs[0].cpu().squeeze().numpy()
 
    return pred.astype(np.float32)
 
-# ================== Visualization: switched to adaptive mapping (issue 1) ==================
 def visualize_100m(pred: np.ndarray):
    if pred is None:
        return None, None, None
 
-    # Matches the test script default: depth_truncation = 1.0
-    depth_color = colorize_depth_adaptive(pred, cmap="Spectral", depth_truncation=1.0)
-
-    # Build the gray image the same "adaptive" way so the two views line up
-    dmin = float(np.min(pred))
-    dmax = float(np.max(pred))
-    gray = ((pred - dmin) / ((dmax - dmin) + 1e-8))
-    gray = np.clip(gray, 0.0, 1.0)
-    depth_gray = (gray * 255).astype(np.uint8)
-
-    npy_path = "/tmp/depth.npy"
+    pred_clip = np.clip(pred, 0.0, 1.0)
+    depth_gray = (pred_clip * 255).astype(np.uint8)
+    depth_color = colorize_depth_fixed(depth_gray, cmap="Spectral")
+
+    npy_path = "/tmp/depth_100m.npy"
    np.save(npy_path, pred)
 
    return depth_color, depth_gray, npy_path
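The mask handling in `infer_raw` keeps a pixel only where `1 - pred_mask > 0.5` (i.e. `pred_mask < 0.5`) and forces every other pixel's depth to 1, the far end of the normalized range. A small numeric check of that logic with synthetic tensors:

```python
import torch

pred_depth = torch.tensor([[0.2, 0.4],
                           [0.6, 0.8]])
pred_mask = torch.tensor([[0.9, 0.1],
                          [0.1, 0.9]])  # high value -> pixel gets masked out

mask = (1 - pred_mask) > 0.5   # True where the prediction is kept
pred_depth[~mask] = 1          # masked pixels pushed to the far limit

print(pred_depth)  # tensor([[1.0000, 0.4000], [0.6000, 1.0000]])
```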
@@ -150,24 +124,19 @@ def visualize_10m(pred: np.ndarray):
    if pred is None:
        return None, None, None
 
-    # "Finer up close": effectively a smaller visualization truncation threshold (the test script's depth_truncation idea)
-    depth_color = colorize_depth_adaptive(pred, cmap="Spectral", depth_truncation=0.1)
-
-    dmin = float(np.min(pred))
-    dmax = float(np.max(pred))
-    gray = ((pred - dmin) / ((dmax - dmin) + 1e-8))
-    gray = np.clip(gray, 0.0, 0.1) / 0.1
-    depth_gray = (gray * 255).astype(np.uint8)
+    pred_clip = np.clip(pred, 0.0, 0.1)
+    depth_gray = (pred_clip * 10 * 255).astype(np.uint8)
+    depth_color = colorize_depth_fixed(depth_gray, cmap="Spectral")
 
-    npy_path = "/tmp/depth.npy"
+    npy_path = "/tmp/depth_10m.npy"
    np.save(npy_path, pred)
 
    return depth_color, depth_gray, npy_path
 
 @gpu_decorator
 def infer_and_vis_100m(img_rgb: np.ndarray):
-    pred = infer_raw(img_rgb)                # run the model once (GPU)
-    color, gray, npy = visualize_100m(pred)  # default visualization (CPU)
+    pred = infer_raw(img_rgb)                # run the model once (GPU)
+    color, gray, npy = visualize_100m(pred)  # default 100m view (CPU)
    return pred, color, gray, npy
 
 # ================== Gradio UI ==================
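The two views differ only in how `pred` is quantized to uint8 before coloring: the 100m view clips to [0, 1] and scales by 255, while the 10m view clips to [0, 0.1] and scales by 10 * 255, spending the full palette on the nearest tenth of the range. For example:

```python
import numpy as np

pred = np.linspace(0.0, 1.0, 6).astype(np.float32)  # [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]

gray_100m = (np.clip(pred, 0.0, 1.0) * 255).astype(np.uint8)
gray_10m = (np.clip(pred, 0.0, 0.1) * 10 * 255).astype(np.uint8)

print(gray_100m)  # [  0  51 102 153 204 255]
print(gray_10m)   # [  0 255 255 255 255 255] -- everything at or past 0.1 saturates
```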
@@ -191,9 +160,10 @@ example_gen_paths = [
 with gr.Blocks() as demo:
    gr.Markdown("# DAP Depth Prediction Demo")
 
-    raw_depth = gr.State()  # stores the model output
+    raw_depth = gr.State()  # 🔑 stores the model output
 
    with gr.Row():
+        # ========== Left ==========
        with gr.Column(scale=1):
            inp = gr.Image(type="numpy", label="Input Image", height=360)
 
@@ -208,28 +178,52 @@ with gr.Blocks() as demo:
            btn_10m = gr.Button("Visualize (10m)")
 
            gr.Markdown(
-                f"""
+                """
                <small>
-                <b>Inference resize:</b> {INFER_W}×{INFER_H}<br>
-                <b>Visualization:</b><br>
-                • <b>100m</b>: truncation=1.0 (default)<br>
-                • <b>10m</b>: truncation=0.1 (finer detail up close)<br>
+                <b>Visualization range:</b><br>
+                • <b>100m</b>: recommended for <b>outdoor</b> scenes<br>
+                • <b>10m</b>: recommended for <b>indoor</b> scenes<br>
+                (Only affects visualization, not the raw depth output)
                </small>
-                """
+                """,
+                elem_id="vis_hint",
            )
 
+        # ========== Right ==========
        with gr.Column(scale=2):
            out_color = gr.Image(label="Depth (Color)", height=260)
            out_gray = gr.Image(label="Depth (Gray)", height=260)
            out_npy = gr.File(label="Depth (.npy)")
 
-    btn_infer.click(fn=infer_and_vis_100m, inputs=inp, outputs=[raw_depth, out_color, out_gray, out_npy])
-    btn_100m.click(fn=visualize_100m, inputs=raw_depth, outputs=[out_color, out_gray, out_npy])
-    btn_10m.click(fn=visualize_10m, inputs=raw_depth, outputs=[out_color, out_gray, out_npy])
-
-demo.launch(
-    server_name="0.0.0.0",
-    server_port=7860,
-    ssr_mode=False,
-    show_error=True,
-)
+    # 1️⃣ Run the model
+    btn_infer.click(
+        fn=infer_and_vis_100m,
+        inputs=inp,
+        outputs=[raw_depth, out_color, out_gray, out_npy],
+    )
+
+    # 2️⃣ 100m
+    btn_100m.click(
+        fn=visualize_100m,
+        inputs=raw_depth,
+        outputs=[out_color, out_gray, out_npy],
+    )
+
+    # 3️⃣ 10m
+    btn_10m.click(
+        fn=visualize_10m,
+        inputs=raw_depth,
+        outputs=[out_color, out_gray, out_npy],
+    )
+
+if __name__ == "__main__":
+    # For embedding in a web page: control host/port via environment variables
+    host = os.environ.get("HOST", "0.0.0.0")
+    port = int(os.environ.get("PORT", "7860"))
+
+    demo.launch(
+        server_name=host,
+        server_port=port,
+        ssr_mode=False,
+        show_error=True,
+    )
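With the new `__main__` guard, importing `app` from another module no longer auto-launches the server, and the bind address and port come from `HOST`/`PORT` with Spaces-friendly defaults (0.0.0.0:7860). A quick sketch of the same convention, with hypothetical local values:

```python
import os

# e.g. run as: HOST=127.0.0.1 PORT=7861 python app.py
host = os.environ.get("HOST", "0.0.0.0")    # default: listen on all interfaces
port = int(os.environ.get("PORT", "7860"))  # default: the port Hugging Face Spaces expects
print(f"would launch on {host}:{port}")
```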