Spaces:

UEXdo
/

HeightAdaptor

Sleeping

App Files Files Community

PubAccount commited on Apr 25

Commit

dec68f2

verified ·

1 Parent(s): 33a707b

Update app.py

Browse files

Files changed (1) hide show

app.py +317 -10

app.py CHANGED Viewed

@@ -1,14 +1,321 @@
-import gradio as gr
-import spaces
 import torch
-zero = torch.Tensor([0]).cuda()
-print(zero.device) # <-- 'cpu' 🤔
-@spaces.GPU
-def greet(n):
-    print(zero.device) # <-- 'cuda:0' 🤗
-    return f"Hello {zero + n} Tensor"
-demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
-demo.launch()

+"""
+app.py — HeightAdaptor Hugging Face Spaces App
+Backbone : stable-diffusion-v1-5/stable-diffusion-v1-5
+Adaptor  : UEXdo/HeightAdaptor-weight
+"""
+import os, io
 import torch
+import numpy as np
+import matplotlib; matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from PIL import Image
+from torch.nn import functional as F
+from diffusers import StableDiffusionPipeline
+from huggingface_hub import snapshot_download
+from peft import PeftModel
+import gradio as gr
+# ── ZeroGPU compatibility（无 spaces 库时自动降级）─────────────────────
+try:
+    import spaces
+except ImportError:
+    class spaces:
+        @staticmethod
+        def GPU(duration=120):
+            return lambda fn: fn
+from networks.semantic_head import SemanticHead
+from networks.height_head   import HeightHead
+from networks.decoder       import Decoder
+# ══════════════════════════════════════════════════════════════
+#  Constants & configuration
+# ══════════════════════════════════════════════════════════════
+# Scale factor multiplied onto the sampled VAE latents in run_inference.
+RGB_LATENT_SCALE = 0.18215
+# Both IDs can be overridden via environment variables; otherwise the default
+# Hugging Face repo IDs are used.
+SD_MODEL_ID  = os.environ.get("SD_MODEL_ID",      "stable-diffusion-v1-5/stable-diffusion-v1-5")
+ADAPTOR_REPO = os.environ.get("ADAPTOR_MODEL_ID", "UEXdo/HeightAdaptor-weight")
+# Per-dataset settings; `classes_num` sizes the SemanticHead in build_model.
+DATASET_CFG = {
+    "OpenDC": {"classes_num": 8},
+    "US3D":   {"classes_num": 6},
+}
+# Class index -> RGB colour used to paint the semantic map in run_inference.
+LABEL_COLORS = {
+    "OpenDC": {
+        # NOTE(review): classes 1 and 3 share (255,0,0) — confirm this is intended.
+        0: (50,125,0), 1: (255,0,0),    2: (0,255,0),   3: (255,0,0),
+        4: (255,255,0), 5: (255,255,255), 6: (0,255,255), 7: (0,0,0),
+    },
+    "US3D": {
+        # NOTE(review): classes 0 and 1 are both black — confirm this is intended.
+        0: (0,0,0), 1: (0,0,0), 2: (255,0,0),
+        3: (0,255,0), 4: (0,0,255), 5: (255,255,0),
+    },
+}
+# UI task name -> text prompt fed to the tokenizer / text encoder.
+TASK_PROMPTS = {
+    "Height Estimation":     "Image to height map",
+    "Semantic Segmentation": "Image to semantic segmentation",
+}
+# ══════════════════════════════════════════════════════════════
+#  Download the adaptor weights at startup (cached locally, so later
+#  runs do not need to download them again)
+# ══════════════════════════════════════════════════════════════
+print(f"📦 Downloading adaptor weights from {ADAPTOR_REPO} ...")
+# Local directory holding the downloaded snapshot; build_model reads the
+# LoRA folder and the *.pth checkpoints from here.
+ADAPTOR_DIR = snapshot_download(repo_id=ADAPTOR_REPO)
+print(f"✅ Weights cached at: {ADAPTOR_DIR}")
+# ══════════════════════════════════════════════════════════════
+#  Model management (per the author: the main process keeps the CPU model,
+#  and the GPU subprocess copies it on use)
+# ══════════════════════════════════════════════════════════════
+# Currently loaded pipeline, or None before the first successful load.
+_model     = None
+# Configuration of the loaded pipeline.
+_model_key = None   # (dataset_name, h_type)
+def build_model(dataset_name: str, h_type: str) -> StableDiffusionPipeline:
+    """从 HF Hub 拉取基础模型，叠加 LoRA + 三个自定义 Head，返回 CPU 模型。"""
+    classes_num = DATASET_CFG[dataset_name]["classes_num"]
+    print(f"🔧 Building model — dataset={dataset_name}, h_type={h_type}")
+    # 1. 加载 SD v1.5 基础 Pipeline
+    pipe = StableDiffusionPipeline.from_pretrained(
+        SD_MODEL_ID,
+        torch_dtype=torch.float32,
+        safety_checker=None,
+        requires_safety_checker=False,
+    )
+    # 2. 用 PEFT 把 LoRA 权重注入 UNet
+    pipe.unet = PeftModel.from_pretrained(
+        pipe.unet,
+        os.path.join(ADAPTOR_DIR, "lora"),
+    )
+    # 3. 加载 Decoder
+    pipe.decoder = Decoder(in_channel=320)
+    pipe.decoder.load_state_dict(
+        torch.load(os.path.join(ADAPTOR_DIR, "decoder.pth"), map_location="cpu"))
+    pipe.decoder.eval()
+    # 4. 加载 HeightHead
+    pipe.height_head = HeightHead(in_channels=192, h_type=h_type)
+    pipe.height_head.load_state_dict(
+        torch.load(os.path.join(ADAPTOR_DIR, "height_head.pth"), map_location="cpu"))
+    pipe.height_head.eval()
+    # 5. 加载 SemanticHead（类别数由 dataset 决定）
+    pipe.semantic_head = SemanticHead(in_channels=192, num_classes=classes_num)
+    pipe.semantic_head.load_state_dict(
+        torch.load(os.path.join(ADAPTOR_DIR, "semantic_head.pth"), map_location="cpu"))
+    pipe.semantic_head.eval()
+    print("✅ Model ready (on CPU).")
+    return pipe
+def reload_model(dataset_name: str, h_type: str) -> str:
+    """
+    在主进程中重建模型，供 Gradio 按钮调用。
+    注意：此函数 **不加** @spaces.GPU，直接运行在主进程，
+    全局 _model 更新后，下一次 @spaces.GPU 调用会 fork 到新模型。
+    """
+    global _model, _model_key
+    key = (dataset_name, h_type)
+    if _model is not None and _model_key == key:
+        return f"✅ Already loaded — **{dataset_name}** / **{h_type}**"
+    _model     = build_model(dataset_name, h_type)
+    _model_key = key
+    return f"✅ Model loaded — **{dataset_name}** / **{h_type}**"
+# Preload the default model at startup (OpenDC / ER).
+reload_model("OpenDC", "ER")
+# ══════════════════════════════════════════════════════════════
+#  VAE / UNet forward passes (the DistributedDataParallel branch was
+#  removed; it is not needed for the single-GPU Spaces setup)
+# ══════════════════════════════════════════════════════════════
+def _vae_encode(pipe, x: torch.Tensor):
+    """通过 VAE Encoder 前向，返回 (最终特征, 中间特征列表)。"""
+    enc   = pipe.vae.encoder
+    x     = enc.conv_in(x)
+    feats = []
+    for blk in enc.down_blocks:
+        x = blk(x)
+        feats.append(x)
+    x = enc.mid_block(x)
+    x = enc.conv_norm_out(x)
+    x = enc.conv_act(x)
+    x = enc.conv_out(x)
+    return x, feats[:-1]   # 与原始代码一致，丢弃最后一层特征
+def _unet_forward(unet, sample, timestep, enc_hs):
+    t_emb  = unet.get_time_embed(sample=sample, timestep=timestep)
+    emb    = unet.time_embedding(t_emb)
+    enc_hs = unet.process_encoder_hidden_states(
+        encoder_hidden_states=enc_hs, added_cond_kwargs=None)
+    x     = unet.conv_in(sample)
+    skips = (x,)
+    for blk in unet.down_blocks:
+        x, res = blk(hidden_states=x, temb=emb, encoder_hidden_states=enc_hs)
+        skips += res
+    x = unet.mid_block(x, emb, encoder_hidden_states=enc_hs)
+    for blk in unet.up_blocks:
+        res   = skips[-len(blk.resnets):]
+        skips = skips[:-len(blk.resnets)]
+        x = blk(hidden_states=x, temb=emb,
+                 res_hidden_states_tuple=res, encoder_hidden_states=enc_hs)
+    return x
+# ══════════════════════════════════════════════════════════════
+#  GPU 推理（用 @spaces.GPU 装饰，申请最多 120s GPU）
+# ══════════════════════════════════════════════════════════════
+@spaces.GPU(duration=120)
+@torch.no_grad()
+def run_inference(
+    image:        Image.Image,
+    task:         str,
+    dataset_name: str,
+    h_type:       str,
+    mode_type:    str,
+):
+    if image is None:
+        return None, "⚠️ Please upload an image first."
+    if _model is None:
+        return None, "⚠️ Model not loaded — click **Load / Reload Model**."
+    device = "cuda"
+    pipe   = _model
+    pipe.to(device)          # ZeroGPU 子进程拿到 CPU 副本后移到 GPU
+    try:
+        # ── 1. 文本编码 ──────────────────────────────────────
+        tokens   = pipe.tokenizer(
+            TASK_PROMPTS[task], padding="max_length", truncation=True,
+            max_length=pipe.tokenizer.model_max_length, return_tensors="pt")
+        text_emb = pipe.text_encoder(tokens.input_ids.to(device))[0].float()
+        # text_emb: [1, 77, 768]  (SD v1.5 的 text dim 为 768)
+        # ── 2. 图像预处理 → [1, 3, 512, 512] ∈ [-1, 1] ──────
+        img  = image.convert("RGB").resize((512, 512), Image.BILINEAR)
+        arr  = np.array(img, dtype=np.float32).transpose(2, 0, 1)
+        norm = (torch.from_numpy(arr) / 255.0 * 2.0 - 1.0).unsqueeze(0).to(device)
+        # ── 3. VAE 编码 ───────────────────────────────────────
+        h, h_list = _vae_encode(pipe, norm)
+        moments   = pipe.vae.quant_conv(h)
+        mean, lv  = torch.chunk(moments, 2, dim=1)
+        latents   = (mean + torch.exp(0.5 * lv) * torch.randn_like(mean)) * RGB_LATENT_SCALE
+        # ── 4. UNet + 自定义 Decoder ─────────────────────────
+        ts     = torch.ones([latents.shape[0]], device=device) * 999
+        unet_o = _unet_forward(pipe.unet, latents, ts, text_emb)
+        dec_o  = pipe.decoder(unet_o, res_list=h_list[::-1])
+        # ── 5. 任务 Head ──────────────────────────────────────
+        h_out = pipe.height_head(dec_o)
+        s_out = pipe.semantic_head(dec_o)
+        # ── 6. 后处理 & 可视化 ───────────────────────────────
+        if mode_type == "Height Map":
+            pred = F.interpolate(h_out[0].cpu(), (512, 512),
+                                 mode="bilinear", align_corners=False)
+            pred = ((pred + 1.0) / 2.0).clamp(0, 1).squeeze().numpy()
+            fig, ax = plt.subplots(figsize=(6, 5), tight_layout=True)
+            im = ax.imshow(pred, cmap="plasma")
+            fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
+            ax.set_title("Predicted Height Map"); ax.axis("off")
+            buf = io.BytesIO()
+            fig.savefig(buf, format="png", dpi=150)
+            plt.close(fig); buf.seek(0)
+            out_img = Image.open(buf).copy()
+            info    = (f"Normalized range: [{pred.min():.4f}, {pred.max():.4f}]\n"
+                       "(0 ≈ 0 m,  1 ≈ 50 m  before denormalization)")
+        else:  # Semantic Map
+            pred   = F.interpolate(s_out, (512, 512), mode="bilinear", align_corners=False)
+            argmax = torch.argmax(pred, dim=1).squeeze().cpu().numpy()
+            canvas = np.zeros((512, 512, 3), dtype=np.uint8)
+            for lbl, col in LABEL_COLORS[dataset_name].items():
+                canvas[argmax == lbl] = col
+            out_img = Image.fromarray(canvas)
+            info    = f"Detected class indices: {np.unique(argmax).tolist()}"
+        return out_img, info
+    finally:
+        # ZeroGPU 子进程结束后 GPU 内存自动释放，
+        # 这里显式移回 CPU 只是额外保险
+        pipe.to("cpu")
+        torch.cuda.empty_cache()
+# ══════════════════════════════════════════════════════════════
+#  Gradio UI
+# ══════════════════════════════════════════════════════════════
+# Two-column layout: inputs and configuration on the left, results on the right.
+with gr.Blocks(title="HeightAdaptor") as demo:
+    gr.Markdown("""
+    # 🏙️ HeightAdaptor
+    **Remote Sensing Image → Height Map / Semantic Segmentation**
+    Backbone: `stable-diffusion-v1-5` + LoRA adaptor (`UEXdo/HeightAdaptor-weight`) + 自定义 Task Heads
+    """)
+    with gr.Row():
+        # ── Left column: inputs & configuration ───────────────
+        with gr.Column(scale=1):
+            inp_img = gr.Image(type="pil", label="📷 Input RGB Image")
+            with gr.Group():
+                gr.Markdown("#### ⚙️ Model Config")
+                dataset_radio = gr.Radio(
+                    ["OpenDC", "US3D"], value="OpenDC", label="Dataset")
+                h_type_radio  = gr.Radio(
+                    ["ER", "DR"], value="ER", label="Height Type (h_type)")
+                load_btn  = gr.Button("🔄 Load / Reload Model", variant="secondary")
+                # Initial status text matches the model preloaded at startup.
+                load_info = gr.Markdown("✅ Default model active (OpenDC / ER)")
+            with gr.Group():
+                gr.Markdown("#### 🎯 Inference Config")
+                task_radio = gr.Radio(
+                    ["Height Estimation", "Semantic Segmentation"],
+                    value="Height Estimation", label="Task")
+                mode_radio = gr.Radio(
+                    ["Height Map", "Semantic Map"],
+                    value="Height Map", label="Output Mode")
+            run_btn = gr.Button("🚀 Run Inference", variant="primary", size="lg")
+        # ── Right column: outputs ─────────────────────────────
+        with gr.Column(scale=1):
+            out_img  = gr.Image(type="pil", label="📊 Output")
+            out_info = gr.Textbox(label="ℹ️ Info", interactive=False, lines=3)
+    gr.Markdown("""
+    ---
+    > ⚠️ **切换 Dataset / Height Type 后，请先点击 Load / Reload Model 再推理。**
+    > 图像会自动缩放至 512 × 512，GPU 推理约需 10–30 秒。
+    """)
+    # ── Event bindings ────────────────────────────────────────
+    # The load button rebuilds the model and shows the returned status string.
+    load_btn.click(
+        fn=reload_model,
+        inputs=[dataset_radio, h_type_radio],
+        outputs=[load_info],
+    )
+    # The run button passes the image and every radio value to run_inference.
+    run_btn.click(
+        fn=run_inference,
+        inputs=[inp_img, task_radio, dataset_radio, h_type_radio, mode_radio],
+        outputs=[out_img, out_info],
+    )
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()