Spaces:

modventures
/

room-visualizer

Sleeping

App Files Files Community

Muhammad Usman Nazir committed 1 day ago

Commit

b1d1ff4

1 Parent(s): d331b50

deploy floor visualizer backend

Browse files

Files changed (19) hide show

.devcontainer/Dockerfile +12 -0
.devcontainer/devcontainer.json +22 -0
.dockerignore +13 -0
.gitignore +42 -0
Dockerfile +46 -0
Dockerfile.hf +46 -0
README.md +34 -4
SETUP.md +35 -0
app.py +1159 -0
requirements-base.txt +14 -0
requirements-gpu-cu126.txt +5 -0
requirements-linux-cpu.txt +4 -0
requirements-mac.txt +3 -0
requirements.txt +89 -0
start.sh +14 -0
visualizer.gpu.toml +16 -0
visualizer.hf.toml +14 -0
visualizer.local.toml +13 -0
visualizer.segformer.toml +14 -0

.devcontainer/Dockerfile ADDED Viewed

	@@ -0,0 +1,12 @@

+FROM python:3.12-slim
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ffmpeg libglib2.0-0 && \
+    rm -rf /var/lib/apt/lists/*
+COPY requirements-mac.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements-mac.txt
+WORKDIR /workspace
+CMD ["python", "app.py"]

.devcontainer/devcontainer.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+    "name": "Room-Tiler-Dev",
+    "build": {
+      "dockerfile": "Dockerfile",
+      "context": ".."
+    },
+    // Forward the Gradio port
+    "forwardPorts": [7860],
+    // Start the app after the container is created (postCreateCommand runs once, after creation, not on every start)
+    "postCreateCommand": "python /workspace/app.py --share --server-name 0.0.0.0",
+    // Dev Container features: installs a Python 3.10 toolchain into the container
+    "features": {
+      "ghcr.io/devcontainers/features/python:1": { "version": "3.10" }
+    },
+    // Run as the root user inside the container
+    "remoteUser": "root"
+  }

.dockerignore ADDED Viewed

	@@ -0,0 +1,13 @@

+.git
+.devcontainer
+.cache
+__pycache__
+*.py[cod]
+*.egg-info
+venv/
+.venv/
+env/
+data/uploads/
+data/jobs/
+.env
+*.log

.gitignore ADDED Viewed

	@@ -0,0 +1,42 @@

+# Virtual environment
+venv/
+.venv/
+env/
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+.Python
+*.egg-info/
+dist/
+build/
+# Model cache
+.cache/
+~/.cache/huggingface/
+# Environment variables
+.env
+.env.local
+# OS
+.DS_Store
+Thumbs.db
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+# Runtime data (uploads and processed job files)
+data/uploads/
+data/jobs/
+# Logs
+*.log
+uvicorn.log
+data/

Dockerfile ADDED Viewed

	@@ -0,0 +1,46 @@

+FROM python:3.10-slim
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    HF_HOME=/home/user/.cache/huggingface \
+    VISUALIZER_CONFIG=visualizer.hf.toml \
+    HOME=/home/user
+# Install system dependencies (git for compphoto/Intrinsic installation, ffmpeg, glib for OpenCV)
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        git \
+        ffmpeg \
+        libglib2.0-0 \
+        libgomp1 \
+        build-essential && \
+    rm -rf /var/lib/apt/lists/*
+# Set up a new user named "user" with UID 1000 for Hugging Face permissions
+RUN useradd -m -u 1000 user
+WORKDIR /app
+# Copy requirements files first
+COPY requirements-base.txt ./
+# Install CPU PyTorch/Torchvision first, then other base requirements
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu && \
+    pip install --no-cache-dir -r requirements-base.txt
+# Copy the rest of the application files
+COPY --chown=user:1000 . .
+# Create writable data directories and change ownership
+RUN mkdir -p data/uploads data/jobs && \
+    chown -R user:1000 /app
+# Switch to the non-root user
+USER user
+# Hugging Face Spaces expects the application on port 7860
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

Dockerfile.hf ADDED Viewed

	@@ -0,0 +1,46 @@

+FROM python:3.10-slim
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    HF_HOME=/home/user/.cache/huggingface \
+    VISUALIZER_CONFIG=visualizer.hf.toml \
+    HOME=/home/user
+# Install system dependencies (git for compphoto/Intrinsic installation, ffmpeg, glib for OpenCV)
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        git \
+        ffmpeg \
+        libglib2.0-0 \
+        libgomp1 \
+        build-essential && \
+    rm -rf /var/lib/apt/lists/*
+# Set up a new user named "user" with UID 1000 for Hugging Face permissions
+RUN useradd -m -u 1000 user
+WORKDIR /app
+# Copy requirements files first
+COPY requirements-base.txt ./
+# Install CPU PyTorch/Torchvision first, then other base requirements
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu && \
+    pip install --no-cache-dir -r requirements-base.txt
+# Copy the rest of the application files
+COPY --chown=user:1000 . .
+# Create writable data directories and change ownership
+RUN mkdir -p data/uploads data/jobs && \
+    chown -R user:1000 /app
+# Switch to the non-root user
+USER user
+# Hugging Face Spaces expects the application on port 7860
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

README.md CHANGED Viewed

@@ -1,10 +1,40 @@
 ---
-title: Room Visualizer
-emoji: 📚
-colorFrom: yellow
 colorTo: purple
-sdk: docker
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Floor Visualizer
+emoji: 🏆
+colorFrom: indigo
 colorTo: purple
+sdk: gradio
+sdk_version: 5.31.0
+app_file: app.py
 pinned: false
+license: mit
+short_description: Visualize custom texture or tiles on your floor
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## Local setup
+The Python virtual environment is disposable. To recreate it after deleting `.venv`,
+use the platform-specific commands in [SETUP.md](SETUP.md).
+Quick macOS CPU run:
+```bash
+python3.12 -m venv .venv
+source .venv/bin/activate
+python -m pip install --upgrade pip
+python -m pip install -r requirements-mac.txt
+VISUALIZER_CONFIG=visualizer.local.toml uvicorn app:app --host 0.0.0.0 --port 8002
+```
+GPU run:
+```bash
+python3.12 -m venv .venv
+source .venv/bin/activate
+python -m pip install --upgrade pip
+python -m pip install --index-url https://download.pytorch.org/whl/cu126 torch==2.7.0 torchvision==0.22.0
+python -m pip install -r requirements-base.txt
+VISUALIZER_CONFIG=visualizer.gpu.toml uvicorn app:app --host 0.0.0.0 --port 8002
+```

SETUP.md ADDED Viewed

	@@ -0,0 +1,35 @@

+# Backend Environment Setup
+Use Python 3.12. The `.venv/` directory is disposable and ignored by git.
+## macOS CPU setup
+```bash
+cd backend/floor-visualizer
+python3.12 -m venv .venv
+source .venv/bin/activate
+python -m pip install --upgrade pip
+python -m pip install -r requirements-mac.txt
+VISUALIZER_CONFIG=visualizer.local.toml uvicorn app:app --host 0.0.0.0 --port 8002
+```
+## NVIDIA GPU setup
+Use this on the GPU machine. This installs the CUDA 12.6 PyTorch wheels.
+```bash
+cd backend/floor-visualizer
+python3.12 -m venv .venv
+source .venv/bin/activate
+python -m pip install --upgrade pip
+python -m pip install --index-url https://download.pytorch.org/whl/cu126 torch==2.7.0 torchvision==0.22.0
+python -m pip install -r requirements-base.txt
+VISUALIZER_CONFIG=visualizer.gpu.toml uvicorn app:app --host 0.0.0.0 --port 8002
+```
+The first GPU run downloads `shi-labs/oneformer_ade20k_swin_large` and the depth model into the Hugging Face cache.
+## Notes
+- Environment variables override TOML values, for example `SEGMENTATION_MODEL=segformer`.
+- `requirements.txt` is a full freeze from an existing environment. Prefer the smaller platform files above when recreating `.venv`.

app.py ADDED Viewed

	@@ -0,0 +1,1159 @@

+import asyncio
+import base64
+import io
+import json
+import os
+import shutil
+import time
+try:
+    import tomllib
+except ImportError:
+    try:
+        import tomli as tomllib
+    except ImportError:
+        try:
+            import tomlkit as tomllib
+        except ImportError:
+            raise ImportError(
+                "No TOML library found. Please run on Python 3.11+, or run 'pip install tomli' to support Python 3.10."
+            )
+import uuid
+from pathlib import Path
+import cv2
+import numpy as np
+import torch
+from fastapi import FastAPI, File, HTTPException, Response, UploadFile, BackgroundTasks
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from PIL import Image
+from transformers import (
+    AutoImageProcessor,
+    AutoModelForDepthEstimation,
+    Mask2FormerForUniversalSegmentation,
+    OneFormerForUniversalSegmentation,
+    OneFormerProcessor,
+    SegformerForSemanticSegmentation,
+)
+# Class names indexed by segmentation class id: class_name_for_id() indexes
+# this list directly and class_ids() maps names back to positions, so the order
+# of the entries is significant and must not be changed.
+ADE20K_CLASSES = [
+    "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed",
+    "window", "grass", "cabinet", "sidewalk", "person", "ground", "door",
+    "table", "mountain", "plant", "curtain", "chair", "car", "water",
+    "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field",
+    "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp",
+    "bathtub", "railing", "cushion", "base", "box", "column", "signboard",
+    "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace",
+    "refrigerator", "stairs", "runway", "bookcase", "blind", "coffee table",
+    "toilet", "flower", "book", "hill", "bench", "countertop", "stove",
+    "palm", "kitchen island", "computer", "swivel chair", "boat", "bar",
+    "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower",
+    "chandelier", "awning", "streetlight", "booth", "television", "airplane",
+    "dirt track", "apparel", "pole", "land", "bannister", "escalator",
+    "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship",
+    "fountain", "conveyer belt", "canopy", "washer", "plaything",
+    "swimming pool", "stool", "barrel", "basket", "waterfall", "tent",
+    "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank",
+    "trade name", "microwave", "pot", "animal", "bicycle", "lake",
+    "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce",
+    "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen",
+    "plate", "monitor", "bulletin board", "shower", "radiator", "glass",
+    "clock", "flag",
+]
+def load_config() -> dict:
+    config_path = os.getenv("VISUALIZER_CONFIG")
+    if not config_path:
+        return {}
+    path = Path(config_path).expanduser()
+    if not path.is_absolute():
+        path = Path(__file__).resolve().parent / path
+    if not path.exists():
+        raise RuntimeError(f"VISUALIZER_CONFIG does not exist: {path}")
+    with path.open("rb") as config_file:
+        return tomllib.load(config_file)
+CONFIG = load_config()
+def config_value(env_name: str, section: str, key: str, default):
+    if env_name in os.environ:
+        return os.environ[env_name]
+    return CONFIG.get(section, {}).get(key, default)
+# ---------------------------------------------------------------------------
+# Module settings. Each value is resolved by config_value(): the environment
+# variable named in the first argument wins, then the TOML [section] key, then
+# the literal default. Everything is passed through str() because environment
+# overrides are strings while TOML values may be typed.
+# ---------------------------------------------------------------------------
+# Requested segmentation backend, lower-cased. _load_segmentation_model()
+# checks for "oneformer" and "mask2former"; any other value goes straight to
+# SegFormer.
+SEGMENTATION_MODEL = str(
+    config_value("SEGMENTATION_MODEL", "models", "segmentation_model", "oneformer")
+).lower()
+# Checkpoint identifiers handed to from_pretrained() for each backend.
+ONEFORMER_MODEL_NAME = str(config_value(
+    "ONEFORMER_MODEL_NAME",
+    "models",
+    "oneformer_model_name",
+    "shi-labs/oneformer_ade20k_swin_large",
+))
+MASK2FORMER_MODEL_NAME = str(config_value(
+    "MASK2FORMER_MODEL_NAME",
+    "models",
+    "mask2former_model_name",
+    "facebook/mask2former-swin-small-ade-semantic",
+))
+SEGFORMER_MODEL_NAME = str(config_value(
+    "SEGFORMER_MODEL_NAME",
+    "models",
+    "segformer_model_name",
+    "nvidia/segformer-b2-finetuned-ade-512-512",
+))
+# Checkpoint used by estimate_depth().
+DEPTH_MODEL_NAME = str(config_value(
+    "DEPTH_MODEL_NAME",
+    "models",
+    "depth_model_name",
+    "Intel/dpt-large",
+))
+# Boolean switches: true only for "1", "true", "yes" or "on" (case-insensitive).
+# Depth estimation defaults to enabled.
+ENABLE_DEPTH_ESTIMATION = str(config_value(
+    "ENABLE_DEPTH_ESTIMATION",
+    "runtime",
+    "enable_depth_estimation",
+    "1",
+)).lower() in {"1", "true", "yes", "on"}
+# Version string passed to intrinsic.pipeline.load_models().
+INTRINSIC_MODEL_VERSION = str(config_value(
+    "INTRINSIC_MODEL_VERSION",
+    "models",
+    "intrinsic_model_version",
+    "v2",
+))
+# Intrinsic shading defaults to disabled.
+ENABLE_INTRINSIC_SHADING = str(config_value(
+    "ENABLE_INTRINSIC_SHADING",
+    "runtime",
+    "enable_intrinsic_shading",
+    "0",
+)).lower() in {"1", "true", "yes", "on"}
+# Root directory for runtime data; the uploads/ and jobs/ subdirectories are
+# created under it further down in this module.
+VISUALIZER_DATA_DIR = str(config_value(
+    "VISUALIZER_DATA_DIR",
+    "runtime",
+    "data_dir",
+    "data",
+))
+# Use CUDA when torch reports it as available, otherwise the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Model handles start empty and are filled in by the loader functions below
+# (_load_segmentation_model, estimate_depth, _load_intrinsic_model).
+seg_processor = None
+seg_model = None
+# Name of the backend that actually loaded; overwritten by _load_segmentation_model().
+segmentation_backend = "segformer"
+depth_processor = None
+depth_model = None
+intrinsic_models = None
+def hf_offline() -> bool:
+    return os.getenv("HF_HUB_OFFLINE") == "1" or os.getenv("TRANSFORMERS_OFFLINE") == "1"
+def _load_segmentation_model():
+    global seg_processor, seg_model, segmentation_backend
+    if SEGMENTATION_MODEL == "oneformer":
+        try:
+            print(f"Loading OneFormer: {ONEFORMER_MODEL_NAME} ...", flush=True)
+            seg_processor = OneFormerProcessor.from_pretrained(
+                ONEFORMER_MODEL_NAME,
+                local_files_only=hf_offline(),
+            )
+            seg_model = OneFormerForUniversalSegmentation.from_pretrained(
+                ONEFORMER_MODEL_NAME,
+                local_files_only=hf_offline(),
+            ).to(device)
+            seg_model.eval()
+            segmentation_backend = "oneformer"
+            print("OneFormer loaded.", flush=True)
+            return
+        except Exception as exc:
+            print(f"OneFormer failed ({exc}), falling back to Mask2Former.", flush=True)
+    if SEGMENTATION_MODEL in {"oneformer", "mask2former"}:
+        try:
+            print(f"Loading Mask2Former: {MASK2FORMER_MODEL_NAME} ...", flush=True)
+            seg_processor = AutoImageProcessor.from_pretrained(
+                MASK2FORMER_MODEL_NAME,
+                local_files_only=hf_offline(),
+            )
+            seg_model = Mask2FormerForUniversalSegmentation.from_pretrained(
+                MASK2FORMER_MODEL_NAME,
+                local_files_only=hf_offline(),
+            ).to(device)
+            seg_model.eval()
+            segmentation_backend = "mask2former"
+            print("Mask2Former loaded.", flush=True)
+            return
+        except Exception as exc:
+            print(f"Mask2Former failed ({exc}), falling back to SegFormer.", flush=True)
+    print(f"Loading SegFormer: {SEGFORMER_MODEL_NAME} ...", flush=True)
+    seg_processor = AutoImageProcessor.from_pretrained(
+        SEGFORMER_MODEL_NAME,
+        local_files_only=hf_offline(),
+    )
+    seg_model = SegformerForSemanticSegmentation.from_pretrained(
+        SEGFORMER_MODEL_NAME,
+        local_files_only=hf_offline(),
+    ).to(device)
+    seg_model.eval()
+    segmentation_backend = "segformer"
+    print("SegFormer loaded.", flush=True)
+def _load_intrinsic_model():
+    global intrinsic_models
+    if ENABLE_INTRINSIC_SHADING and intrinsic_models is None:
+        try:
+            print(f"Loading Intrinsic Image Decomposition model: {INTRINSIC_MODEL_VERSION} ...", flush=True)
+            from intrinsic.pipeline import load_models
+            intrinsic_models = load_models(INTRINSIC_MODEL_VERSION, device=str(device))
+            print("Intrinsic model loaded.", flush=True)
+        except Exception as exc:
+            print(f"Intrinsic model failed to load ({exc}). Falling back to luminance shading.", flush=True)
+# FastAPI application object (served as "app:app" by uvicorn in the Dockerfiles).
+app = FastAPI()
+# CORS is fully open: any origin, method and header is accepted.
+# NOTE(review): confirm this is intended for the deployed Space.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Runtime data layout: <data dir>/uploads and <data dir>/jobs, created at
+# import time if missing.
+DATA_DIR = Path(VISUALIZER_DATA_DIR).resolve()
+UPLOAD_DIR = DATA_DIR / "uploads"
+JOB_DIR = DATA_DIR / "jobs"
+UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+JOB_DIR.mkdir(parents=True, exist_ok=True)
+# Everything in the upload directory is served as static files under /uploads.
+app.mount("/uploads", StaticFiles(directory=UPLOAD_DIR), name="uploads")
+# Class-name groups drawn from ADE20K_CLASSES. REJECT_SURFACE_CLASSES is
+# consumed by wall_subtract(); the other sets are presumably used by code
+# outside this view -- verify against their callers.
+PRIMARY_FLOOR_CLASSES = {"floor"}
+FLOOR_SURFACE_CLASSES = {
+    "floor", "road", "sidewalk", "ground", "field", "grass", "sand",
+    "runway", "dirt track", "land", "stairs", "step",
+}
+REJECT_SURFACE_CLASSES = {"wall", "ceiling", "building", "sky", "window"}
+OCCLUDER_CLASSES = {
+    "bed", "cabinet", "person", "door", "table", "plant", "curtain", "chair",
+    "car", "painting", "sofa", "shelf", "mirror", "rug", "armchair", "seat", "desk",
+    "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box",
+    "column", "chest of drawers", "counter", "sink", "fireplace",
+    "refrigerator", "bookcase", "blind", "coffee table", "toilet", "bench",
+    "countertop", "stove", "kitchen island", "computer", "swivel chair",
+    "bar", "ottoman", "bottle", "buffet", "poster", "towel", "television",
+    "washer", "plaything", "stool", "basket", "bag", "cradle", "oven",
+    "ball", "food", "microwave", "pot", "dishwasher", "blanket", "sculpture",
+    "vase", "tray", "fan", "plate", "monitor", "shower", "radiator", "clock",
+}
+def class_name_for_id(class_id: int) -> str:
+    return ADE20K_CLASSES[class_id] if class_id < len(ADE20K_CLASSES) else f"class_{class_id}"
+def class_ids(names: set[str]) -> list[int]:
+    return [idx for idx, name in enumerate(ADE20K_CLASSES) if name in names]
+def estimate_depth(img: Image.Image, width: int, height: int):
+    global depth_processor, depth_model
+    if not ENABLE_DEPTH_ESTIMATION:
+        return None
+    model_name = DEPTH_MODEL_NAME
+    try:
+        if depth_processor is None or depth_model is None:
+            print(f"Loading depth model: {model_name} ...", flush=True)
+            depth_processor = AutoImageProcessor.from_pretrained(
+                model_name,
+                local_files_only=hf_offline(),
+            )
+            depth_model = AutoModelForDepthEstimation.from_pretrained(
+                model_name,
+                local_files_only=hf_offline(),
+            ).to(device)
+            depth_model.eval()
+            print("Depth model loaded.", flush=True)
+        inputs = depth_processor(images=img, return_tensors="pt").to(device)
+        with torch.no_grad():
+            outputs = depth_model(**inputs)
+            depth = torch.nn.functional.interpolate(
+                outputs.predicted_depth.unsqueeze(1),
+                size=(height, width),
+                mode="bicubic",
+                align_corners=False,
+            ).squeeze().cpu().numpy()
+        depth = cv2.GaussianBlur(depth.astype(np.float32), (0, 0), sigmaX=3)
+        depth_min, depth_max = float(np.min(depth)), float(np.max(depth))
+        if depth_max - depth_min < 1e-6:
+            return None
+        return (depth - depth_min) / (depth_max - depth_min)
+    except Exception as exc:
+        print(f"Depth estimation skipped ({exc}).", flush=True)
+        return None
+# ---------------------------------------------------------------------------
+# B4 — Shade Range Expansion
+# Encode the shade multiplier using the actual brightness spread of the floor
+# rather than a hardcoded [0.55, 1.35] clip, so dark-room images preserve the
+# full dynamic range of their shadow patterns.
+# ---------------------------------------------------------------------------
+def _adaptive_shade_range(relative: np.ndarray, floor_mask: np.ndarray) -> tuple[float, float]:
+    floor_vals = relative[floor_mask > 0]
+    if floor_vals.size == 0:
+        return (0.55, 1.35)
+    lo = max(0.25, float(np.percentile(floor_vals, 1)))
+    hi = min(2.5, float(np.percentile(floor_vals, 99)))
+    span = hi - lo
+    if span < 0.4:
+        mid = (lo + hi) / 2.0
+        lo, hi = mid - 0.2, mid + 0.2
+    return lo, hi
+def _encode_shade(relative: np.ndarray, lo: float, hi: float) -> np.ndarray:
+    span = hi - lo
+    return np.round((np.clip(relative, lo, hi) - lo) * (255.0 / span)).clip(0, 255).astype(np.uint8)
+# ---------------------------------------------------------------------------
+# B1 — Shadow Map Extraction
+# Luminance-based shade map; returns (encoded_uint8, (lo, hi)) so the frontend
+# can decode with the correct range.
+# ---------------------------------------------------------------------------
+def build_shade_map(
+    img_np: np.ndarray, surface_mask: np.ndarray
+) -> tuple[np.ndarray | None, tuple[float, float]]:
+    default_range = (0.55, 1.35)
+    if not surface_mask.any():
+        return None, default_range
+    mask = surface_mask.astype(np.uint8)
+    luminance = (
+        img_np[:, :, 0].astype(np.float32) * 0.299
+        + img_np[:, :, 1].astype(np.float32) * 0.587
+        + img_np[:, :, 2].astype(np.float32) * 0.114
+    )
+    h, w = mask.shape[:2]
+    floor_values = luminance[mask > 0]
+    if floor_values.size < max(256, int(h * w * 0.002)):
+        return None, default_range
+    median_lum = float(np.median(floor_values))
+    if median_lum < 1e-3:
+        return None, default_range
+    filled = luminance.copy()
+    filled[mask == 0] = median_lum
+    missing = (mask == 0).astype(np.uint8) * 255
+    try:
+        filled = cv2.inpaint(
+            np.clip(filled, 0, 255).astype(np.uint8),
+            missing,
+            max(3, min(h, w) // 160),
+            cv2.INPAINT_TELEA,
+        ).astype(np.float32)
+    except cv2.error:
+        pass
+    sigma = max(8.0, min(h, w) / 28.0)
+    smooth = cv2.GaussianBlur(filled, (0, 0), sigmaX=sigma, sigmaY=sigma)
+    relative = smooth / median_lum
+    relative[mask == 0] = 1.0
+    lo, hi = _adaptive_shade_range(relative, mask)
+    return _encode_shade(relative, lo, hi), (lo, hi)
+def build_intrinsic_shade_map(
+    img_np: np.ndarray, surface_mask: np.ndarray
+) -> tuple[np.ndarray | None, tuple[float, float]]:
+    default_range = (0.55, 1.35)
+    if not surface_mask.any() or intrinsic_models is None:
+        return None, default_range
+    try:
+        img_float = img_np.astype(np.float32) / 255.0
+        from intrinsic.pipeline import run_pipeline
+        results = run_pipeline(intrinsic_models, img_float, device=str(device))
+        shading = None
+        if "gry_shd" in results:
+            shading = results["gry_shd"]
+        elif "dif_shd" in results:
+            dif = results["dif_shd"]
+            shading = dif[:, :, 0] * 0.299 + dif[:, :, 1] * 0.587 + dif[:, :, 2] * 0.114
+        else:
+            for k in results.keys():
+                if "shd" in k or "shading" in k:
+                    shading = results[k]
+                    if len(shading.shape) == 3:
+                        shading = shading[:, :, 0] * 0.299 + shading[:, :, 1] * 0.587 + shading[:, :, 2] * 0.114
+                    break
+        if shading is None:
+            return None, default_range
+        h, w = surface_mask.shape[:2]
+        if shading.shape[:2] != (h, w):
+            shading = cv2.resize(shading, (w, h), interpolation=cv2.INTER_LINEAR)
+        sigma = max(3.0, min(h, w) / 80.0)
+        shading = cv2.GaussianBlur(shading.astype(np.float32), (0, 0), sigmaX=sigma, sigmaY=sigma)
+        floor_vals = shading[surface_mask > 0]
+        if floor_vals.size == 0:
+            return None, default_range
+        median_val = float(np.median(floor_vals))
+        if median_val < 1e-3:
+            return None, default_range
+        relative_shading = shading / median_val
+        relative_shading[surface_mask == 0] = 1.0
+        lo, hi = _adaptive_shade_range(relative_shading, surface_mask)
+        return _encode_shade(relative_shading, lo, hi), (lo, hi)
+    except Exception as exc:
+        print(f"Intrinsic shading decomposition failed: {exc}. Falling back to default luminance shading.", flush=True)
+        return None, default_range
+# ---------------------------------------------------------------------------
+# B2 — Color Temperature
+# Sample the brightest floor pixels to infer the room's lighting colour cast
+# and approximate Kelvin value.  Returns a dict with `kelvin` and `cast`
+# (normalised RGB multipliers) so the frontend can tint replacement tiles.
+# ---------------------------------------------------------------------------
+def estimate_color_temperature(
+    img_np: np.ndarray, surface_mask: np.ndarray
+) -> dict | None:
+    if not surface_mask.any():
+        return None
+    pixels = img_np[surface_mask > 0].astype(np.float32)
+    if len(pixels) < 100:
+        return None
+    lum = pixels[:, 0] * 0.299 + pixels[:, 1] * 0.587 + pixels[:, 2] * 0.114
+    thresh = float(np.percentile(lum, 70))
+    bright = pixels[lum >= thresh]
+    if len(bright) < 10:
+        bright = pixels
+    mr = float(np.mean(bright[:, 0]))
+    mg = float(np.mean(bright[:, 1]))
+    mb = float(np.mean(bright[:, 2]))
+    ref = max(mr, mg, mb, 1e-3)
+    rb = mr / max(mb, 1.0)
+    if rb > 1.6:
+        kelvin = 2700
+    elif rb > 1.3:
+        kelvin = 3200
+    elif rb > 1.1:
+        kelvin = 4000
+    elif rb > 0.9:
+        kelvin = 5500
+    elif rb > 0.7:
+        kelvin = 6500
+    else:
+        kelvin = 8000
+    return {
+        "kelvin": kelvin,
+        "cast": {"r": round(mr / ref, 4), "g": round(mg / ref, 4), "b": round(mb / ref, 4)},
+    }
+# ---------------------------------------------------------------------------
+# B3 — Light Vector
+# Estimate the primary in-plane light direction from the gradient of the shade
+# map.  Returns a normalised {x, y} vector pointing toward the light source.
+# ---------------------------------------------------------------------------
+def estimate_light_vector(
+    shade_map: np.ndarray | None, surface_mask: np.ndarray
+) -> dict | None:
+    if shade_map is None or not surface_mask.any():
+        return None
+    shade_f = shade_map.astype(np.float32)
+    valid = surface_mask.astype(np.float32)
+    kern = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
+    valid_e = cv2.erode(valid, kern, iterations=2)
+    clean = shade_f * valid_e
+    gx = cv2.Sobel(clean, cv2.CV_32F, 1, 0, ksize=15) * valid_e
+    gy = cv2.Sobel(clean, cv2.CV_32F, 0, 1, ksize=15) * valid_e
+    mag = np.hypot(gx, gy)
+    total = float(mag.sum())
+    if total < 1e-6:
+        return None
+    lx = float((gx * mag).sum()) / total
+    ly = float((gy * mag).sum()) / total
+    norm = float(np.hypot(lx, ly))
+    if norm < 1e-6:
+        return None
+    return {"x": round(lx / norm, 4), "y": round(ly / norm, 4)}
+def clean_floor_mask(mask: np.ndarray) -> np.ndarray:
+    if mask.dtype != np.uint8:
+        mask = mask.astype(np.uint8)
+    h, w = mask.shape[:2]
+    min_side = max(3, min(h, w))
+    close_size = max(5, int(round(min_side * 0.018))) | 1
+    open_size = max(3, int(round(min_side * 0.006))) | 1
+    closed = cv2.morphologyEx(
+        mask,
+        cv2.MORPH_CLOSE,
+        cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (close_size, close_size)),
+    )
+    cleaned = cv2.morphologyEx(
+        closed,
+        cv2.MORPH_OPEN,
+        cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (open_size, open_size)),
+    )
+    count, labels, stats, _ = cv2.connectedComponentsWithStats(cleaned, connectivity=8)
+    if count <= 1:
+        return cleaned
+    gravity_threshold = int(h * 0.60)
+    min_area = max(1000, int(h * w * 0.01))
+    result = np.zeros_like(cleaned)
+    for component_id in range(1, count):
+        area = stats[component_id, cv2.CC_STAT_AREA]
+        if area < min_area:
+            continue
+        comp_bottom = stats[component_id, cv2.CC_STAT_TOP] + stats[component_id, cv2.CC_STAT_HEIGHT]
+        if comp_bottom <= gravity_threshold:
+            continue
+        result[labels == component_id] = 1
+    if result.any():
+        return result
+    largest = 1 + int(np.argmax(stats[1:, cv2.CC_STAT_AREA]))
+    return (labels == largest).astype(np.uint8)
+def wall_subtract(mask: np.ndarray, seg_map: np.ndarray, dilation: int = 1) -> np.ndarray:
+    reject_raw = np.isin(seg_map, class_ids(REJECT_SURFACE_CLASSES)).astype(np.uint8)
+    if dilation > 0:
+        kern = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
+        reject_raw = cv2.dilate(reject_raw, kern, iterations=dilation)
+    result = mask.copy()
+    result[reject_raw > 0] = 0
+    return result
+def fit_floor_edges(mask: np.ndarray):
+    h, w = mask.shape[:2]
+    row_ys, lefts, rights = [], [], []
+    step = max(1, h // 260)
+    for y in range(0, h, step):
+        row_xs = np.where(mask[y] > 0)[0]
+        if len(row_xs) < max(8, w * 0.01):
+            continue
+        row_ys.append(float(y))
+        lefts.append(float(np.percentile(row_xs, 3)))
+        rights.append(float(np.percentile(row_xs, 97)))
+    if len(row_ys) < 8:
+        return None
+    row_ys_np = np.asarray(row_ys, dtype=np.float32)
+    return np.polyfit(row_ys_np, np.asarray(lefts, dtype=np.float32), 1), np.polyfit(
+        row_ys_np,
+        np.asarray(rights, dtype=np.float32),
+        1,
+    )
+# ---------------------------------------------------------------------------
+# B8 — Convex Hull Quad Fitting
+# Derive a bounding quadrilateral for the floor mask: the minimum-area rotated
+# rectangle of its convex hull, clipped to the image bounds.
+# Used alongside the linear edge-fit quad so that corners of L-shaped rooms
+# and irregular floor boundaries are fully covered.
+# ---------------------------------------------------------------------------
+def convex_hull_quad(mask: np.ndarray) -> np.ndarray | None:
+    ys, xs = np.where(mask > 0)
+    if len(xs) < 50:
+        return None
+    pts = np.column_stack([xs, ys]).astype(np.float32)
+    hull = cv2.convexHull(pts)
+    if hull is None or len(hull) < 4:
+        return None
+    rect = cv2.minAreaRect(hull.squeeze())
+    box = cv2.boxPoints(rect)  # (4, 2) — x,y columns
+    h, w = mask.shape[:2]
+    box[:, 0] = np.clip(box[:, 0], 0, w - 1)
+    box[:, 1] = np.clip(box[:, 1], 0, h - 1)
+    return box
+# ---------------------------------------------------------------------------
+# B6 — Dual Vanishing Point Detection
+# Detect two independent VPs: one from positive-slope lines (converging right)
+# and one from negative-slope lines (converging left), covering oblique shots
+# and corner-camera perspectives.
+# ---------------------------------------------------------------------------
+def detect_dual_vanishing_points(
+    img_np: np.ndarray, floor_mask: np.ndarray
+) -> tuple[dict | None, dict | None]:
+    gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
+    gray = cv2.GaussianBlur(gray, (5, 5), 0)
+    edges = cv2.Canny(gray, 60, 160)
+    edges[floor_mask == 0] = 0
+    lines = cv2.HoughLinesP(
+        edges,
+        rho=1,
+        theta=np.pi / 180,
+        threshold=60,
+        minLineLength=max(40, min(img_np.shape[:2]) // 16),
+        maxLineGap=24,
+    )
+    if lines is None:
+        return None, None
+    h, w = img_np.shape[:2]
+    pos_lines, neg_lines = [], []
+    for line in lines[:, 0, :]:
+        x1, y1, x2, y2 = [float(v) for v in line]
+        dx, dy = x2 - x1, y2 - y1
+        length = float(np.hypot(dx, dy))
+        if length < 40 or abs(dx) < 1:
+            continue
+        slope = dy / dx
+        if abs(slope) < 0.18:
+            continue
+        entry = (x1, y1, x2, y2, slope, length)
+        if slope > 0:
+            pos_lines.append(entry)
+        else:
+            neg_lines.append(entry)
+    def _find_vp(group: list) -> dict | None:
+        intersections = []
+        for i, (x1, y1, _, _, s1, l1) in enumerate(group):
+            a1 = y1 - s1 * x1
+            for x3, y3, _, _, s2, l2 in group[i + 1:]:
+                if abs(s1 - s2) < 0.08:
+                    continue
+                denom = s1 - s2
+                if abs(denom) < 1e-9:
+                    continue
+                x = (a2 := y3 - s2 * x3, (a2 - a1) / denom)[1]
+                y = s1 * x + a1
+                if -w * 0.6 <= x <= w * 1.6 and -h * 1.2 <= y <= h * 1.0:
+                    intersections.append((x, y, min(l1, l2)))
+        if len(intersections) < 3:
+            return None
+        pts = np.array([[p[0], p[1]] for p in intersections], np.float32)
+        weights = np.array([p[2] for p in intersections], np.float32)
+        center = np.average(pts, axis=0, weights=weights)
+        dist = np.linalg.norm(pts - center, axis=1)
+        keep = dist <= np.percentile(dist, 70)
+        if keep.sum() >= 3:
+            center = np.average(pts[keep], axis=0, weights=weights[keep])
+        return {"x": float(center[0]), "y": float(center[1])}
+    vp_right = _find_vp(pos_lines)   # positive-slope lines converge to the right
+    vp_left = _find_vp(neg_lines)    # negative-slope lines converge to the left
+    # Primary VP = the one whose y is lower in the image (closer to the horizon)
+    candidates = [(vp, abs(vp["y"])) for vp in [vp_right, vp_left] if vp is not None]
+    if not candidates:
+        return None, None
+    candidates.sort(key=lambda t: t[1])
+    primary = candidates[0][0]
+    secondary = candidates[1][0] if len(candidates) > 1 else None
+    return primary, secondary
+def estimate_floor_plane(mask: np.ndarray, img_np: np.ndarray):
+    ys, xs = np.where(mask > 0)
+    if len(xs) < 1000:
+        return None, None
+    xs_f, ys_f = xs.astype(np.float32), ys.astype(np.float32)
+    x1, x2 = float(np.percentile(xs_f, 1)), float(np.percentile(xs_f, 99))
+    y1, y2 = float(np.percentile(ys_f, 1)), float(np.percentile(ys_f, 99))
+    width, height = x2 - x1, y2 - y1
+    if width < 20 or height < 20:
+        return None, None
+    top_y = float(np.percentile(ys_f, 8))
+    bottom_y = float(np.percentile(ys_f, 97))
+    edge_fits = fit_floor_edges(mask)
+    if edge_fits is None:
+        return None, None
+    left_fit, right_fit = edge_fits
+    top_left = float(np.polyval(left_fit, top_y))
+    top_right = float(np.polyval(right_fit, top_y))
+    bottom_left = float(np.polyval(left_fit, bottom_y))
+    bottom_right = float(np.polyval(right_fit, bottom_y))
+    lower_xs = xs_f[ys_f >= np.percentile(ys_f, 80)]
+    bottom_left = min(bottom_left, float(np.percentile(lower_xs, 4)))
+    bottom_right = max(bottom_right, float(np.percentile(lower_xs, 96)))
+    min_top_width = max(24.0, width * 0.18)
+    top_center = (top_left + top_right) * 0.5
+    if top_right - top_left < min_top_width:
+        top_left = top_center - min_top_width * 0.5
+        top_right = top_center + min_top_width * 0.5
+    min_bottom_width = max(min_top_width * 1.25, width * 0.45)
+    bottom_center = (bottom_left + bottom_right) * 0.5
+    if bottom_right - bottom_left < min_bottom_width:
+        bottom_left = bottom_center - min_bottom_width * 0.5
+        bottom_right = bottom_center + min_bottom_width * 0.5
+    h, w = mask.shape[:2]
+    src = np.float32([
+        [np.clip(bottom_left, 0, w - 1), np.clip(bottom_y, 0, h - 1)],
+        [np.clip(bottom_right, 0, w - 1), np.clip(bottom_y, 0, h - 1)],
+        [np.clip(top_right, 0, w - 1), np.clip(top_y, 0, h - 1)],
+        [np.clip(top_left, 0, w - 1), np.clip(top_y, 0, h - 1)],
+    ])
+    # B6 — use dual VP; primary VP guides top-edge convergence
+    vanishing_point, vanishing_point2 = detect_dual_vanishing_points(img_np, mask)
+    if vanishing_point is not None and vanishing_point["y"] < bottom_y:
+        vp_x = float(np.clip(vanishing_point["x"], -w * 0.25, w * 1.25))
+        top_width = max(src[2][0] - src[3][0], width * 0.16)
+        horizon_gap = max(bottom_y - top_y, 1.0)
+        convergence = np.clip((top_y - vanishing_point["y"]) / horizon_gap, 0.12, 0.75)
+        top_center = top_center * (1 - convergence * 0.35) + vp_x * (convergence * 0.35)
+        src[3][0] = np.clip(top_center - top_width * 0.5, 0, w - 1)
+        src[2][0] = np.clip(top_center + top_width * 0.5, 0, w - 1)
+    # B8 — expand src quad to cover convex hull corners not reached by linear fits
+    hull_box = convex_hull_quad(mask)
+    hull_quad_list = hull_box.flatten().tolist() if hull_box is not None else None
+    if hull_box is not None:
+        hull_bottom_y = float(np.max(hull_box[:, 1]))
+        hull_top_y = float(np.min(hull_box[:, 1]))
+        hull_left_x = float(np.min(hull_box[:, 0]))
+        hull_right_x = float(np.max(hull_box[:, 0]))
+        src[0][0] = min(src[0][0], hull_left_x)
+        src[1][0] = max(src[1][0], hull_right_x)
+        src[0][1] = src[1][1] = max(src[0][1], hull_bottom_y)
+        src[2][1] = src[3][1] = min(src[2][1], hull_top_y)
+        src = np.clip(src, [0, 0], [w - 1, h - 1])
+    if cv2.contourArea(src) < 100:
+        return None, None
+    dst = np.float32([[x1, y2], [x2, y2], [x2, y1], [x1, y1]])
+    homography = cv2.getPerspectiveTransform(src, dst).flatten().tolist()
+    return homography, {
+        "x": x1,
+        "y": y1,
+        "width": width,
+        "height": height,
+        "quad": src.flatten().tolist(),
+        "hullQuad": hull_quad_list,           # B8
+        "vanishingPoint": vanishing_point,    # B6 primary
+        "vanishingPoint2": vanishing_point2,  # B6 secondary
+    }
+# ---------------------------------------------------------------------------
+# B5 — Complement-Stamp Furniture
+# Use a single dilation pass (down from two) and restore the narrow contact
+# zone directly below each occluder so chair legs, table bases, and plant pots
+# sit flush against the tile surface without a visible gap or halo.
+# ---------------------------------------------------------------------------
+def build_floor_surface_mask(
+    floor_mask: np.ndarray,
+    seg_map: np.ndarray,
+    quad: np.ndarray | None,
+    depth: np.ndarray | None,
+):
+    h, w = floor_mask.shape[:2]
+    kern_size = max(5, min(h, w) // 160) | 1
+    kern = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kern_size, kern_size))
+    occluder_mask = np.isin(seg_map, class_ids(OCCLUDER_CLASSES)).astype(np.uint8)
+    # One dilation pass instead of two — keeps the occluder boundary tight so
+    # furniture feet don't leave a visible halo on the replaced tile surface.
+    occ_dilated = cv2.dilate(occluder_mask, kern, iterations=1)
+    reject_mask = np.isin(seg_map, class_ids(REJECT_SURFACE_CLASSES)).astype(np.uint8)
+    reject_dilated = cv2.dilate(reject_mask, kern, iterations=2)
+    surface = floor_mask.copy()
+    surface[reject_dilated > 0] = 0
+    if not surface.any():
+        surface = floor_mask.copy()
+    contours, _ = cv2.findContours(surface, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if contours:
+        filled = np.zeros((h, w), dtype=np.uint8)
+        cv2.drawContours(filled, contours, -1, 1, cv2.FILLED)
+        filled[reject_dilated > 0] = 0
+        surface = filled
+    if quad is not None and surface.any():
+        plane_mask = np.zeros((h, w), dtype=np.uint8)
+        cv2.fillConvexPoly(plane_mask, np.round(quad).astype(np.int32), 1)
+        plane_mask[reject_dilated > 0] = 0
+        near_floor = cv2.dilate(surface, kern, iterations=6)
+        surface = cv2.bitwise_or(surface, cv2.bitwise_and(plane_mask, near_floor))
+    surface[occ_dilated > 0] = 0
+    if depth is not None and floor_mask.any():
+        floor_depth = depth[floor_mask > 0]
+        lo, hi = float(np.percentile(floor_depth, 2)), float(np.percentile(floor_depth, 98))
+        margin = max(0.08, (hi - lo) * 0.35)
+        depth_keep = (depth >= lo - margin) & (depth <= hi + margin)
+        surface = (surface & depth_keep.astype(np.uint8)).astype(np.uint8)
+        surface[floor_mask > 0] = np.maximum(surface[floor_mask > 0], 1)
+        surface[occ_dilated > 0] = 0
+        surface[reject_dilated > 0] = 0
+    surface = clean_floor_mask(surface)
+    surface[occ_dilated > 0] = 0
+    surface[reject_dilated > 0] = 0
+    boundary_kern = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
+    surface = cv2.dilate(surface, boundary_kern, iterations=1)
+    surface[occ_dilated > 0] = 0
+    surface[reject_dilated > 0] = 0
+    # Restore the narrow contact zone at the bottom edge of each occluder so
+    # furniture touches the tile surface naturally (B5).
+    contact_kern_v = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 3))
+    occ_eroded = cv2.erode(occluder_mask, contact_kern_v, iterations=1)
+    occ_bottom_edge = cv2.subtract(occluder_mask, occ_eroded)
+    contact_tiny = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
+    contact_zone = cv2.dilate(occ_bottom_edge, contact_tiny, iterations=1)
+    restore = cv2.bitwise_and(contact_zone, floor_mask)
+    surface = cv2.bitwise_or(surface, restore)
+    surface[reject_dilated > 0] = 0
+    return surface
+# ---------------------------------------------------------------------------
+# B10 — Confidence-Aware Boundaries
+# Distance-transform the surface mask so pixels near its edge get a low
+# confidence score.  The frontend uses this to feather tile blending at
+# boundary transitions instead of a hard cut.
+# ---------------------------------------------------------------------------
+def build_confidence_map(surface_mask: np.ndarray) -> np.ndarray | None:
+    if not surface_mask.any():
+        return None
+    dist = cv2.distanceTransform(surface_mask.astype(np.uint8), cv2.DIST_L2, 5)
+    feather = max(10.0, min(surface_mask.shape[:2]) / 50.0)
+    confidence = np.clip(dist / feather, 0.0, 1.0)
+    return (confidence * 255).astype(np.uint8)
+# ---------------------------------------------------------------------------
+# B7 — Multi-Room Grid Alignment
+# Find all connected floor regions large enough to tile.  All regions share
+# the primary region's homography so the tile grid continues seamlessly across
+# doorways without restarting.
+# ---------------------------------------------------------------------------
+def find_floor_regions(surface_mask: np.ndarray, min_area: int) -> list[np.ndarray]:
+    count, labels, stats, _ = cv2.connectedComponentsWithStats(
+        surface_mask.astype(np.uint8), connectivity=8
+    )
+    regions = []
+    for comp_id in range(1, count):
+        if int(stats[comp_id, cv2.CC_STAT_AREA]) >= min_area:
+            regions.append((labels == comp_id).astype(np.uint8))
+    regions.sort(key=lambda m: int(m.sum()), reverse=True)
+    return regions
+def run_segmentation(img: Image.Image, img_np: np.ndarray):
+    global seg_processor, seg_model
+    if seg_model is None:
+        _load_segmentation_model()
+    h, w = img_np.shape[:2]
+    if segmentation_backend == "oneformer":
+        inputs = seg_processor(
+            images=img,
+            task_inputs=["semantic"],
+            return_tensors="pt",
+        ).to(device)
+        with torch.no_grad():
+            outputs = seg_model(**inputs)
+        result = seg_processor.post_process_semantic_segmentation(
+            outputs,
+            target_sizes=[(h, w)],
+        )[0]
+        return result.cpu().numpy().astype(np.uint8)
+    if segmentation_backend == "mask2former":
+        inputs = seg_processor(images=img, return_tensors="pt").to(device)
+        with torch.no_grad():
+            outputs = seg_model(**inputs)
+        is_panoptic = "panoptic" in MASK2FORMER_MODEL_NAME
+        if is_panoptic:
+            pan_result = seg_processor.post_process_panoptic_segmentation(
+                outputs,
+                target_sizes=[(h, w)],
+            )[0]
+            seg_map = np.zeros((h, w), dtype=np.uint8)
+            pan_map = pan_result["segmentation"].cpu().numpy()
+            for seg_info in pan_result["segments_info"]:
+                seg_map[pan_map == seg_info["id"]] = min(seg_info["label_id"], 255)
+            return seg_map
+        result = seg_processor.post_process_semantic_segmentation(
+            outputs,
+            target_sizes=[(h, w)],
+        )[0]
+        return result.cpu().numpy().astype(np.uint8)
+    inputs = seg_processor(images=img, return_tensors="pt").to(device)
+    with torch.no_grad():
+        outputs = seg_model(**inputs)
+    seg = outputs.logits.argmax(dim=1).squeeze().cpu().numpy()
+    return cv2.resize(seg.astype(np.uint8), (w, h), interpolation=cv2.INTER_NEAREST)
+def segmenter_metadata_name() -> str:
+    if segmentation_backend == "oneformer":
+        return "oneformer-ade20k-swin-large"
+    return segmentation_backend
+def build_segmentation_bundle(contents: bytes):
+    t_start = time.perf_counter()
+    t0 = time.perf_counter()
+    img = Image.open(io.BytesIO(contents)).convert("RGB")
+    img_np = np.array(img)
+    h, w = img_np.shape[:2]
+    min_floor_area = max(1200, int(w * h * 0.015))
+    print(f"[TIMING] Image loading/parsing took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    seg_map = run_segmentation(img, img_np)
+    print(f"[TIMING] Floor segmentation took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    rgba = np.dstack([img_np, np.full((h, w), 255, dtype=np.uint8)])
+    pixels_b64 = base64.b64encode(rgba.tobytes()).decode()
+    print(f"[TIMING] Image RGBA encoding took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    primary_floor_ids = class_ids(PRIMARY_FLOOR_CLASSES)
+    floor_class_ids = class_ids(FLOOR_SURFACE_CLASSES)
+    floor_mask = np.isin(seg_map, primary_floor_ids).astype(np.uint8)
+    floor_mask = wall_subtract(floor_mask, seg_map, dilation=1)
+    floor_mask = clean_floor_mask(floor_mask)
+    if int(floor_mask.sum()) < min_floor_area:
+        floor_mask = np.isin(seg_map, floor_class_ids).astype(np.uint8)
+        floor_mask = wall_subtract(floor_mask, seg_map, dilation=1)
+        floor_mask = clean_floor_mask(floor_mask)
+    print(f"[TIMING] Floor masking/cleanup took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    depth = estimate_depth(img, w, h)
+    print(f"[TIMING] Depth estimation took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    homography, plane = estimate_floor_plane(floor_mask, img_np)
+    print(f"[TIMING] Plane fitting / homography calculation took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    quad = np.asarray(plane["quad"], dtype=np.float32).reshape(4, 2) if plane and plane.get("quad") else None
+    surface_mask = build_floor_surface_mask(floor_mask, seg_map, quad, depth)
+    print(f"[TIMING] Surface masking took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    shade_map, shade_range = None, (0.55, 1.35)
+    if ENABLE_INTRINSIC_SHADING:
+        if intrinsic_models is None:
+            _load_intrinsic_model()
+        if intrinsic_models is not None:
+            shade_map, shade_range = build_intrinsic_shade_map(img_np, surface_mask)
+    if shade_map is None:
+        shade_map, shade_range = build_shade_map(img_np, surface_mask)
+    print(f"[TIMING] Shade map construction took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    color_temperature = estimate_color_temperature(img_np, surface_mask)   # B2
+    light_vector = estimate_light_vector(shade_map, surface_mask)           # B3
+    confidence_map = build_confidence_map(surface_mask)                     # B10
+    print(f"[TIMING] Lighting analysis took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    # B7 — split the surface mask into connected regions; all share the same
+    # homography so the tile grid is continuous across doorways.
+    t0 = time.perf_counter()
+    floor_regions = find_floor_regions(surface_mask, min_floor_area)
+    multi_room = len(floor_regions) > 1
+    print(f"[TIMING] Floor region detection took {time.perf_counter() - t0:.3f} seconds", flush=True)
+    t0 = time.perf_counter()
+    segments = []
+    if floor_regions:
+        for region_idx, region_mask in enumerate(floor_regions):
+            region_indices = np.flatnonzero(region_mask.ravel()).astype(np.uint32)
+            if len(region_indices) < min_floor_area:
+                continue
+            # Per-region confidence sub-map
+            region_conf = build_confidence_map(region_mask)
+            segments.append({
+                "id": region_idx,
+                "className": "floor",
+                "mask": base64.b64encode(region_indices.tobytes()).decode(),
+                "homography": homography,          # shared across all regions (B7)
+                "plane": plane,
+                "shadeMap": base64.b64encode(shade_map.tobytes()).decode() if shade_map is not None else None,
+                "shadeRange": list(shade_range),   # B4 — frontend decodes with this
+                "colorTemperature": color_temperature,  # B2
+                "lightVector": light_vector,            # B3
+                "confidenceMap": base64.b64encode(region_conf.tobytes()).decode() if region_conf is not None else None,  # B10
+                "multiRoom": multi_room,                # B7
+                "gridGroup": "primary" if region_idx == 0 else f"room_{region_idx}",  # B7
+                "metadata": {
+                    "segmenter": segmenter_metadata_name(),
+                    "floorPixels": int(floor_mask.sum()),
+                    "surfacePixels": int(region_mask.sum()),
+                    "depthEnabled": depth is not None,
+                    "shadingEnabled": shade_map is not None,
+                },
+            })
+    if not segments:
+        flat_seg = seg_map.ravel()
+        for seg_id, class_id in enumerate(np.unique(flat_seg)):
+            indices = np.where(flat_seg == class_id)[0].astype(np.uint32)
+            if len(indices) < 1000:
+                continue
+            segments.append({
+                "id": int(seg_id),
+                "className": class_name_for_id(int(class_id)),
+                "mask": base64.b64encode(indices.tobytes()).decode(),
+                "homography": None,
+                "plane": None,
+                "shadeMap": None,
+                "shadeRange": None,
+                "colorTemperature": None,
+                "lightVector": None,
+                "confidenceMap": None,
+                "multiRoom": False,
+                "gridGroup": None,
+                "metadata": {
+                    "segmenter": segmenter_metadata_name(),
+                    "depthEnabled": depth is not None,
+                    "shadingEnabled": False,
+                },
+            })
+    print(f"[TIMING] Total bundle processing completed in {time.perf_counter() - t_start:.3f} seconds", flush=True)
+    return {"width": w, "height": h, "pixels": pixels_b64, "segments": segments}
+def job_path(job_id: str) -> Path:
+    return JOB_DIR / f"{job_id}.json"
+def read_job(job_id: str):
+    path = job_path(job_id)
+    if not path.exists():
+        raise HTTPException(status_code=404, detail="Job not found.")
+    return json.loads(path.read_text())
+def write_job(job: dict):
+    job_path(job["id"]).write_text(json.dumps(job))
+def run_conversion_task(job_id: str, upload_path: Path):
+    try:
+        t_start = time.perf_counter()
+        image_bytes = upload_path.read_bytes()
+        bundle = build_segmentation_bundle(image_bytes)
+        (JOB_DIR / f"{job_id}.bundle.json").write_text(json.dumps(bundle))
+        job = read_job(job_id)
+        job["status"] = "COMPLETED"
+        write_job(job)
+        print(f"[TIMING] Background conversion task for job {job_id} took {time.perf_counter() - t_start:.3f} seconds", flush=True)
+    except Exception as exc:
+        print(f"Background conversion failed: {exc}", flush=True)
+        try:
+            job = read_job(job_id)
+            job["status"] = "FAILED"
+            job["error"] = str(exc)
+            write_job(job)
+        except Exception:
+            pass
+@app.post("/viz2d/convert")
+async def convert_to_viz2d(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
+    if file.content_type and not file.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="Upload must be a JPG or PNG image.")
+    job_id = uuid.uuid4().hex
+    ext = Path(file.filename or "room.jpg").suffix.lower()
+    if ext not in {".jpg", ".jpeg", ".png", ".webp"}:
+        ext = ".jpg"
+    upload_path = UPLOAD_DIR / f"{job_id}{ext}"
+    with upload_path.open("wb") as out:
+        shutil.copyfileobj(file.file, out)
+    job = {
+        "id": job_id,
+        "status": "PROCESSING",
+        "inputUrl": f"/uploads/{upload_path.name}",
+        "outputUrl": f"/viz2d/jobs/{job_id}/file",
+    }
+    write_job(job)
+    background_tasks.add_task(run_conversion_task, job_id, upload_path)
+    return job
+@app.get("/viz2d/jobs/{job_id}")
+async def viz2d_job_status(job_id: str):
+    return read_job(job_id)
+@app.get("/viz2d/jobs/{job_id}/file")
+async def viz2d_job_file(job_id: str):
+    job = read_job(job_id)
+    if job.get("status") != "COMPLETED":
+        raise HTTPException(status_code=409, detail="Job is not completed yet.")
+    bundle_path = JOB_DIR / f"{job_id}.bundle.json"
+    if not bundle_path.exists():
+        raise HTTPException(status_code=404, detail="Job output not found.")
+    return Response(
+        content=bundle_path.read_bytes(),
+        media_type="application/json",
+        headers={"Content-Disposition": 'attachment; filename="visualizer.vizbundle.json"'},
+    )
+@app.post("/segment")
+async def segment(file: UploadFile = File(...)):
+    contents = await file.read()
+    return build_segmentation_bundle(contents)
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8002)

requirements-base.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+aiofiles==24.1.0
+fastapi==0.115.12
+huggingface-hub==0.32.0
+numpy==2.2.6
+opencv-python-headless==4.11.0.86
+pillow==11.2.1
+python-multipart==0.0.20
+safetensors==0.5.3
+timm==1.0.15
+tokenizers==0.15.2
+transformers==4.38.2
+uvicorn==0.34.2
+scipy
+git+https://github.com/compphoto/Intrinsic.git

requirements-gpu-cu126.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+-r requirements-base.txt
+--extra-index-url https://download.pytorch.org/whl/cu126
+torch==2.7.0+cu126
+torchvision==0.22.0+cu126
+triton==3.3.0

requirements-linux-cpu.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+-r requirements-base.txt
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch==2.7.0+cpu
+torchvision==0.22.0+cpu

requirements-mac.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+-r requirements-base.txt
+torch==2.7.0
+torchvision==0.22.0

requirements.txt ADDED Viewed

	@@ -0,0 +1,89 @@

+aiofiles==24.1.0
+annotated-types==0.7.0
+anyio==4.9.0
+certifi==2025.4.26
+charset-normalizer==3.4.2
+click==8.1.8
+contourpy==1.3.2
+cycler==0.12.1
+exceptiongroup==1.3.0
+fastapi==0.115.12
+ffmpy==0.5.0
+filelock==3.18.0
+fonttools==4.58.0
+fsspec==2025.5.1
+gradio==5.31.0
+gradio_client==1.10.1
+groovy==0.1.2
+h11==0.16.0
+hf-xet==1.1.2
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.32.0
+idna==3.10
+Jinja2==3.1.6
+kiwisolver==1.4.8
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+matplotlib==3.10.3
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.4.2
+numpy==2.2.6
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
+opencv-python==4.11.0.86
+orjson==3.10.18
+packaging==25.0
+pandas==2.2.3
+pillow==11.2.1
+pydantic==2.11.5
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.1
+pyparsing==3.2.3
+python-dateutil==2.9.0.post0
+python-multipart==0.0.20
+pytz==2025.2
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+rich==14.0.0
+ruff==0.11.11
+safehttpx==0.1.6
+safetensors==0.5.3
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+starlette==0.46.2
+sympy==1.14.0
+timm==1.0.15
+tokenizers==0.15.2
+tomlkit==0.13.2
+torch==2.7.0
+torchvision==0.22.0
+tqdm==4.67.1
+transformers==4.38.2
+triton==3.3.0
+typer==0.15.4
+typing-inspection==0.4.1
+typing_extensions==4.13.2
+tzdata==2025.2
+urllib3==2.4.0
+uvicorn==0.34.2
+websockets==15.0.1
+scipy
+git+https://github.com/compphoto/Intrinsic.git

start.sh ADDED Viewed

	@@ -0,0 +1,14 @@

+#!/bin/bash
+# Startup script for the floor visualizer backend.
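+# Run after cloning the repo on the server: it installs dependencies, creates
+# the data directories, and then starts the API server in the foreground.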
+set -e
+echo "==> Installing dependencies..."
+pip install --no-cache-dir -r requirements-linux-cpu.txt
+echo "==> Creating data directories..."
+mkdir -p data/uploads data/jobs
+echo "==> Starting server..."
+VISUALIZER_CONFIG=visualizer.segformer.toml uvicorn app:app --host 0.0.0.0 --port 8002 --workers 1

visualizer.gpu.toml ADDED Viewed

	@@ -0,0 +1,16 @@

+# GPU quality preset for the floor visualizer backend.
+# Run with:
+#   VISUALIZER_CONFIG=visualizer.gpu.toml uvicorn app:app --host 0.0.0.0 --port 8002
+[models]
+segmentation_model = "oneformer"
+oneformer_model_name = "shi-labs/oneformer_ade20k_swin_large"
+mask2former_model_name = "facebook/mask2former-swin-small-ade-semantic"
+segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
+depth_model_name = "Intel/dpt-large"
+intrinsic_model_version = "v2"
+[runtime]
+enable_depth_estimation = true
+enable_intrinsic_shading = true
+data_dir = "data"

visualizer.hf.toml ADDED Viewed

	@@ -0,0 +1,14 @@

+# Preset for Hugging Face Spaces (CPU-friendly with depth & shading enabled)
+# Run with:
+#   VISUALIZER_CONFIG=visualizer.hf.toml uvicorn app:app --host 0.0.0.0 --port 7860
+[models]
+segmentation_model = "segformer"
+segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
+depth_model_name = "Intel/dpt-large"
+intrinsic_model_version = "v2"
+[runtime]
+enable_depth_estimation = true
+enable_intrinsic_shading = true
+data_dir = "data"

visualizer.local.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+# Lightweight local preset for CPU-only development on macOS.
+# Run with:
+#   VISUALIZER_CONFIG=visualizer.local.toml uvicorn app:app --host 0.0.0.0 --port 8002
+[models]
+segmentation_model = "segformer"
+segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
+depth_model_name = "Intel/dpt-large"
+[runtime]
+enable_depth_estimation = false
+enable_intrinsic_shading = false
+data_dir = "data"

visualizer.segformer.toml ADDED Viewed

	@@ -0,0 +1,14 @@

+# CPU / low-VRAM preset using SegFormer as the primary segmentation model.
+# Use this on CPU-only servers or while waiting for GPU quota approval.
+# Run with:
+#   VISUALIZER_CONFIG=visualizer.segformer.toml uvicorn app:app --host 0.0.0.0 --port 8002
+[models]
+segmentation_model = "segformer"
+segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
+depth_model_name = "Intel/dpt-large"
+[runtime]
+enable_depth_estimation = false
+enable_intrinsic_shading = false
+data_dir = "data"