Muhammad Usman Nazir commited on
Commit
b1d1ff4
·
1 Parent(s): d331b50

deploy floor visualizer backend

Browse files
.devcontainer/Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ RUN apt-get update && \
4
+ apt-get install -y --no-install-recommends ffmpeg libglib2.0-0 && \
5
+ rm -rf /var/lib/apt/lists/*
6
+
7
+ COPY requirements-mac.txt .
8
+ RUN pip install --no-cache-dir --upgrade pip && \
9
+ pip install --no-cache-dir -r requirements-mac.txt
10
+
11
+ WORKDIR /workspace
12
+ CMD ["python", "app.py"]
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Room-Tiler-Dev",
3
+ "build": {
4
+ "dockerfile": "Dockerfile",
5
+ "context": ".."
6
+ },
7
+
8
+ // Forward the Gradio port
9
+ "forwardPorts": [7860],
10
+
11
+ // Start the app once, right after the container has been created (postCreateCommand does not re-run on later starts)
12
+ "postCreateCommand": "python /workspace/app.py --share --server-name 0.0.0.0",
13
+
14
+ // Dev Container Features to install into the image (here: a Python toolchain)
15
+ "features": {
16
+ "ghcr.io/devcontainers/features/python:1": { "version": "3.10" }
17
+ },
18
+
19
+ // User that tools and terminals run as inside the container
20
+ "remoteUser": "root"
21
+ }
22
+
.dockerignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .devcontainer
3
+ .cache
4
+ __pycache__
5
+ *.py[cod]
6
+ *.egg-info
7
+ venv/
8
+ .venv/
9
+ env/
10
+ data/uploads/
11
+ data/jobs/
12
+ .env
13
+ *.log
.gitignore ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Virtual environment
2
+ venv/
3
+ .venv/
4
+ env/
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *.pyo
10
+ *.pyd
11
+ .Python
12
+ *.egg-info/
13
+ dist/
14
+ build/
15
+
16
+ # Model cache
17
+ .cache/
18
+ ~/.cache/huggingface/
19
+
20
+ # Environment variables
21
+ .env
22
+ .env.local
23
+
24
+ # OS
25
+ .DS_Store
26
+ Thumbs.db
27
+
28
+ # IDE
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+
34
+ # Runtime data (uploads and processed job files)
35
+ data/uploads/
36
+ data/jobs/
37
+
38
+ # Logs
39
+ *.log
40
+ uvicorn.log
41
+
42
+ data/
Dockerfile ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # Set environment variables
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PYTHONDONTWRITEBYTECODE=1 \
6
+ HF_HOME=/home/user/.cache/huggingface \
7
+ VISUALIZER_CONFIG=visualizer.hf.toml \
8
+ HOME=/home/user
9
+
10
+ # Install system dependencies (git for compphoto/Intrinsic installation, ffmpeg, glib for OpenCV)
11
+ RUN apt-get update && \
12
+ apt-get install -y --no-install-recommends \
13
+ git \
14
+ ffmpeg \
15
+ libglib2.0-0 \
16
+ libgomp1 \
17
+ build-essential && \
18
+ rm -rf /var/lib/apt/lists/*
19
+
20
+ # Set up a new user named "user" with UID 1000 for Hugging Face permissions
21
+ RUN useradd -m -u 1000 user
22
+
23
+ WORKDIR /app
24
+
25
+ # Copy requirements files first
26
+ COPY requirements-base.txt ./
27
+
28
+ # Install CPU PyTorch/Torchvision first, then other base requirements
29
+ RUN pip install --no-cache-dir --upgrade pip && \
30
+ pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu && \
31
+ pip install --no-cache-dir -r requirements-base.txt
32
+
33
+ # Copy the rest of the application files
34
+ COPY --chown=user:1000 . .
35
+
36
+ # Create writable data directories and change ownership
37
+ RUN mkdir -p data/uploads data/jobs && \
38
+ chown -R user:1000 /app
39
+
40
+ # Switch to the non-root user
41
+ USER user
42
+
43
+ # Hugging Face Spaces expects the application on port 7860
44
+ EXPOSE 7860
45
+
46
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
Dockerfile.hf ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # Set environment variables
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PYTHONDONTWRITEBYTECODE=1 \
6
+ HF_HOME=/home/user/.cache/huggingface \
7
+ VISUALIZER_CONFIG=visualizer.hf.toml \
8
+ HOME=/home/user
9
+
10
+ # Install system dependencies (git for compphoto/Intrinsic installation, ffmpeg, glib for OpenCV)
11
+ RUN apt-get update && \
12
+ apt-get install -y --no-install-recommends \
13
+ git \
14
+ ffmpeg \
15
+ libglib2.0-0 \
16
+ libgomp1 \
17
+ build-essential && \
18
+ rm -rf /var/lib/apt/lists/*
19
+
20
+ # Set up a new user named "user" with UID 1000 for Hugging Face permissions
21
+ RUN useradd -m -u 1000 user
22
+
23
+ WORKDIR /app
24
+
25
+ # Copy requirements files first
26
+ COPY requirements-base.txt ./
27
+
28
+ # Install CPU PyTorch/Torchvision first, then other base requirements
29
+ RUN pip install --no-cache-dir --upgrade pip && \
30
+ pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu && \
31
+ pip install --no-cache-dir -r requirements-base.txt
32
+
33
+ # Copy the rest of the application files
34
+ COPY --chown=user:1000 . .
35
+
36
+ # Create writable data directories and change ownership
37
+ RUN mkdir -p data/uploads data/jobs && \
38
+ chown -R user:1000 /app
39
+
40
+ # Switch to the non-root user
41
+ USER user
42
+
43
+ # Hugging Face Spaces expects the application on port 7860
44
+ EXPOSE 7860
45
+
46
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
README.md CHANGED
@@ -1,10 +1,40 @@
1
  ---
2
- title: Room Visualizer
3
- emoji: 📚
4
- colorFrom: yellow
5
  colorTo: purple
6
- sdk: docker
 
 
7
  pinned: false
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Floor Visualizer
3
+ emoji: 🏆
4
+ colorFrom: indigo
5
  colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.31.0
8
+ app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Visualize custom texture or tiles on your floor
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
15
+
16
+ ## Local setup
17
+
18
+ The Python virtual environment is disposable. To recreate it after deleting `.venv`,
19
+ use the platform-specific commands in [SETUP.md](SETUP.md).
20
+
21
+ Quick macOS CPU run:
22
+
23
+ ```bash
24
+ python3.12 -m venv .venv
25
+ source .venv/bin/activate
26
+ python -m pip install --upgrade pip
27
+ python -m pip install -r requirements-mac.txt
28
+ VISUALIZER_CONFIG=visualizer.local.toml uvicorn app:app --host 0.0.0.0 --port 8002
29
+ ```
30
+
31
+ GPU run:
32
+
33
+ ```bash
34
+ python3.12 -m venv .venv
35
+ source .venv/bin/activate
36
+ python -m pip install --upgrade pip
37
+ python -m pip install --index-url https://download.pytorch.org/whl/cu126 torch==2.7.0 torchvision==0.22.0
38
+ python -m pip install -r requirements-base.txt
39
+ VISUALIZER_CONFIG=visualizer.gpu.toml uvicorn app:app --host 0.0.0.0 --port 8002
40
+ ```
SETUP.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Backend Environment Setup
2
+
3
+ Use Python 3.12. The `.venv/` directory is disposable and ignored by git.
4
+
5
+ ## macOS CPU setup
6
+
7
+ ```bash
8
+ cd backend/floor-visualizer
9
+ python3.12 -m venv .venv
10
+ source .venv/bin/activate
11
+ python -m pip install --upgrade pip
12
+ python -m pip install -r requirements-mac.txt
13
+ VISUALIZER_CONFIG=visualizer.local.toml uvicorn app:app --host 0.0.0.0 --port 8002
14
+ ```
15
+
16
+ ## NVIDIA GPU setup
17
+
18
+ Use this on the GPU machine. This installs the CUDA 12.6 PyTorch wheels.
19
+
20
+ ```bash
21
+ cd backend/floor-visualizer
22
+ python3.12 -m venv .venv
23
+ source .venv/bin/activate
24
+ python -m pip install --upgrade pip
25
+ python -m pip install --index-url https://download.pytorch.org/whl/cu126 torch==2.7.0 torchvision==0.22.0
26
+ python -m pip install -r requirements-base.txt
27
+ VISUALIZER_CONFIG=visualizer.gpu.toml uvicorn app:app --host 0.0.0.0 --port 8002
28
+ ```
29
+
30
+ The first GPU run downloads `shi-labs/oneformer_ade20k_swin_large` and the depth model into the Hugging Face cache.
31
+
32
+ ## Notes
33
+
34
+ - Environment variables override TOML values, for example `SEGMENTATION_MODEL=segformer`.
35
+ - `requirements.txt` is a full freeze from an existing environment. Prefer the smaller platform files above when recreating `.venv`.
app.py ADDED
@@ -0,0 +1,1159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import io
4
+ import json
5
+ import os
6
+ import shutil
7
+ import time
8
+ try:
9
+ import tomllib
10
+ except ImportError:
11
+ try:
12
+ import tomli as tomllib
13
+ except ImportError:
14
+ try:
15
+ import tomlkit as tomllib
16
+ except ImportError:
17
+ raise ImportError(
18
+ "No TOML library found. Please run on Python 3.11+, or run 'pip install tomli' to support Python 3.10."
19
+ )
20
+ import uuid
21
+ from pathlib import Path
22
+
23
+ import cv2
24
+ import numpy as np
25
+ import torch
26
+ from fastapi import FastAPI, File, HTTPException, Response, UploadFile, BackgroundTasks
27
+ from fastapi.middleware.cors import CORSMiddleware
28
+ from fastapi.staticfiles import StaticFiles
29
+ from PIL import Image
30
+ from transformers import (
31
+ AutoImageProcessor,
32
+ AutoModelForDepthEstimation,
33
+ Mask2FormerForUniversalSegmentation,
34
+ OneFormerForUniversalSegmentation,
35
+ OneFormerProcessor,
36
+ SegformerForSemanticSegmentation,
37
+ )
38
+
39
+
40
# ADE20K-150 label names. The position of a name in this list is used as the
# class id (see class_ids / class_name_for_id), so the list must contain all
# 150 labels in the dataset's canonical order; dropping or reordering an entry
# silently shifts every later id. Names are the short forms used elsewhere in
# this file (e.g. "window" for "windowpane", "ground" for "earth").
ADE20K_CLASSES = [
    # 0-9
    "wall", "building", "sky", "floor", "tree",
    "ceiling", "road", "bed", "window", "grass",
    # 10-19
    "cabinet", "sidewalk", "person", "ground", "door",
    "table", "mountain", "plant", "curtain", "chair",
    # 20-29
    "car", "water", "painting", "sofa", "shelf",
    "house", "sea", "mirror", "rug", "field",
    # 30-39
    "armchair", "seat", "fence", "desk", "rock",
    "wardrobe", "lamp", "bathtub", "railing", "cushion",
    # 40-49
    "base", "box", "column", "signboard", "chest of drawers",
    "counter", "sand", "sink", "skyscraper", "fireplace",
    # 50-59
    "refrigerator", "grandstand", "path", "stairs", "runway",
    "case", "pool table", "pillow", "screen door", "stairway",
    # 60-69
    "river", "bridge", "bookcase", "blind", "coffee table",
    "toilet", "flower", "book", "hill", "bench",
    # 70-79
    "countertop", "stove", "palm", "kitchen island", "computer",
    "swivel chair", "boat", "bar", "arcade machine", "hovel",
    # 80-89
    "bus", "towel", "light", "truck", "tower",
    "chandelier", "awning", "streetlight", "booth", "television",
    # 90-99
    "airplane", "dirt track", "apparel", "pole", "land",
    "bannister", "escalator", "ottoman", "bottle", "buffet",
    # 100-109
    "poster", "stage", "van", "ship", "fountain",
    "conveyer belt", "canopy", "washer", "plaything", "swimming pool",
    # 110-119
    "stool", "barrel", "basket", "waterfall", "tent",
    "bag", "minibike", "cradle", "oven", "ball",
    # 120-129
    "food", "step", "tank", "trade name", "microwave",
    "pot", "animal", "bicycle", "lake", "dishwasher",
    # 130-139
    "screen", "blanket", "sculpture", "hood", "sconce",
    "vase", "traffic light", "tray", "ashcan", "fan",
    # 140-149
    "pier", "crt screen", "plate", "monitor", "bulletin board",
    "shower", "radiator", "glass", "clock", "flag",
]
64
+
65
+ def load_config() -> dict:
66
+ config_path = os.getenv("VISUALIZER_CONFIG")
67
+ if not config_path:
68
+ return {}
69
+
70
+ path = Path(config_path).expanduser()
71
+ if not path.is_absolute():
72
+ path = Path(__file__).resolve().parent / path
73
+ if not path.exists():
74
+ raise RuntimeError(f"VISUALIZER_CONFIG does not exist: {path}")
75
+ with path.open("rb") as config_file:
76
+ return tomllib.load(config_file)
77
+
78
+
79
+ CONFIG = load_config()
80
+
81
+
82
def config_value(env_name: str, section: str, key: str, default):
    """Look up one setting: environment variable first, then CONFIG, then default.

    Args:
        env_name: environment variable that overrides the file value when set
            (even if set to an empty string).
        section: top-level table name in CONFIG.
        key: key inside that table.
        default: value returned when neither source provides the setting.

    Returns:
        The environment string, or the raw value stored in CONFIG, or default.
    """
    override = os.environ.get(env_name)
    if override is not None:
        return override
    table = CONFIG.get(section, {})
    return table.get(key, default)
86
+
87
+
88
# ---------------------------------------------------------------------------
# Settings resolved once at import time via config_value():
# environment variable > TOML table > built-in default.
# ---------------------------------------------------------------------------

# [models] table
SEGMENTATION_MODEL = str(
    config_value("SEGMENTATION_MODEL", "models", "segmentation_model", "oneformer")
).lower()
ONEFORMER_MODEL_NAME = str(
    config_value("ONEFORMER_MODEL_NAME", "models", "oneformer_model_name", "shi-labs/oneformer_ade20k_swin_large")
)
MASK2FORMER_MODEL_NAME = str(
    config_value("MASK2FORMER_MODEL_NAME", "models", "mask2former_model_name", "facebook/mask2former-swin-small-ade-semantic")
)
SEGFORMER_MODEL_NAME = str(
    config_value("SEGFORMER_MODEL_NAME", "models", "segformer_model_name", "nvidia/segformer-b2-finetuned-ade-512-512")
)
DEPTH_MODEL_NAME = str(
    config_value("DEPTH_MODEL_NAME", "models", "depth_model_name", "Intel/dpt-large")
)
INTRINSIC_MODEL_VERSION = str(
    config_value("INTRINSIC_MODEL_VERSION", "models", "intrinsic_model_version", "v2")
)

# [runtime] table. Boolean switches accept 1/true/yes/on (case-insensitive);
# anything else disables the feature.
ENABLE_DEPTH_ESTIMATION = str(
    config_value("ENABLE_DEPTH_ESTIMATION", "runtime", "enable_depth_estimation", "1")
).lower() in {"1", "true", "yes", "on"}
ENABLE_INTRINSIC_SHADING = str(
    config_value("ENABLE_INTRINSIC_SHADING", "runtime", "enable_intrinsic_shading", "0")
).lower() in {"1", "true", "yes", "on"}
VISUALIZER_DATA_DIR = str(
    config_value("VISUALIZER_DATA_DIR", "runtime", "data_dir", "data")
)

# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model handles, filled in later by the loader functions in this module.
seg_processor = None
seg_model = None
segmentation_backend = "segformer"
depth_processor = None
depth_model = None
intrinsic_models = None
147
+
148
+
149
def hf_offline() -> bool:
    """Return True when HF_HUB_OFFLINE or TRANSFORMERS_OFFLINE is exactly "1"."""
    offline_flags = ("HF_HUB_OFFLINE", "TRANSFORMERS_OFFLINE")
    return any(os.getenv(flag) == "1" for flag in offline_flags)
151
+
152
+
153
def _load_segmentation_model():
    """Load a segmentation backend into the module-level globals.

    Backends are tried in the order OneFormer -> Mask2Former -> SegFormer,
    starting from the one selected by SEGMENTATION_MODEL. A failure in
    OneFormer or Mask2Former is logged and the next backend is attempted;
    SegFormer is the last resort and its exceptions propagate to the caller.
    On success seg_processor, seg_model and segmentation_backend are set.
    """
    global seg_processor, seg_model, segmentation_backend

    try_oneformer = SEGMENTATION_MODEL == "oneformer"
    # Mask2Former is both a first choice and the fallback for OneFormer.
    try_mask2former = SEGMENTATION_MODEL in {"oneformer", "mask2former"}

    if try_oneformer:
        try:
            print(f"Loading OneFormer: {ONEFORMER_MODEL_NAME} ...", flush=True)
            seg_processor = OneFormerProcessor.from_pretrained(
                ONEFORMER_MODEL_NAME, local_files_only=hf_offline()
            )
            seg_model = OneFormerForUniversalSegmentation.from_pretrained(
                ONEFORMER_MODEL_NAME, local_files_only=hf_offline()
            ).to(device)
            seg_model.eval()
            segmentation_backend = "oneformer"
            print("OneFormer loaded.", flush=True)
            return
        except Exception as exc:
            print(f"OneFormer failed ({exc}), falling back to Mask2Former.", flush=True)

    if try_mask2former:
        try:
            print(f"Loading Mask2Former: {MASK2FORMER_MODEL_NAME} ...", flush=True)
            seg_processor = AutoImageProcessor.from_pretrained(
                MASK2FORMER_MODEL_NAME, local_files_only=hf_offline()
            )
            seg_model = Mask2FormerForUniversalSegmentation.from_pretrained(
                MASK2FORMER_MODEL_NAME, local_files_only=hf_offline()
            ).to(device)
            seg_model.eval()
            segmentation_backend = "mask2former"
            print("Mask2Former loaded.", flush=True)
            return
        except Exception as exc:
            print(f"Mask2Former failed ({exc}), falling back to SegFormer.", flush=True)

    # Last resort: no try/except here, so a failure surfaces to the caller.
    print(f"Loading SegFormer: {SEGFORMER_MODEL_NAME} ...", flush=True)
    seg_processor = AutoImageProcessor.from_pretrained(
        SEGFORMER_MODEL_NAME, local_files_only=hf_offline()
    )
    seg_model = SegformerForSemanticSegmentation.from_pretrained(
        SEGFORMER_MODEL_NAME, local_files_only=hf_offline()
    ).to(device)
    seg_model.eval()
    segmentation_backend = "segformer"
    print("SegFormer loaded.", flush=True)
204
+
205
+
206
def _load_intrinsic_model():
    """Load the intrinsic-decomposition models once, if the feature is enabled.

    Does nothing when ENABLE_INTRINSIC_SHADING is false or the models are
    already loaded. Any load failure is logged and swallowed, leaving
    intrinsic_models as None.
    """
    global intrinsic_models
    if not ENABLE_INTRINSIC_SHADING or intrinsic_models is not None:
        return

    try:
        print(f"Loading Intrinsic Image Decomposition model: {INTRINSIC_MODEL_VERSION} ...", flush=True)
        # Imported here so the package is only needed when the feature is on.
        from intrinsic.pipeline import load_models

        intrinsic_models = load_models(INTRINSIC_MODEL_VERSION, device=str(device))
        print("Intrinsic model loaded.", flush=True)
    except Exception as exc:
        print(f"Intrinsic model failed to load ({exc}). Falling back to luminance shading.", flush=True)
216
+
217
+
218
# FastAPI application with CORS open to every origin, method and header.
app = FastAPI()
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

# Runtime storage: both sub-directories are created at import time, and the
# uploads directory is served as static files under /uploads.
DATA_DIR = Path(VISUALIZER_DATA_DIR).resolve()
UPLOAD_DIR = DATA_DIR / "uploads"
JOB_DIR = DATA_DIR / "jobs"
for _runtime_dir in (UPLOAD_DIR, JOB_DIR):
    _runtime_dir.mkdir(parents=True, exist_ok=True)
app.mount("/uploads", StaticFiles(directory=UPLOAD_DIR), name="uploads")
232
+
233
# Label groups, expressed with the names used in ADE20K_CLASSES.

# The label that denotes an indoor floor.
PRIMARY_FLOOR_CLASSES = {"floor"}

# Labels treated as walkable ground surfaces.
FLOOR_SURFACE_CLASSES = {
    "floor", "road", "sidewalk", "ground",
    "field", "grass", "sand", "runway",
    "dirt track", "land", "stairs", "step",
}

# Labels that are never part of the floor (wall_subtract removes them).
REJECT_SURFACE_CLASSES = {"wall", "ceiling", "building", "sky", "window"}

# Labels for objects that can stand on or cover the floor.
OCCLUDER_CLASSES = {
    "bed", "cabinet", "person", "door", "table", "plant",
    "curtain", "chair", "car", "painting", "sofa", "shelf",
    "mirror", "rug", "armchair", "seat", "desk", "wardrobe",
    "lamp", "bathtub", "railing", "cushion", "base", "box",
    "column", "chest of drawers", "counter", "sink", "fireplace", "refrigerator",
    "bookcase", "blind", "coffee table", "toilet", "bench", "countertop",
    "stove", "kitchen island", "computer", "swivel chair", "bar", "ottoman",
    "bottle", "buffet", "poster", "towel", "television", "washer",
    "plaything", "stool", "basket", "bag", "cradle", "oven",
    "ball", "food", "microwave", "pot", "dishwasher", "blanket",
    "sculpture", "vase", "tray", "fan", "plate", "monitor",
    "shower", "radiator", "clock",
}
251
+
252
+
253
def class_name_for_id(class_id: int) -> str:
    """Return the ADE20K label for class_id, or "class_<id>" when out of range.

    Negative ids are treated as out of range; without the lower bound Python's
    negative indexing would silently return a label counted from the end of
    the list.
    """
    if 0 <= class_id < len(ADE20K_CLASSES):
        return ADE20K_CLASSES[class_id]
    return f"class_{class_id}"
255
+
256
+
257
def class_ids(names: set[str]) -> list[int]:
    """Return, in ascending order, the ADE20K_CLASSES indices whose label is in names."""
    matches: list[int] = []
    for index, label in enumerate(ADE20K_CLASSES):
        if label in names:
            matches.append(index)
    return matches
259
+
260
+
261
def estimate_depth(img: Image.Image, width: int, height: int):
    """Predict a depth map for img, resized to (height, width) and scaled to [0, 1].

    The depth processor/model are loaded on first use and cached in module
    globals. Returns None when depth estimation is disabled, when the
    prediction is (near) constant, or when any step raises.
    """
    global depth_processor, depth_model
    if not ENABLE_DEPTH_ESTIMATION:
        return None

    model_name = DEPTH_MODEL_NAME
    try:
        already_loaded = depth_processor is not None and depth_model is not None
        if not already_loaded:
            print(f"Loading depth model: {model_name} ...", flush=True)
            depth_processor = AutoImageProcessor.from_pretrained(
                model_name, local_files_only=hf_offline()
            )
            depth_model = AutoModelForDepthEstimation.from_pretrained(
                model_name, local_files_only=hf_offline()
            ).to(device)
            depth_model.eval()
            print("Depth model loaded.", flush=True)

        inputs = depth_processor(images=img, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = depth_model(**inputs)

        # Resample the prediction to the requested size.
        resized = torch.nn.functional.interpolate(
            outputs.predicted_depth.unsqueeze(1),
            size=(height, width),
            mode="bicubic",
            align_corners=False,
        )
        depth = resized.squeeze().cpu().numpy()
        depth = cv2.GaussianBlur(depth.astype(np.float32), (0, 0), sigmaX=3)

        nearest, farthest = float(np.min(depth)), float(np.max(depth))
        if farthest - nearest < 1e-6:
            # A flat map carries no information and cannot be normalised.
            return None
        return (depth - nearest) / (farthest - nearest)
    except Exception as exc:
        print(f"Depth estimation skipped ({exc}).", flush=True)
        return None
298
+
299
+
300
+ # ---------------------------------------------------------------------------
301
+ # B4 — Shade Range Expansion
302
+ # Encode the shade multiplier using the actual brightness spread of the floor
303
+ # rather than a hardcoded [0.55, 1.35] clip, so dark-room images preserve the
304
+ # full dynamic range of their shadow patterns.
305
+ # ---------------------------------------------------------------------------
306
+
307
+ def _adaptive_shade_range(relative: np.ndarray, floor_mask: np.ndarray) -> tuple[float, float]:
308
+ floor_vals = relative[floor_mask > 0]
309
+ if floor_vals.size == 0:
310
+ return (0.55, 1.35)
311
+ lo = max(0.25, float(np.percentile(floor_vals, 1)))
312
+ hi = min(2.5, float(np.percentile(floor_vals, 99)))
313
+ span = hi - lo
314
+ if span < 0.4:
315
+ mid = (lo + hi) / 2.0
316
+ lo, hi = mid - 0.2, mid + 0.2
317
+ return lo, hi
318
+
319
+
320
+ def _encode_shade(relative: np.ndarray, lo: float, hi: float) -> np.ndarray:
321
+ span = hi - lo
322
+ return np.round((np.clip(relative, lo, hi) - lo) * (255.0 / span)).clip(0, 255).astype(np.uint8)
323
+
324
+
325
+ # ---------------------------------------------------------------------------
326
+ # B1 — Shadow Map Extraction
327
+ # Luminance-based shade map; returns (encoded_uint8, (lo, hi)) so the frontend
328
+ # can decode with the correct range.
329
+ # ---------------------------------------------------------------------------
330
+
331
+ def build_shade_map(
332
+ img_np: np.ndarray, surface_mask: np.ndarray
333
+ ) -> tuple[np.ndarray | None, tuple[float, float]]:
334
+ default_range = (0.55, 1.35)
335
+ if not surface_mask.any():
336
+ return None, default_range
337
+
338
+ mask = surface_mask.astype(np.uint8)
339
+ luminance = (
340
+ img_np[:, :, 0].astype(np.float32) * 0.299
341
+ + img_np[:, :, 1].astype(np.float32) * 0.587
342
+ + img_np[:, :, 2].astype(np.float32) * 0.114
343
+ )
344
+ h, w = mask.shape[:2]
345
+ floor_values = luminance[mask > 0]
346
+ if floor_values.size < max(256, int(h * w * 0.002)):
347
+ return None, default_range
348
+
349
+ median_lum = float(np.median(floor_values))
350
+ if median_lum < 1e-3:
351
+ return None, default_range
352
+
353
+ filled = luminance.copy()
354
+ filled[mask == 0] = median_lum
355
+ missing = (mask == 0).astype(np.uint8) * 255
356
+ try:
357
+ filled = cv2.inpaint(
358
+ np.clip(filled, 0, 255).astype(np.uint8),
359
+ missing,
360
+ max(3, min(h, w) // 160),
361
+ cv2.INPAINT_TELEA,
362
+ ).astype(np.float32)
363
+ except cv2.error:
364
+ pass
365
+
366
+ sigma = max(8.0, min(h, w) / 28.0)
367
+ smooth = cv2.GaussianBlur(filled, (0, 0), sigmaX=sigma, sigmaY=sigma)
368
+ relative = smooth / median_lum
369
+ relative[mask == 0] = 1.0
370
+ lo, hi = _adaptive_shade_range(relative, mask)
371
+ return _encode_shade(relative, lo, hi), (lo, hi)
372
+
373
+
374
+ def build_intrinsic_shade_map(
375
+ img_np: np.ndarray, surface_mask: np.ndarray
376
+ ) -> tuple[np.ndarray | None, tuple[float, float]]:
377
+ default_range = (0.55, 1.35)
378
+ if not surface_mask.any() or intrinsic_models is None:
379
+ return None, default_range
380
+
381
+ try:
382
+ img_float = img_np.astype(np.float32) / 255.0
383
+
384
+ from intrinsic.pipeline import run_pipeline
385
+ results = run_pipeline(intrinsic_models, img_float, device=str(device))
386
+
387
+ shading = None
388
+ if "gry_shd" in results:
389
+ shading = results["gry_shd"]
390
+ elif "dif_shd" in results:
391
+ dif = results["dif_shd"]
392
+ shading = dif[:, :, 0] * 0.299 + dif[:, :, 1] * 0.587 + dif[:, :, 2] * 0.114
393
+ else:
394
+ for k in results.keys():
395
+ if "shd" in k or "shading" in k:
396
+ shading = results[k]
397
+ if len(shading.shape) == 3:
398
+ shading = shading[:, :, 0] * 0.299 + shading[:, :, 1] * 0.587 + shading[:, :, 2] * 0.114
399
+ break
400
+
401
+ if shading is None:
402
+ return None, default_range
403
+
404
+ h, w = surface_mask.shape[:2]
405
+ if shading.shape[:2] != (h, w):
406
+ shading = cv2.resize(shading, (w, h), interpolation=cv2.INTER_LINEAR)
407
+
408
+ sigma = max(3.0, min(h, w) / 80.0)
409
+ shading = cv2.GaussianBlur(shading.astype(np.float32), (0, 0), sigmaX=sigma, sigmaY=sigma)
410
+
411
+ floor_vals = shading[surface_mask > 0]
412
+ if floor_vals.size == 0:
413
+ return None, default_range
414
+
415
+ median_val = float(np.median(floor_vals))
416
+ if median_val < 1e-3:
417
+ return None, default_range
418
+
419
+ relative_shading = shading / median_val
420
+ relative_shading[surface_mask == 0] = 1.0
421
+ lo, hi = _adaptive_shade_range(relative_shading, surface_mask)
422
+ return _encode_shade(relative_shading, lo, hi), (lo, hi)
423
+ except Exception as exc:
424
+ print(f"Intrinsic shading decomposition failed: {exc}. Falling back to default luminance shading.", flush=True)
425
+ return None, default_range
426
+
427
+
428
+ # ---------------------------------------------------------------------------
429
+ # B2 — Color Temperature
430
+ # Sample the brightest floor pixels to infer the room's lighting colour cast
431
+ # and approximate Kelvin value. Returns a dict with `kelvin` and `cast`
432
+ # (normalised RGB multipliers) so the frontend can tint replacement tiles.
433
+ # ---------------------------------------------------------------------------
434
+
435
+ def estimate_color_temperature(
436
+ img_np: np.ndarray, surface_mask: np.ndarray
437
+ ) -> dict | None:
438
+ if not surface_mask.any():
439
+ return None
440
+ pixels = img_np[surface_mask > 0].astype(np.float32)
441
+ if len(pixels) < 100:
442
+ return None
443
+
444
+ lum = pixels[:, 0] * 0.299 + pixels[:, 1] * 0.587 + pixels[:, 2] * 0.114
445
+ thresh = float(np.percentile(lum, 70))
446
+ bright = pixels[lum >= thresh]
447
+ if len(bright) < 10:
448
+ bright = pixels
449
+
450
+ mr = float(np.mean(bright[:, 0]))
451
+ mg = float(np.mean(bright[:, 1]))
452
+ mb = float(np.mean(bright[:, 2]))
453
+ ref = max(mr, mg, mb, 1e-3)
454
+
455
+ rb = mr / max(mb, 1.0)
456
+ if rb > 1.6:
457
+ kelvin = 2700
458
+ elif rb > 1.3:
459
+ kelvin = 3200
460
+ elif rb > 1.1:
461
+ kelvin = 4000
462
+ elif rb > 0.9:
463
+ kelvin = 5500
464
+ elif rb > 0.7:
465
+ kelvin = 6500
466
+ else:
467
+ kelvin = 8000
468
+
469
+ return {
470
+ "kelvin": kelvin,
471
+ "cast": {"r": round(mr / ref, 4), "g": round(mg / ref, 4), "b": round(mb / ref, 4)},
472
+ }
473
+
474
+
475
+ # ---------------------------------------------------------------------------
476
+ # B3 — Light Vector
477
+ # Estimate the primary in-plane light direction from the gradient of the shade
478
+ # map. Returns a normalised {x, y} vector pointing toward the light source.
479
+ # ---------------------------------------------------------------------------
480
+
481
+ def estimate_light_vector(
482
+ shade_map: np.ndarray | None, surface_mask: np.ndarray
483
+ ) -> dict | None:
484
+ if shade_map is None or not surface_mask.any():
485
+ return None
486
+
487
+ shade_f = shade_map.astype(np.float32)
488
+ valid = surface_mask.astype(np.float32)
489
+ kern = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
490
+ valid_e = cv2.erode(valid, kern, iterations=2)
491
+
492
+ clean = shade_f * valid_e
493
+ gx = cv2.Sobel(clean, cv2.CV_32F, 1, 0, ksize=15) * valid_e
494
+ gy = cv2.Sobel(clean, cv2.CV_32F, 0, 1, ksize=15) * valid_e
495
+ mag = np.hypot(gx, gy)
496
+ total = float(mag.sum())
497
+ if total < 1e-6:
498
+ return None
499
+
500
+ lx = float((gx * mag).sum()) / total
501
+ ly = float((gy * mag).sum()) / total
502
+ norm = float(np.hypot(lx, ly))
503
+ if norm < 1e-6:
504
+ return None
505
+
506
+ return {"x": round(lx / norm, 4), "y": round(ly / norm, 4)}
507
+
508
+
509
def clean_floor_mask(mask: np.ndarray) -> np.ndarray:
    """Tidy a binary floor mask and keep only plausible floor components.

    Steps: morphological close then open (kernel sizes scale with the shorter
    image side), then connected-component filtering that keeps components
    with at least max(1000, 1% of the image) pixels whose bottom edge lies
    below 60% of the image height. If nothing survives, the single largest
    component is returned instead.

    Returns:
        A uint8 mask with the same height and width as the input.
    """
    if mask.dtype != np.uint8:
        mask = mask.astype(np.uint8)

    h, w = mask.shape[:2]
    min_side = max(3, min(h, w))

    def ellipse(fraction: float, minimum: int):
        # "| 1" forces an odd kernel size.
        size = max(minimum, int(round(min_side * fraction))) | 1
        return cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))

    closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, ellipse(0.018, 5))
    cleaned = cv2.morphologyEx(closed, cv2.MORPH_OPEN, ellipse(0.006, 3))

    count, labels, stats, _ = cv2.connectedComponentsWithStats(cleaned, connectivity=8)
    if count <= 1:
        # Only the background label exists.
        return cleaned

    gravity_threshold = int(h * 0.60)
    min_area = max(1000, int(h * w * 0.01))
    result = np.zeros_like(cleaned)
    for component_id in range(1, count):
        big_enough = stats[component_id, cv2.CC_STAT_AREA] >= min_area
        if not big_enough:
            continue
        bottom = stats[component_id, cv2.CC_STAT_TOP] + stats[component_id, cv2.CC_STAT_HEIGHT]
        if bottom > gravity_threshold:
            result[labels == component_id] = 1

    if result.any():
        return result

    # Fallback: the largest foreground component (label 0 is the background).
    largest = 1 + int(np.argmax(stats[1:, cv2.CC_STAT_AREA]))
    return (labels == largest).astype(np.uint8)
548
+
549
+
550
def wall_subtract(mask: np.ndarray, seg_map: np.ndarray, dilation: int = 1) -> np.ndarray:
    """Return a copy of mask with REJECT_SURFACE_CLASSES pixels cleared.

    Args:
        mask: surface mask to clean; it is not modified.
        seg_map: per-pixel class ids.
        dilation: number of 5x5 elliptical dilation passes applied to the
            rejected region before it is removed; 0 or less disables dilation.
    """
    rejected = np.isin(seg_map, class_ids(REJECT_SURFACE_CLASSES)).astype(np.uint8)
    if dilation > 0:
        rejected = cv2.dilate(
            rejected,
            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)),
            iterations=dilation,
        )

    trimmed = mask.copy()
    trimmed[rejected > 0] = 0
    return trimmed
558
+
559
+
560
def fit_floor_edges(mask: np.ndarray):
    """Fit straight lines x = a*y + b to the left and right edges of the mask.

    Rows are sampled at a stride of max(1, height // 260). A row is used only
    if it has at least max(8, 1% of the width) mask pixels; its left/right
    edge is the 3rd/97th percentile of the set columns.

    Returns:
        (left_coeffs, right_coeffs) as returned by np.polyfit with degree 1,
        or None when fewer than 8 rows qualify.
    """
    h, w = mask.shape[:2]
    stride = max(1, h // 260)
    min_run = max(8, w * 0.01)

    samples = []
    for y in range(0, h, stride):
        columns = np.nonzero(mask[y] > 0)[0]
        if len(columns) < min_run:
            continue
        samples.append(
            (float(y), float(np.percentile(columns, 3)), float(np.percentile(columns, 97)))
        )

    if len(samples) < 8:
        return None

    ys, lefts, rights = (np.asarray(column, dtype=np.float32) for column in zip(*samples))
    return np.polyfit(ys, lefts, 1), np.polyfit(ys, rights, 1)
579
+
580
+
581
+ # ---------------------------------------------------------------------------
582
+ # B8 — Convex Hull Quad Fitting
583
+ # Derive a tight bounding quadrilateral from the convex hull of the floor mask.
584
+ # Used alongside the linear edge-fit quad so that corners of L-shaped rooms
585
+ # and irregular floor boundaries are fully covered.
586
+ # ---------------------------------------------------------------------------
587
+
588
def convex_hull_quad(mask: np.ndarray) -> np.ndarray | None:
    """Return the minimum-area rectangle around the mask's convex hull.

    Returns ``None`` when the mask has fewer than 50 non-zero pixels or the
    hull has fewer than 4 points. Otherwise returns the four ``(x, y)``
    corners from ``cv2.boxPoints``, each coordinate clamped to the image.
    """
    rows, cols = np.where(mask > 0)
    if len(cols) < 50:
        return None

    points = np.column_stack([cols, rows]).astype(np.float32)
    hull = cv2.convexHull(points)
    if hull is None or len(hull) < 4:
        return None

    # boxPoints yields a (4, 2) array with x in column 0 and y in column 1.
    corners = cv2.boxPoints(cv2.minAreaRect(hull.squeeze()))
    height, width = mask.shape[:2]
    corners[:, 0] = np.clip(corners[:, 0], 0, width - 1)
    corners[:, 1] = np.clip(corners[:, 1], 0, height - 1)
    return corners
602
+
603
+
604
+ # ---------------------------------------------------------------------------
605
+ # B6 — Dual Vanishing Point Detection
606
+ # Detect two independent VPs: one from positive-slope lines (converging right)
607
+ # and one from negative-slope lines (converging left), covering oblique shots
608
+ # and corner-camera perspectives.
609
+ # ---------------------------------------------------------------------------
610
+
611
def detect_dual_vanishing_points(
    img_np: np.ndarray, floor_mask: np.ndarray
) -> tuple[dict | None, dict | None]:
    """Estimate up to two vanishing points from line segments on the floor.

    Canny edges are computed on the blurred grayscale image, edges outside
    ``floor_mask`` are discarded, and ``cv2.HoughLinesP`` extracts segments.
    Segments shorter than 40 px, near-vertical (``|dx| < 1``) or near-horizontal
    (``|slope| < 0.18``) are ignored; the rest are split by slope sign and each
    group is reduced to one vanishing point by ``_find_vp``.

    Returns ``(primary, secondary)`` where each item is ``{"x": float, "y": float}``
    or ``None``. The primary is the candidate with the smaller ``|y|``.
    """
    gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(gray, 60, 160)
    edges[floor_mask == 0] = 0
    lines = cv2.HoughLinesP(
        edges,
        rho=1,
        theta=np.pi / 180,
        threshold=60,
        minLineLength=max(40, min(img_np.shape[:2]) // 16),
        maxLineGap=24,
    )
    if lines is None:
        return None, None

    h, w = img_np.shape[:2]
    pos_lines, neg_lines = [], []
    for line in lines[:, 0, :]:
        x1, y1, x2, y2 = [float(v) for v in line]
        dx, dy = x2 - x1, y2 - y1
        length = float(np.hypot(dx, dy))
        if length < 40 or abs(dx) < 1:
            continue
        slope = dy / dx
        if abs(slope) < 0.18:
            continue
        entry = (x1, y1, x2, y2, slope, length)
        if slope > 0:
            pos_lines.append(entry)
        else:
            neg_lines.append(entry)

    def _find_vp(group: list) -> dict | None:
        """Length-weighted, outlier-trimmed centre of pairwise line intersections."""
        intersections = []
        for i, (x1, y1, _, _, s1, l1) in enumerate(group):
            a1 = y1 - s1 * x1  # intercept of line 1: y = s1 * x + a1
            for x3, y3, _, _, s2, l2 in group[i + 1:]:
                denom = s1 - s2
                # Nearly parallel pairs give unstable intersections; this also
                # guarantees the division below is safe.
                if abs(denom) < 0.08:
                    continue
                a2 = y3 - s2 * x3  # intercept of line 2
                x = (a2 - a1) / denom
                y = s1 * x + a1
                # Keep only intersections inside a window around the image.
                if -w * 0.6 <= x <= w * 1.6 and -h * 1.2 <= y <= h * 1.0:
                    intersections.append((x, y, min(l1, l2)))
        if len(intersections) < 3:
            return None
        pts = np.array([[p[0], p[1]] for p in intersections], np.float32)
        weights = np.array([p[2] for p in intersections], np.float32)
        center = np.average(pts, axis=0, weights=weights)
        # Re-average using only the 70% of intersections closest to the first
        # estimate, provided at least three survive.
        dist = np.linalg.norm(pts - center, axis=1)
        keep = dist <= np.percentile(dist, 70)
        if keep.sum() >= 3:
            center = np.average(pts[keep], axis=0, weights=weights[keep])
        return {"x": float(center[0]), "y": float(center[1])}

    vp_right = _find_vp(pos_lines)  # from positive-slope segments
    vp_left = _find_vp(neg_lines)   # from negative-slope segments

    # Primary VP = the candidate with the smallest |y|, i.e. the one closest
    # to the top image row (y = 0); the other, if any, is the secondary.
    candidates = [(vp, abs(vp["y"])) for vp in [vp_right, vp_left] if vp is not None]
    if not candidates:
        return None, None
    candidates.sort(key=lambda t: t[1])
    primary = candidates[0][0]
    secondary = candidates[1][0] if len(candidates) > 1 else None
    return primary, secondary
682
+
683
+
684
def estimate_floor_plane(mask: np.ndarray, img_np: np.ndarray):
    """Fit a perspective quad to the floor mask and derive its homography.

    Returns ``(None, None)`` when the mask has fewer than 1000 pixels, its
    1-99 percentile extent is under 20 px in either direction, the edge fit
    fails, or the final quad has an area below 100 px^2.

    Otherwise returns ``(homography, plane)``:
      * ``homography`` - the 3x3 matrix from ``cv2.getPerspectiveTransform``
        mapping the quad onto the mask's axis-aligned percentile box,
        flattened to a list of 9 floats.
      * ``plane`` - dict with the box (``x``, ``y``, ``width``, ``height``),
        the flattened ``quad`` (bottom-left, bottom-right, top-right, top-left),
        ``hullQuad`` and the two vanishing points.
    """
    ys, xs = np.where(mask > 0)
    if len(xs) < 1000:
        return None, None

    xs_f, ys_f = xs.astype(np.float32), ys.astype(np.float32)
    x1, x2 = float(np.percentile(xs_f, 1)), float(np.percentile(xs_f, 99))
    y1, y2 = float(np.percentile(ys_f, 1)), float(np.percentile(ys_f, 99))
    width, height = x2 - x1, y2 - y1
    if width < 20 or height < 20:
        return None, None

    top_y = float(np.percentile(ys_f, 8))
    bottom_y = float(np.percentile(ys_f, 97))
    edge_fits = fit_floor_edges(mask)
    if edge_fits is None:
        return None, None
    left_fit, right_fit = edge_fits

    top_left = float(np.polyval(left_fit, top_y))
    top_right = float(np.polyval(right_fit, top_y))
    bottom_left = float(np.polyval(left_fit, bottom_y))
    bottom_right = float(np.polyval(right_fit, bottom_y))
    # Widen the bottom edge to the spread of the lowest 20% of floor pixels.
    lower_xs = xs_f[ys_f >= np.percentile(ys_f, 80)]
    bottom_left = min(bottom_left, float(np.percentile(lower_xs, 4)))
    bottom_right = max(bottom_right, float(np.percentile(lower_xs, 96)))

    # Enforce minimum edge widths so the quad never collapses to a sliver.
    min_top_width = max(24.0, width * 0.18)
    top_center = (top_left + top_right) * 0.5
    if top_right - top_left < min_top_width:
        top_left = top_center - min_top_width * 0.5
        top_right = top_center + min_top_width * 0.5

    min_bottom_width = max(min_top_width * 1.25, width * 0.45)
    bottom_center = (bottom_left + bottom_right) * 0.5
    if bottom_right - bottom_left < min_bottom_width:
        bottom_left = bottom_center - min_bottom_width * 0.5
        bottom_right = bottom_center + min_bottom_width * 0.5

    h, w = mask.shape[:2]
    src = np.float32([
        [np.clip(bottom_left, 0, w - 1), np.clip(bottom_y, 0, h - 1)],
        [np.clip(bottom_right, 0, w - 1), np.clip(bottom_y, 0, h - 1)],
        [np.clip(top_right, 0, w - 1), np.clip(top_y, 0, h - 1)],
        [np.clip(top_left, 0, w - 1), np.clip(top_y, 0, h - 1)],
    ])

    # B6 — use dual VP; primary VP guides top-edge convergence
    vanishing_point, vanishing_point2 = detect_dual_vanishing_points(img_np, mask)
    if vanishing_point is not None and vanishing_point["y"] < bottom_y:
        vp_x = float(np.clip(vanishing_point["x"], -w * 0.25, w * 1.25))
        top_width = max(src[2][0] - src[3][0], width * 0.16)
        horizon_gap = max(bottom_y - top_y, 1.0)
        convergence = np.clip((top_y - vanishing_point["y"]) / horizon_gap, 0.12, 0.75)
        # Pull the top edge's centre part-way toward the vanishing point.
        top_center = top_center * (1 - convergence * 0.35) + vp_x * (convergence * 0.35)
        src[3][0] = np.clip(top_center - top_width * 0.5, 0, w - 1)
        src[2][0] = np.clip(top_center + top_width * 0.5, 0, w - 1)

    # B8 — expand src quad to cover convex hull corners not reached by linear fits
    hull_box = convex_hull_quad(mask)
    hull_quad_list = hull_box.flatten().tolist() if hull_box is not None else None
    if hull_box is not None:
        hull_bottom_y = float(np.max(hull_box[:, 1]))
        hull_top_y = float(np.min(hull_box[:, 1]))
        hull_left_x = float(np.min(hull_box[:, 0]))
        hull_right_x = float(np.max(hull_box[:, 0]))
        src[0][0] = min(src[0][0], hull_left_x)
        src[1][0] = max(src[1][0], hull_right_x)
        src[0][1] = src[1][1] = max(src[0][1], hull_bottom_y)
        src[2][1] = src[3][1] = min(src[2][1], hull_top_y)
        # The list bounds are converted to int64 arrays, so np.clip promotes
        # the float32 quad to float64. cv2.contourArea and
        # cv2.getPerspectiveTransform only accept 32-bit input, so cast back.
        src = np.clip(src, [0, 0], [w - 1, h - 1]).astype(np.float32)

    if cv2.contourArea(src) < 100:
        return None, None
    dst = np.float32([[x1, y2], [x2, y2], [x2, y1], [x1, y1]])
    homography = cv2.getPerspectiveTransform(src, dst).flatten().tolist()
    return homography, {
        "x": x1,
        "y": y1,
        "width": width,
        "height": height,
        "quad": src.flatten().tolist(),
        "hullQuad": hull_quad_list,  # B8
        "vanishingPoint": vanishing_point,  # B6 primary
        "vanishingPoint2": vanishing_point2,  # B6 secondary
    }
770
+
771
+
772
+ # ---------------------------------------------------------------------------
773
+ # B5 — Complement-Stamp Furniture
774
+ # Use a single dilation pass (down from two) and restore the narrow contact
775
+ # zone directly below each occluder so chair legs, table bases, and plant pots
776
+ # sit flush against the tile surface without a visible gap or halo.
777
+ # ---------------------------------------------------------------------------
778
+
779
def build_floor_surface_mask(
    floor_mask: np.ndarray,
    seg_map: np.ndarray,
    quad: np.ndarray | None,
    depth: np.ndarray | None,
):
    """Build the mask of pixels that should receive the replacement floor.

    Starting from ``floor_mask``, the function removes dilated reject-surface
    pixels, fills interior holes via external contours, optionally extends the
    surface into the part of ``quad`` that lies near the floor, cuts out
    dilated occluders, optionally gates by the floor's depth range, cleans the
    result with ``clean_floor_mask`` and finally restores a thin contact strip
    along occluder edges that overlaps ``floor_mask``.
    """
    rows, cols = floor_mask.shape[:2]
    k = max(5, min(rows, cols) // 160) | 1  # "| 1" forces an odd kernel size
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
    occluders = np.isin(seg_map, class_ids(OCCLUDER_CLASSES)).astype(np.uint8)

    # Occluders get a single dilation pass so their boundary stays tight;
    # reject surfaces get two.
    blocked_by_occluder = cv2.dilate(occluders, ellipse, iterations=1) > 0

    rejects = np.isin(seg_map, class_ids(REJECT_SURFACE_CLASSES)).astype(np.uint8)
    blocked_by_reject = cv2.dilate(rejects, ellipse, iterations=2) > 0

    surface = floor_mask.copy()
    surface[blocked_by_reject] = 0
    if not surface.any():
        # Removing rejects wiped everything out; fall back to the raw floor.
        surface = floor_mask.copy()

    # Fill holes: redraw every external contour as a solid shape.
    outlines, _ = cv2.findContours(surface, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if outlines:
        solid = np.zeros((rows, cols), dtype=np.uint8)
        cv2.drawContours(solid, outlines, -1, 1, cv2.FILLED)
        solid[blocked_by_reject] = 0
        surface = solid

    if quad is not None and surface.any():
        # Add quad pixels, but only those within six dilations of the surface.
        plane_area = np.zeros((rows, cols), dtype=np.uint8)
        cv2.fillConvexPoly(plane_area, np.round(quad).astype(np.int32), 1)
        plane_area[blocked_by_reject] = 0
        neighbourhood = cv2.dilate(surface, ellipse, iterations=6)
        surface = cv2.bitwise_or(surface, cv2.bitwise_and(plane_area, neighbourhood))

    surface[blocked_by_occluder] = 0
    if depth is not None and floor_mask.any():
        on_floor = floor_mask > 0
        floor_depth = depth[on_floor]
        lo, hi = float(np.percentile(floor_depth, 2)), float(np.percentile(floor_depth, 98))
        margin = max(0.08, (hi - lo) * 0.35)
        within_range = (depth >= lo - margin) & (depth <= hi + margin)
        surface = (surface & within_range.astype(np.uint8)).astype(np.uint8)
        # Original floor pixels are always kept, whatever the depth gate says.
        surface[on_floor] = np.maximum(surface[on_floor], 1)
        surface[blocked_by_occluder] = 0
        surface[blocked_by_reject] = 0

    surface = clean_floor_mask(surface)
    surface[blocked_by_occluder] = 0
    surface[blocked_by_reject] = 0

    edge_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    surface = cv2.dilate(surface, edge_kernel, iterations=1)
    surface[blocked_by_occluder] = 0
    surface[blocked_by_reject] = 0

    # B5: restore a thin strip along the occluder edges (occluder minus its
    # erosion, slightly dilated) wherever that strip overlaps the floor mask.
    erode_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 3))
    occluder_core = cv2.erode(occluders, erode_kernel, iterations=1)
    occluder_rim = cv2.subtract(occluders, occluder_core)
    rim_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    contact_strip = cv2.dilate(occluder_rim, rim_kernel, iterations=1)
    surface = cv2.bitwise_or(surface, cv2.bitwise_and(contact_strip, floor_mask))
    surface[blocked_by_reject] = 0

    return surface
848
+
849
+
850
+ # ---------------------------------------------------------------------------
851
+ # B10 — Confidence-Aware Boundaries
852
+ # Distance-transform the surface mask so pixels near its edge get a low
853
+ # confidence score. The frontend uses this to feather tile blending at
854
+ # boundary transitions instead of a hard cut.
855
+ # ---------------------------------------------------------------------------
856
+
857
def build_confidence_map(surface_mask: np.ndarray) -> np.ndarray | None:
    """Turn a surface mask into a uint8 confidence map that fades at the edge.

    Each pixel's L2 distance to the nearest zero pixel is divided by a feather
    width of ``max(10, shortest_side / 50)`` pixels, clamped to [0, 1] and
    scaled to 0-255. Returns ``None`` for an all-zero mask.
    """
    if not surface_mask.any():
        return None
    shortest_side = min(surface_mask.shape[:2])
    feather_px = max(10.0, shortest_side / 50.0)
    edge_distance = cv2.distanceTransform(surface_mask.astype(np.uint8), cv2.DIST_L2, 5)
    scaled = np.clip(edge_distance / feather_px, 0.0, 1.0) * 255
    return scaled.astype(np.uint8)
864
+
865
+
866
+ # ---------------------------------------------------------------------------
867
+ # B7 — Multi-Room Grid Alignment
868
+ # Find all connected floor regions large enough to tile. All regions share
869
+ # the primary region's homography so the tile grid continues seamlessly across
870
+ # doorways without restarting.
871
+ # ---------------------------------------------------------------------------
872
+
873
def find_floor_regions(surface_mask: np.ndarray, min_area: int) -> list[np.ndarray]:
    """Split a surface mask into its 8-connected regions, largest first.

    Components whose pixel area is below ``min_area`` are dropped. Each
    returned item is a uint8 0/1 mask with the same shape as the input.
    """
    count, labels, stats, _ = cv2.connectedComponentsWithStats(
        surface_mask.astype(np.uint8), connectivity=8
    )
    # Label 0 is the background, so real components start at 1.
    kept = [
        (labels == label).astype(np.uint8)
        for label in range(1, count)
        if int(stats[label, cv2.CC_STAT_AREA]) >= min_area
    ]
    return sorted(kept, key=lambda region: int(region.sum()), reverse=True)
883
+
884
+
885
def run_segmentation(img: Image.Image, img_np: np.ndarray):
    """Run the configured segmentation backend and return a class-id map.

    The model is loaded on first use via ``_load_segmentation_model``. The
    result is a uint8 array with the same height and width as ``img_np``:
      * ``oneformer`` / ``mask2former`` - semantic post-processing at the
        target size; a mask2former model whose name contains "panoptic"
        instead maps each panoptic segment to its label id (capped at 255).
      * any other backend - argmax over the logits, resized with
        nearest-neighbour interpolation.
    """
    if seg_model is None:
        _load_segmentation_model()
    height, width = img_np.shape[:2]
    backend = segmentation_backend

    # Only OneFormer needs the task prompt; the other backends take the image alone.
    if backend == "oneformer":
        batch = seg_processor(
            images=img,
            task_inputs=["semantic"],
            return_tensors="pt",
        ).to(device)
    else:
        batch = seg_processor(images=img, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = seg_model(**batch)

    if backend == "mask2former" and "panoptic" in MASK2FORMER_MODEL_NAME:
        panoptic = seg_processor.post_process_panoptic_segmentation(
            outputs,
            target_sizes=[(height, width)],
        )[0]
        class_map = np.zeros((height, width), dtype=np.uint8)
        segment_ids = panoptic["segmentation"].cpu().numpy()
        for info in panoptic["segments_info"]:
            class_map[segment_ids == info["id"]] = min(info["label_id"], 255)
        return class_map

    if backend in ("oneformer", "mask2former"):
        semantic = seg_processor.post_process_semantic_segmentation(
            outputs,
            target_sizes=[(height, width)],
        )[0]
        return semantic.cpu().numpy().astype(np.uint8)

    low_res = outputs.logits.argmax(dim=1).squeeze().cpu().numpy()
    return cv2.resize(low_res.astype(np.uint8), (width, height), interpolation=cv2.INTER_NEAREST)
930
+
931
+
932
def segmenter_metadata_name() -> str:
    """Return the segmenter label reported in segment metadata.

    The ``oneformer`` backend is reported under a fixed descriptive name;
    every other backend is reported under its own name.
    """
    is_oneformer = segmentation_backend == "oneformer"
    return "oneformer-ade20k-swin-large" if is_oneformer else segmentation_backend
936
+
937
+
938
def build_segmentation_bundle(contents: bytes):
    """Run the full floor-analysis pipeline on an encoded image.

    Steps: decode the image, segment it, derive and clean the floor mask
    (falling back from the primary to the broader floor classes when the
    result is too small), estimate depth, fit the floor plane, build the
    surface mask, shade map and lighting estimates, then split the surface
    into connected regions. Each stage prints a ``[TIMING]`` line.

    Returns a dict with ``width``, ``height``, ``pixels`` (base64 of the raw
    RGBA bytes) and ``segments``. Every floor region becomes one segment whose
    ``mask`` is the base64 of its flat pixel indices as uint32; all regions
    share the same homography and plane. When no floor segment is produced,
    one segment per segmentation class with at least 1000 pixels is emitted.
    """
    t_start = time.perf_counter()

    t0 = time.perf_counter()
    img = Image.open(io.BytesIO(contents)).convert("RGB")
    img_np = np.array(img)
    h, w = img_np.shape[:2]
    min_floor_area = max(1200, int(w * h * 0.015))
    print(f"[TIMING] Image loading/parsing took {time.perf_counter() - t0:.3f} seconds", flush=True)

    t0 = time.perf_counter()
    seg_map = run_segmentation(img, img_np)
    print(f"[TIMING] Floor segmentation took {time.perf_counter() - t0:.3f} seconds", flush=True)

    t0 = time.perf_counter()
    rgba = np.dstack([img_np, np.full((h, w), 255, dtype=np.uint8)])
    pixels_b64 = base64.b64encode(rgba.tobytes()).decode()
    print(f"[TIMING] Image RGBA encoding took {time.perf_counter() - t0:.3f} seconds", flush=True)

    t0 = time.perf_counter()
    primary_floor_ids = class_ids(PRIMARY_FLOOR_CLASSES)
    floor_class_ids = class_ids(FLOOR_SURFACE_CLASSES)
    floor_mask = np.isin(seg_map, primary_floor_ids).astype(np.uint8)
    floor_mask = wall_subtract(floor_mask, seg_map, dilation=1)
    floor_mask = clean_floor_mask(floor_mask)
    if int(floor_mask.sum()) < min_floor_area:
        # Primary classes gave too little floor; retry with the broader set.
        floor_mask = np.isin(seg_map, floor_class_ids).astype(np.uint8)
        floor_mask = wall_subtract(floor_mask, seg_map, dilation=1)
        floor_mask = clean_floor_mask(floor_mask)
    print(f"[TIMING] Floor masking/cleanup took {time.perf_counter() - t0:.3f} seconds", flush=True)

    t0 = time.perf_counter()
    depth = estimate_depth(img, w, h)
    print(f"[TIMING] Depth estimation took {time.perf_counter() - t0:.3f} seconds", flush=True)

    t0 = time.perf_counter()
    homography, plane = estimate_floor_plane(floor_mask, img_np)
    print(f"[TIMING] Plane fitting / homography calculation took {time.perf_counter() - t0:.3f} seconds", flush=True)

    t0 = time.perf_counter()
    quad = np.asarray(plane["quad"], dtype=np.float32).reshape(4, 2) if plane and plane.get("quad") else None
    surface_mask = build_floor_surface_mask(floor_mask, seg_map, quad, depth)
    print(f"[TIMING] Surface masking took {time.perf_counter() - t0:.3f} seconds", flush=True)

    t0 = time.perf_counter()
    shade_map, shade_range = None, (0.55, 1.35)
    if ENABLE_INTRINSIC_SHADING:
        if intrinsic_models is None:
            _load_intrinsic_model()
        if intrinsic_models is not None:
            shade_map, shade_range = build_intrinsic_shade_map(img_np, surface_mask)
    if shade_map is None:
        shade_map, shade_range = build_shade_map(img_np, surface_mask)
    print(f"[TIMING] Shade map construction took {time.perf_counter() - t0:.3f} seconds", flush=True)

    t0 = time.perf_counter()
    color_temperature = estimate_color_temperature(img_np, surface_mask)  # B2
    light_vector = estimate_light_vector(shade_map, surface_mask)  # B3
    confidence_map = build_confidence_map(surface_mask)  # B10
    print(f"[TIMING] Lighting analysis took {time.perf_counter() - t0:.3f} seconds", flush=True)

    # B7 — split the surface mask into connected regions; all share the same
    # homography so the tile grid is continuous across doorways.
    t0 = time.perf_counter()
    floor_regions = find_floor_regions(surface_mask, min_floor_area)
    multi_room = len(floor_regions) > 1
    print(f"[TIMING] Floor region detection took {time.perf_counter() - t0:.3f} seconds", flush=True)

    segments = []
    segmenter_name = segmenter_metadata_name()

    if floor_regions:
        # These values are identical for every region, so compute them once
        # instead of re-encoding the full shade map per region.
        shade_map_b64 = (
            base64.b64encode(shade_map.tobytes()).decode() if shade_map is not None else None
        )
        floor_pixel_count = int(floor_mask.sum())

        for region_idx, region_mask in enumerate(floor_regions):
            region_indices = np.flatnonzero(region_mask.ravel()).astype(np.uint32)
            if len(region_indices) < min_floor_area:
                continue

            # Per-region confidence sub-map
            region_conf = build_confidence_map(region_mask)

            segments.append({
                "id": region_idx,
                "className": "floor",
                "mask": base64.b64encode(region_indices.tobytes()).decode(),
                "homography": homography,  # shared across all regions (B7)
                "plane": plane,
                "shadeMap": shade_map_b64,
                "shadeRange": list(shade_range),  # B4 — frontend decodes with this
                "colorTemperature": color_temperature,  # B2
                "lightVector": light_vector,  # B3
                "confidenceMap": base64.b64encode(region_conf.tobytes()).decode() if region_conf is not None else None,  # B10
                "multiRoom": multi_room,  # B7
                "gridGroup": "primary" if region_idx == 0 else f"room_{region_idx}",  # B7
                "metadata": {
                    "segmenter": segmenter_name,
                    "floorPixels": floor_pixel_count,
                    "surfacePixels": int(region_mask.sum()),
                    "depthEnabled": depth is not None,
                    "shadingEnabled": shade_map is not None,
                },
            })

    if not segments:
        # No usable floor: fall back to one plain segment per detected class.
        flat_seg = seg_map.ravel()
        for seg_id, class_id in enumerate(np.unique(flat_seg)):
            indices = np.where(flat_seg == class_id)[0].astype(np.uint32)
            if len(indices) < 1000:
                continue
            segments.append({
                "id": int(seg_id),
                "className": class_name_for_id(int(class_id)),
                "mask": base64.b64encode(indices.tobytes()).decode(),
                "homography": None,
                "plane": None,
                "shadeMap": None,
                "shadeRange": None,
                "colorTemperature": None,
                "lightVector": None,
                "confidenceMap": None,
                "multiRoom": False,
                "gridGroup": None,
                "metadata": {
                    "segmenter": segmenter_name,
                    "depthEnabled": depth is not None,
                    "shadingEnabled": False,
                },
            })

    print(f"[TIMING] Total bundle processing completed in {time.perf_counter() - t_start:.3f} seconds", flush=True)
    return {"width": w, "height": h, "pixels": pixels_b64, "segments": segments}
1068
+
1069
+
1070
def job_path(job_id: str) -> Path:
    """Return the path of the JSON status file for ``job_id`` inside ``JOB_DIR``."""
    filename = f"{job_id}.json"
    return JOB_DIR / filename
1072
+
1073
+
1074
def read_job(job_id: str):
    """Load and return the stored job record for ``job_id``.

    ``job_id`` arrives from the request URL and is interpolated into a file
    path, so ids that are empty or are not a single path component (i.e. that
    contain a path separator) are treated as unknown jobs.

    Raises:
        HTTPException: 404 when the id is rejected or no job file exists.
    """
    if not job_id or Path(job_id).name != job_id:
        raise HTTPException(status_code=404, detail="Job not found.")
    path = job_path(job_id)
    if not path.exists():
        raise HTTPException(status_code=404, detail="Job not found.")
    return json.loads(path.read_text())
1079
+
1080
+
1081
def write_job(job: dict):
    """Persist ``job`` as JSON at ``job_path(job["id"])``.

    The record is written to a sibling ``.tmp`` file and then renamed over
    the target, so a status request that reads the job file while it is being
    rewritten sees either the old or the new record, never a truncated one.
    """
    target = job_path(job["id"])
    scratch = target.with_name(f"{target.name}.tmp")
    scratch.write_text(json.dumps(job))
    scratch.replace(target)
1083
+
1084
+
1085
def run_conversion_task(job_id: str, upload_path: Path):
    """Background task: build the bundle for an upload and update its job.

    On success the bundle is written to ``<job_id>.bundle.json`` in
    ``JOB_DIR`` and the job status becomes ``COMPLETED``. On any failure the
    job is marked ``FAILED`` with the error text. This function never raises;
    if even the failure status cannot be recorded, that is logged as well
    instead of being silently dropped.
    """
    try:
        t_start = time.perf_counter()
        image_bytes = upload_path.read_bytes()
        bundle = build_segmentation_bundle(image_bytes)
        (JOB_DIR / f"{job_id}.bundle.json").write_text(json.dumps(bundle))
        job = read_job(job_id)
        job["status"] = "COMPLETED"
        write_job(job)
        print(f"[TIMING] Background conversion task for job {job_id} took {time.perf_counter() - t_start:.3f} seconds", flush=True)
    except Exception as exc:
        print(f"Background conversion failed: {exc}", flush=True)
        try:
            job = read_job(job_id)
            job["status"] = "FAILED"
            job["error"] = str(exc)
            write_job(job)
        except Exception as status_exc:
            # Best effort only, but leave a trace: otherwise the job stays
            # in PROCESSING forever with nothing in the logs to explain why.
            print(f"Could not record FAILED status for job {job_id}: {status_exc}", flush=True)
1104
+
1105
+
1106
@app.post("/viz2d/convert")
async def convert_to_viz2d(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Accept an image upload, store it and queue its conversion.

    Uploads that declare a non-image content type are rejected with 400. The
    file is saved under a fresh job id, keeping its extension when it is one
    of .jpg/.jpeg/.png/.webp and using .jpg otherwise. The new job record is
    written with status ``PROCESSING`` and returned right away; the
    conversion runs as a background task.
    """
    declared_type = file.content_type
    if declared_type and not declared_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="Upload must be a JPG or PNG image.")

    job_id = uuid.uuid4().hex
    suffix = Path(file.filename or "room.jpg").suffix.lower()
    allowed_suffixes = {".jpg", ".jpeg", ".png", ".webp"}
    stored_suffix = suffix if suffix in allowed_suffixes else ".jpg"
    upload_path = UPLOAD_DIR / f"{job_id}{stored_suffix}"
    with upload_path.open("wb") as sink:
        shutil.copyfileobj(file.file, sink)

    job = {
        "id": job_id,
        "status": "PROCESSING",
        "inputUrl": f"/uploads/{upload_path.name}",
        "outputUrl": f"/viz2d/jobs/{job_id}/file",
    }
    write_job(job)
    background_tasks.add_task(run_conversion_task, job_id, upload_path)
    return job
1128
+
1129
+
1130
@app.get("/viz2d/jobs/{job_id}")
async def viz2d_job_status(job_id: str):
    """Return the stored job record; ``read_job`` raises 404 for unknown ids."""
    job = read_job(job_id)
    return job
1133
+
1134
+
1135
@app.get("/viz2d/jobs/{job_id}/file")
async def viz2d_job_file(job_id: str):
    """Serve a completed job's bundle as a JSON attachment.

    Responds 409 while the job's status is not ``COMPLETED`` and 404 when the
    bundle file is missing (``read_job`` already raises 404 for unknown jobs).
    """
    status = read_job(job_id).get("status")
    if status != "COMPLETED":
        raise HTTPException(status_code=409, detail="Job is not completed yet.")

    bundle_path = JOB_DIR / f"{job_id}.bundle.json"
    if not bundle_path.exists():
        raise HTTPException(status_code=404, detail="Job output not found.")

    payload = bundle_path.read_bytes()
    download_headers = {"Content-Disposition": 'attachment; filename="visualizer.vizbundle.json"'}
    return Response(content=payload, media_type="application/json", headers=download_headers)
1148
+
1149
+
1150
@app.post("/segment")
async def segment(file: UploadFile = File(...)):
    """Synchronously segment an uploaded image and return the bundle.

    ``build_segmentation_bundle`` is long-running, blocking work. Calling it
    directly inside this coroutine would stall the event loop, and with it
    every other request such as job-status polling, for the whole run, so it
    is executed in a worker thread instead.
    """
    import asyncio

    contents = await file.read()
    return await asyncio.to_thread(build_segmentation_bundle, contents)
1154
+
1155
+
1156
if __name__ == "__main__":
    # Direct execution (``python app.py``): serve the app with uvicorn on all
    # interfaces, port 8002. uvicorn is imported here so it is only needed
    # when the module is run as a script.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8002)
requirements-base.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ fastapi==0.115.12
3
+ huggingface-hub==0.32.0
4
+ numpy==2.2.6
5
+ opencv-python-headless==4.11.0.86
6
+ pillow==11.2.1
7
+ python-multipart==0.0.20
8
+ safetensors==0.5.3
9
+ timm==1.0.15
10
+ tokenizers==0.15.2
11
+ transformers==4.38.2
12
+ uvicorn==0.34.2
13
+ scipy
14
+ git+https://github.com/compphoto/Intrinsic.git
requirements-gpu-cu126.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ -r requirements-base.txt
2
+ --extra-index-url https://download.pytorch.org/whl/cu126
3
+ torch==2.7.0+cu126
4
+ torchvision==0.22.0+cu126
5
+ triton==3.3.0
requirements-linux-cpu.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ -r requirements-base.txt
2
+ --extra-index-url https://download.pytorch.org/whl/cpu
3
+ torch==2.7.0+cpu
4
+ torchvision==0.22.0+cpu
requirements-mac.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ -r requirements-base.txt
2
+ torch==2.7.0
3
+ torchvision==0.22.0
requirements.txt ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ annotated-types==0.7.0
3
+ anyio==4.9.0
4
+ certifi==2025.4.26
5
+ charset-normalizer==3.4.2
6
+ click==8.1.8
7
+ contourpy==1.3.2
8
+ cycler==0.12.1
9
+ exceptiongroup==1.3.0
10
+ fastapi==0.115.12
11
+ ffmpy==0.5.0
12
+ filelock==3.18.0
13
+ fonttools==4.58.0
14
+ fsspec==2025.5.1
15
+ gradio==5.31.0
16
+ gradio_client==1.10.1
17
+ groovy==0.1.2
18
+ h11==0.16.0
19
+ hf-xet==1.1.2
20
+ httpcore==1.0.9
21
+ httpx==0.28.1
22
+ huggingface-hub==0.32.0
23
+ idna==3.10
24
+ Jinja2==3.1.6
25
+ kiwisolver==1.4.8
26
+ markdown-it-py==3.0.0
27
+ MarkupSafe==3.0.2
28
+ matplotlib==3.10.3
29
+ mdurl==0.1.2
30
+ mpmath==1.3.0
31
+ networkx==3.4.2
32
+ numpy==2.2.6
33
+ nvidia-cublas-cu12==12.6.4.1
34
+ nvidia-cuda-cupti-cu12==12.6.80
35
+ nvidia-cuda-nvrtc-cu12==12.6.77
36
+ nvidia-cuda-runtime-cu12==12.6.77
37
+ nvidia-cudnn-cu12==9.5.1.17
38
+ nvidia-cufft-cu12==11.3.0.4
39
+ nvidia-cufile-cu12==1.11.1.6
40
+ nvidia-curand-cu12==10.3.7.77
41
+ nvidia-cusolver-cu12==11.7.1.2
42
+ nvidia-cusparse-cu12==12.5.4.2
43
+ nvidia-cusparselt-cu12==0.6.3
44
+ nvidia-nccl-cu12==2.26.2
45
+ nvidia-nvjitlink-cu12==12.6.85
46
+ nvidia-nvtx-cu12==12.6.77
47
+ opencv-python==4.11.0.86
48
+ orjson==3.10.18
49
+ packaging==25.0
50
+ pandas==2.2.3
51
+ pillow==11.2.1
52
+ pydantic==2.11.5
53
+ pydantic_core==2.33.2
54
+ pydub==0.25.1
55
+ Pygments==2.19.1
56
+ pyparsing==3.2.3
57
+ python-dateutil==2.9.0.post0
58
+ python-multipart==0.0.20
59
+ pytz==2025.2
60
+ PyYAML==6.0.2
61
+ regex==2024.11.6
62
+ requests==2.32.3
63
+ rich==14.0.0
64
+ ruff==0.11.11
65
+ safehttpx==0.1.6
66
+ safetensors==0.5.3
67
+ semantic-version==2.10.0
68
+ shellingham==1.5.4
69
+ six==1.17.0
70
+ sniffio==1.3.1
71
+ starlette==0.46.2
72
+ sympy==1.14.0
73
+ timm==1.0.15
74
+ tokenizers==0.15.2
75
+ tomlkit==0.13.2
76
+ torch==2.7.0
77
+ torchvision==0.22.0
78
+ tqdm==4.67.1
79
+ transformers==4.38.2
80
+ triton==3.3.0
81
+ typer==0.15.4
82
+ typing-inspection==0.4.1
83
+ typing_extensions==4.13.2
84
+ tzdata==2025.2
85
+ urllib3==2.4.0
86
+ uvicorn==0.34.2
87
+ websockets==15.0.1
88
+ scipy
89
+ git+https://github.com/compphoto/Intrinsic.git
start.sh ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Startup script for the floor visualizer backend.
3
+ # Run once after cloning the repo on the server.
4
+
5
+ set -e
6
+
7
+ echo "==> Installing dependencies..."
8
+ pip install --no-cache-dir -r requirements-linux-cpu.txt
9
+
10
+ echo "==> Creating data directories..."
11
+ mkdir -p data/uploads data/jobs
12
+
13
+ echo "==> Starting server..."
14
+ VISUALIZER_CONFIG=visualizer.segformer.toml uvicorn app:app --host 0.0.0.0 --port 8002 --workers 1
visualizer.gpu.toml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GPU quality preset for the floor visualizer backend.
2
+ # Run with:
3
+ # VISUALIZER_CONFIG=visualizer.gpu.toml uvicorn app:app --host 0.0.0.0 --port 8002
4
+
5
+ [models]
6
+ segmentation_model = "oneformer"
7
+ oneformer_model_name = "shi-labs/oneformer_ade20k_swin_large"
8
+ mask2former_model_name = "facebook/mask2former-swin-small-ade-semantic"
9
+ segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
10
+ depth_model_name = "Intel/dpt-large"
11
+ intrinsic_model_version = "v2"
12
+
13
+ [runtime]
14
+ enable_depth_estimation = true
15
+ enable_intrinsic_shading = true
16
+ data_dir = "data"
visualizer.hf.toml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Preset for Hugging Face Spaces (CPU-friendly with depth & shading enabled)
2
+ # Run with:
3
+ # VISUALIZER_CONFIG=visualizer.hf.toml uvicorn app:app --host 0.0.0.0 --port 7860
4
+
5
+ [models]
6
+ segmentation_model = "segformer"
7
+ segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
8
+ depth_model_name = "Intel/dpt-large"
9
+ intrinsic_model_version = "v2"
10
+
11
+ [runtime]
12
+ enable_depth_estimation = true
13
+ enable_intrinsic_shading = true
14
+ data_dir = "data"
visualizer.local.toml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lightweight local preset for CPU-only development on macOS.
2
+ # Run with:
3
+ # VISUALIZER_CONFIG=visualizer.local.toml uvicorn app:app --host 0.0.0.0 --port 8002
4
+
5
+ [models]
6
+ segmentation_model = "segformer"
7
+ segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
8
+ depth_model_name = "Intel/dpt-large"
9
+
10
+ [runtime]
11
+ enable_depth_estimation = false
12
+ enable_intrinsic_shading = false
13
+ data_dir = "data"
visualizer.segformer.toml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CPU / low-VRAM preset using SegFormer as the primary segmentation model.
2
+ # Use this on CPU-only servers or while waiting for GPU quota approval.
3
+ # Run with:
4
+ # VISUALIZER_CONFIG=visualizer.segformer.toml uvicorn app:app --host 0.0.0.0 --port 8002
5
+
6
+ [models]
7
+ segmentation_model = "segformer"
8
+ segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
9
+ depth_model_name = "Intel/dpt-large"
10
+
11
+ [runtime]
12
+ enable_depth_estimation = false
13
+ enable_intrinsic_shading = false
14
+ data_dir = "data"