Spaces:

LuLing
/

IScene-demo

Running on Zero

App Files Files Community

IScene-demo / interactive_demo.py

LuLing

update

7f44940 verified about 1 month ago

raw

history blame contribute delete

20.9 kB

	"""Interactive I-Scene demo.

	Run from the repository root:

	python interactive_demo.py
	"""

	from __future__ import annotations

	import argparse
	import os
	import uuid
	from dataclasses import dataclass
	from datetime import datetime
	from pathlib import Path
	from typing import Any

	os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")

	import gradio as gr
	import numpy as np
	import torch
	from gradio_image_prompter import ImagePrompter
	from gradio_litmodel3d import LitModel3D
	from PIL import Image
	from transformers import AutoModelForMaskGeneration, AutoProcessor

	from iscene.inference.inferencer import ISceneInferencer


	# Directory that contains this script; examples and outputs are resolved against it.
	REPO_ROOT = Path(__file__).resolve().parent
	# Default I-Scene model id. MODEL_ID / BASE_MODEL_ID are module globals that
	# main() overwrites from the command line before the demo is built.
	DEFAULT_MODEL = "LuLing/IScene"
	MODEL_ID = DEFAULT_MODEL
	BASE_MODEL_ID: str | None = None
	# Initial values for the "Seed" and "GLB mesh simplify ratio" controls.
	DEFAULT_SEED = 43
	DEFAULT_SIMPLIFY = 0.95
	# Generated scenes go under outputs/demo; uploaded and derived inputs under outputs/demo/_uploads.
	DEFAULT_OUTPUT_ROOT = REPO_ROOT / "outputs" / "demo"
	UPLOAD_ROOT = DEFAULT_OUTPUT_ROOT / "_uploads"
	# Size that uploaded prompt images are resized to.
	TARGET_SIZE = (512, 512)
	# Use CUDA with bfloat16 when a GPU is available, otherwise CPU with float32.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32

	# Dropdown label -> Hugging Face model id for the selectable SAM checkpoints.
	# Insertion order is the order the choices are listed in the UI.
	SAM_MODELS = {
	    "sam-vit-huge (best quality, 636M)": "facebook/sam-vit-huge",
	    "sam-vit-large (balanced, 308M)": "facebook/sam-vit-large",
	    "sam-vit-base (fastest, 91M)": "facebook/sam-vit-base",
	}

	# Introductory Markdown text for the demo page; build_demo() passes it to gr.Markdown.
	MARKDOWN = """
	# I-Scene Interactive Demo

	Generate a 3D scene from one image.

	We notice some instability problems caused by huggingface space. We suggest serious users run this demo locally.

	Workflow:
	1. Pick an example, or upload an image and draw boxes around objects.
	2. Use the example mask, or click Run SAM Segmentation to create a mask.
	3. Click Generate Gaussian Splatting Preview to create and preview `scene_pred.ply`.
	4. Click Generate GLB only when you need mesh assets.
	5. To save each instance in the scene, run the inference code with the same RGB/mask; `run_inference.py` writes per-instance assets alongside the scene output.

	Note: The first run may be slow because the model checkpoint needs to be downloaded and cached.
	"""

	# RGB example images, relative to <repo>/examples, in the order they are offered.
	EXAMPLE_ORDER = [
	    "Scenethesis/SAM-3D-testing-case_rgb.png",
	    "Gen3DSR/Gen3DSR_scene1_rgb.png",
	    "MIDI-example/cartoon_style_07_rgb.png",
	    "Scenethesis/children_playroom2_rgb.png",
	    "Scenethesis/scenethesis-reading-corner-rgb.png",
	    "DL3DV/DL3DV-garden-rgb.png",
	    "DL3DV/DL3DV-table-chair-set-rgb.png",
	    "DL3DV/DL3DV-tables-rgb.png",
	    "outdoor/scene_beach2_rgb.png",
	]


	def _discover_examples() -> list[tuple[str, Path, Path]]:
	    """Collect the bundled example pairs that are actually present on disk.

	    Walks ``EXAMPLE_ORDER`` in order. An entry is kept only when both the RGB
	    image and its segmentation sibling (file name with ``_rgb`` -> ``_seg`` or
	    ``-rgb`` -> ``-seg``) exist under ``<repo>/examples``.

	    Returns:
	        ``(label, rgb_path, seg_path)`` tuples, where ``label`` is
	        ``"<parent folder> / <case name>"``.
	    """
	    root = REPO_ROOT / "examples"
	    found: list[tuple[str, Path, Path]] = []
	    for relative_name in EXAMPLE_ORDER:
	        rgb_file = root / relative_name
	        if not rgb_file.exists():
	            continue

	        file_name = rgb_file.name
	        # The "_rgb" marker takes precedence over "-rgb" when both occur.
	        for rgb_tag, seg_tag in (("_rgb", "_seg"), ("-rgb", "-seg")):
	            if rgb_tag in file_name:
	                seg_file = rgb_file.with_name(file_name.replace(rgb_tag, seg_tag))
	                break
	        else:
	            seg_file = None
	        if seg_file is None or not seg_file.exists():
	            continue

	        parent_label = rgb_file.relative_to(root).parent.as_posix()
	        case_name = rgb_file.stem.replace("_rgb", "").replace("-rgb", "")
	        found.append((f"{parent_label} / {case_name}", rgb_file, seg_file))
	    return found


	# Example pairs found on disk at import time, as (label, rgb_path, mask_path).
	EXAMPLES = _discover_examples()
	# The same pairs as [rgb, mask] string rows, the shape handed to gr.Examples.
	EXAMPLE_ROWS = [[str(rgb_file), str(mask_file)] for _label, rgb_file, mask_file in EXAMPLES]


	@dataclass
	class DemoRunState:
	    """Inputs and settings of the latest Gaussian-splatting run.

	    Produced by ``run_gaussian_preview`` and consumed by ``run_glb_export`` so
	    that the GLB export reuses the same RGB image, mask, output directory and
	    seed.
	    """

	    # Path of the saved RGB input image.
	    rgb_path: str
	    # Path of the instance mask used for the run.
	    mask_path: str
	    # Directory the scene outputs were written to.
	    output_dir: str
	    # Random seed passed to the inferencer.
	    seed: int
	    # Simplify value that was passed for the preview run.
	    simplify: float


	# Process-wide caches so each model is loaded at most once.
	# SAM (processor, model) pairs, keyed by Hugging Face model id.
	_sam_cache: dict[str, tuple[AutoProcessor, AutoModelForMaskGeneration]] = {}
	# I-Scene inferencers, keyed by (MODEL_ID, BASE_MODEL_ID or "").
	_inferencer_cache: dict[tuple[str, str], ISceneInferencer] = {}


	def _make_session_dir(request: gr.Request | None, root: Path = UPLOAD_ROOT) -> Path:
	    """Return the per-session directory under *root*, creating it if needed.

	    Args:
	        request: Current Gradio request. Its ``session_hash`` names the
	            directory; when the request is ``None`` or has no hash, a random
	            10-character hex name is used instead.
	        root: Parent directory for session folders.

	    Returns:
	        Path of the (now existing) session directory.
	    """
	    session_hash = getattr(request, "session_hash", None) or uuid.uuid4().hex[:10]
	    path = root / session_hash
	    path.mkdir(parents=True, exist_ok=True)
	    return path


	def _timestamped_output_dir(request: gr.Request | None) -> Path:
	    """Build the default output directory path for a generation run.

	    The name is ``<YYYYmmdd_HHMMSS>_<session>`` under ``DEFAULT_OUTPUT_ROOT``,
	    where ``<session>`` is the request's ``session_hash`` or, when that is
	    unavailable, a random 10-character hex string. The directory is not
	    created here.
	    """
	    session_hash = getattr(request, "session_hash", None) or uuid.uuid4().hex[:10]
	    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	    return DEFAULT_OUTPUT_ROOT / f"{timestamp}_{session_hash}"


	def _get_prompt_image(image_prompts: Any) -> Image.Image | None:
	    """Extract the RGB image from an image-prompter value.

	    Args:
	        image_prompts: Either a dict with an ``"image"`` entry or the image
	            itself (a PIL image, or anything ``Image.open`` accepts).

	    Returns:
	        The image converted to RGB, or ``None`` when no image is present.
	    """
	    if image_prompts is None:
	        return None
	    image = image_prompts.get("image") if isinstance(image_prompts, dict) else image_prompts
	    if image is None:
	        return None
	    if isinstance(image, Image.Image):
	        return image.convert("RGB")
	    # convert() returns an independent in-memory copy, so the file handle can
	    # be released as soon as the conversion is done.
	    with Image.open(image) as opened:
	        return opened.convert("RGB")


	def _save_prompt_rgb(image_prompts: Any, request: gr.Request | None) -> Path:
	    """Save the prompt image as ``input_rgb.png`` in the session directory.

	    Args:
	        image_prompts: Image-prompter value holding the RGB image.
	        request: Current Gradio request, used to pick the session directory.

	    Returns:
	        Path of the written PNG file.

	    Raises:
	        gr.Error: If no image is present.
	    """
	    image = _get_prompt_image(image_prompts)
	    if image is None:
	        raise gr.Error("Please upload an RGB image.")
	    path = _make_session_dir(request) / "input_rgb.png"
	    image.save(path)
	    return path


	def _resolve_mask_path(mask_path: str | None) -> Path:
	    """Validate the selected mask path and return it as a ``Path``.

	    Args:
	        mask_path: Path string of the instance mask, or an empty/``None``
	            value when no mask has been chosen yet.

	    Returns:
	        The mask path.

	    Raises:
	        gr.Error: If no mask was chosen, or the path is not an existing file.
	    """
	    if not mask_path:
	        raise gr.Error("Please choose an example or run SAM segmentation first.")
	    path = Path(mask_path)
	    # is_file() rather than exists(): a directory would pass an existence
	    # check but cannot be read as a mask image downstream.
	    if not path.is_file():
	        raise gr.Error(f"Mask file does not exist: {path}")
	    return path


	def _get_inferencer() -> ISceneInferencer:
	    """Return the inferencer for the current model settings, loading it once.

	    Instances are cached in ``_inferencer_cache`` under the key
	    ``(MODEL_ID, BASE_MODEL_ID or "")``.
	    """
	    cache_key = (MODEL_ID, BASE_MODEL_ID or "")
	    try:
	        return _inferencer_cache[cache_key]
	    except KeyError:
	        inferencer = ISceneInferencer.from_pretrained(MODEL_ID, base_model_id=BASE_MODEL_ID)
	    _inferencer_cache[cache_key] = inferencer
	    return _inferencer_cache[cache_key]


	def _get_sam_model(model_choice: str) -> tuple[AutoProcessor, AutoModelForMaskGeneration]:
	    """Return the (processor, model) pair for a SAM dropdown choice.

	    The choice label is mapped to a model id through ``SAM_MODELS``. On first
	    use the processor and model are loaded, the model is moved to
	    ``DEVICE``/``DTYPE`` and switched to eval mode, and the pair is stored in
	    ``_sam_cache``.

	    Raises:
	        KeyError: If ``model_choice`` is not a key of ``SAM_MODELS``.
	    """
	    model_id = SAM_MODELS[model_choice]
	    cached = _sam_cache.get(model_id)
	    if cached is not None:
	        return cached

	    sam_processor = AutoProcessor.from_pretrained(model_id)
	    sam_model = AutoModelForMaskGeneration.from_pretrained(model_id).to(DEVICE, DTYPE)
	    sam_model.eval()
	    _sam_cache[model_id] = (sam_processor, sam_model)
	    return sam_processor, sam_model


	def _boxes_from_prompts(image_prompts: Any) -> list[list[list[int]]]:
	    """Convert drawn prompts into the nested box list passed to the SAM processor.

	    Each prompt entry is read as ``x1, y1 = entry[0], entry[1]`` and
	    ``x2, y2 = entry[3], entry[4]``; the values in between are ignored.
	    Corners are normalised so that min <= max. Entries that are too short to
	    hold both corners, or that describe a zero-area box, are skipped.

	    Args:
	        image_prompts: Image-prompter value; only a dict's ``"points"`` entry
	            is used.

	    Returns:
	        ``[[ [x_min, y_min, x_max, y_max], ... ]]`` - one outer list wrapping
	        the boxes of the single image.

	    Raises:
	        gr.Error: If nothing was drawn or no valid box remains.
	    """
	    points = image_prompts.get("points", []) if isinstance(image_prompts, dict) else []
	    if not points:
	        raise gr.Error("Please draw at least one box before running SAM segmentation.")
	    boxes = []
	    for entry in points:
	        # Indices up to 4 are read below; treat shorter entries as invalid
	        # instead of failing with an IndexError.
	        if len(entry) < 5:
	            continue
	        # NOTE(review): entries are not checked for being box prompts (as
	        # opposed to point prompts) - confirm against the prompter's format.
	        x_min, x_max = sorted((int(entry[0]), int(entry[3])))
	        y_min, y_max = sorted((int(entry[1]), int(entry[4])))
	        if x_max <= x_min or y_max <= y_min:
	            continue
	        boxes.append([x_min, y_min, x_max, y_max])
	    if not boxes:
	        raise gr.Error("No valid boxes were drawn.")
	    return [boxes]


	def _mask_to_polygon(mask: np.ndarray) -> list[list[int]] | None:
	    """Return the largest external contour of *mask* as a list of ``[x, y]`` points.

	    Args:
	        mask: Mask array; it is cast to ``uint8`` before contour extraction.

	    Returns:
	        The vertices of the contour with the largest area, or ``None`` when
	        no contour is found.
	    """
	    # Imported lazily so OpenCV is only needed when polygon refinement is used.
	    import cv2

	    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    if not contours:
	        return None
	    largest = max(contours, key=cv2.contourArea)
	    return largest.reshape(-1, 2).tolist()


	def _polygon_to_mask(polygon: list[list[int]], image_shape: tuple[int, int]) -> np.ndarray:
	    """Rasterise *polygon* into a ``uint8`` mask of shape *image_shape*.

	    Pixels inside the filled polygon are 1, all others 0.
	    """
	    # Imported lazily so OpenCV is only needed when polygon refinement is used.
	    import cv2

	    canvas = np.zeros(image_shape, dtype=np.uint8)
	    vertices = np.array(polygon, dtype=np.int32)
	    cv2.fillPoly(canvas, [vertices], color=(1,))
	    return canvas


	def _refine_masks(
	    masks: torch.Tensor,
	    *,
	    polygon_refinement: bool,
	    mask_threshold: float,
	) -> list[np.ndarray]:
	    """Turn a SAM mask tensor into a list of binary ``uint8`` masks.

	    The tensor is moved to CPU as float. A 5-D tensor is reduced by taking
	    index 0 of its third axis; a 4-D tensor is then averaged over axis 1.
	    Values strictly greater than ``mask_threshold`` become 1, the rest 0.

	    Args:
	        masks: Mask tensor as returned by the processor's post-processing.
	        polygon_refinement: When true, each mask is replaced by the filled
	            polygon of its largest external contour (masks without any
	            contour are kept unchanged).
	        mask_threshold: Threshold applied to the reduced mask values.

	    Returns:
	        One array per entry along the first axis of the thresholded result.
	    """
	    reduced = masks.detach().cpu().float()
	    if reduced.ndim == 5:
	        reduced = reduced[:, :, 0]
	    if reduced.ndim == 4:
	        reduced = reduced.mean(dim=1)
	    binary = (reduced > mask_threshold).numpy().astype(np.uint8)

	    results = list(binary)
	    if not polygon_refinement:
	        return results

	    for position in range(len(results)):
	        current = results[position]
	        outline = _mask_to_polygon(current)
	        if outline is None:
	            continue
	        results[position] = _polygon_to_mask(outline, current.shape)
	    return results


	def _palette() -> list[int]:
	    """Build a flat 256-colour palette ``[r0, g0, b0, r1, g1, b1, ...]``.

	    Entry 0 is black. The remaining 255 colours step the hue by the golden
	    ratio each time, using fixed chroma 0.81 and a constant offset of 0.09
	    added to every channel.
	    """
	    chroma = 0.81
	    offset = 0.09
	    golden_ratio = 0.618033988749895

	    flat = [0, 0, 0]
	    hue = 0.0
	    for _ in range(255):
	        hue = (hue + golden_ratio) % 1.0
	        sector = hue * 6.0
	        secondary = chroma * (1 - abs(sector % 2 - 1))
	        # One (r, g, b) layout per unit-wide hue sector; everything from
	        # sector 5 upwards uses the last layout.
	        layouts = (
	            (chroma, secondary, 0),
	            (secondary, chroma, 0),
	            (0, chroma, secondary),
	            (0, secondary, chroma),
	            (secondary, 0, chroma),
	            (chroma, 0, secondary),
	        )
	        channels = layouts[min(int(sector), 5)]
	        flat.extend(int((channel + offset) * 255) for channel in channels)
	    return flat


	def _label_mask_to_pil(label_map: np.ndarray) -> Image.Image:
	    """Encode an integer label map as a PIL image.

	    When every label is below 256 the result is a palettised ("P") image
	    coloured with ``_palette()``. Otherwise the label is packed into an RGB
	    image: low byte in the red channel, next byte in green, blue left at 0.
	    """
	    if label_map.max(initial=0) >= 256:
	        packed = np.zeros(label_map.shape + (3,), dtype=np.uint8)
	        packed[..., 0] = label_map & 255
	        packed[..., 1] = (label_map >> 8) & 255
	        return Image.fromarray(packed, mode="RGB")

	    indexed = Image.fromarray(label_map.astype(np.uint8), mode="P")
	    indexed.putpalette(_palette())
	    return indexed


	def resize_prompt_image(image_prompts: Any) -> Any:
	    """Resize the prompt image to ``TARGET_SIZE`` and return a fresh prompt value.

	    The resized image is written to ``UPLOAD_ROOT`` as
	    ``prompt_<random hex>.png``.

	    Returns:
	        ``{"image": <saved path>, "points": []}``, or ``image_prompts``
	        unchanged when it contains no image.
	    """
	    source = _get_prompt_image(image_prompts)
	    if source is None:
	        return image_prompts

	    scaled = source.resize(TARGET_SIZE, Image.Resampling.LANCZOS)
	    UPLOAD_ROOT.mkdir(parents=True, exist_ok=True)
	    unique_tag = uuid.uuid4().hex[:10]
	    target = UPLOAD_ROOT / f"prompt_{unique_tag}.png"
	    scaled.save(target)
	    return {"image": str(target), "points": []}


	def reset_uploaded_image(image_prompts: Any) -> tuple[Any, None, str]:
	    """Handle a new upload: resize the image and clear any previous mask.

	    Returns:
	        ``(resized prompt value, None, "")`` - the new prompter value, an
	        empty mask preview, and an empty mask path.
	    """
	    resized_prompt = resize_prompt_image(image_prompts)
	    return resized_prompt, None, ""


	def _coerce_file_path(value: Any) -> str:
	    """Reduce a file-like component value to a plain path string.

	    For a dict, the first truthy entry among ``"path"``, ``"name"`` and
	    ``"image"`` is used. Any other value is stringified as-is. Falsy input
	    yields ``""``.
	    """
	    if not isinstance(value, dict):
	        return str(value or "")
	    for key in ("path", "name", "image"):
	        candidate = value.get(key)
	        if candidate:
	            return str(candidate)
	    return ""


	def _raw_example_mask_path(mask_path: Any) -> str:
	    """Map a selected example mask back to the bundled mask file.

	    The file name of ``mask_path`` is compared with the mask file names in
	    ``EXAMPLES``; the first match returns the bundled mask's path. Without a
	    match the coerced input path is returned.
	    """
	    fallback = _coerce_file_path(mask_path)
	    wanted_name = Path(fallback).name
	    bundled_matches = (
	        str(bundled_mask)
	        for _label, _rgb, bundled_mask in EXAMPLES
	        if bundled_mask.name == wanted_name
	    )
	    return next(bundled_matches, fallback)


	def load_example_pair(rgb_path: Any, mask_path: Any) -> tuple[dict[str, Any], str, str]:
	    """Load a clicked example into the prompter, mask preview and mask path.

	    Returns:
	        ``(prompt value with no drawn points, mask path for the preview,
	        bundled mask path used for generation)``.
	    """
	    prompt_value = {"image": _coerce_file_path(rgb_path), "points": []}
	    preview_mask = _coerce_file_path(mask_path)
	    generation_mask = _raw_example_mask_path(mask_path)
	    return prompt_value, preview_mask, generation_mask


	@torch.no_grad()
	def run_segmentation(
	    image_prompts: Any,
	    model_choice: str,
	    polygon_refinement: bool,
	    mask_threshold: float,
	    request: gr.Request,
	) -> tuple[str, str]:
	    """Run SAM on the drawn boxes and save the result as an instance label mask.

	    Args:
	        image_prompts: Image-prompter value with the RGB image and drawn boxes.
	        model_choice: Key of ``SAM_MODELS`` selecting the SAM checkpoint.
	        polygon_refinement: Forwarded to ``_refine_masks``.
	        mask_threshold: Forwarded to ``_refine_masks``.
	        request: Current Gradio request, used to pick the session directory.

	    Returns:
	        The path of the saved ``sam_mask.png`` twice: once for the mask
	        preview and once for the hidden mask-path field.

	    Raises:
	        gr.Error: If no image is present or no valid box was drawn.
	    """
	    image = _get_prompt_image(image_prompts)
	    if image is None:
	        raise gr.Error("Please upload an RGB image before running segmentation.")
	    boxes = _boxes_from_prompts(image_prompts)
	    processor, segmentator = _get_sam_model(model_choice)

	    encoded = processor(images=image, input_boxes=boxes, return_tensors="pt")
	    inputs = encoded.to(segmentator.device, segmentator.dtype)
	    outputs = segmentator(**inputs)
	    per_image_masks = processor.post_process_masks(
	        masks=outputs.pred_masks,
	        original_sizes=inputs.original_sizes,
	        reshaped_input_sizes=inputs.reshaped_input_sizes,
	    )
	    # Only one image is processed, so take the first entry.
	    instance_masks = _refine_masks(
	        per_image_masks[0],
	        polygon_refinement=polygon_refinement,
	        mask_threshold=mask_threshold,
	    )

	    # PIL reports (width, height); the label map is indexed (height, width).
	    width, height = image.size
	    label_map = np.zeros((height, width), dtype=np.uint32)
	    # Labels start at 1 (0 is background); later masks overwrite earlier
	    # ones where they overlap.
	    for label, instance_mask in enumerate(instance_masks, start=1):
	        label_map[instance_mask > 0] = label

	    mask_image = _label_mask_to_pil(label_map)
	    raw_path = _make_session_dir(request) / "sam_mask.png"
	    mask_image.save(raw_path)

	    torch.cuda.empty_cache()
	    saved = str(raw_path)
	    return saved, saved


	def run_gaussian_preview(
	    image_prompts: Any,
	    mask_path: str | None,
	    seed: int,
	    simplify: float,
	    output_dir_text: str,
	    request: gr.Request,
	) -> tuple[str, dict[str, Any], dict[str, Any], str, DemoRunState]:
	    """Generate the Gaussian-splatting scene and return the UI updates for it.

	    Args:
	        image_prompts: Image-prompter value holding the RGB image.
	        mask_path: Path of the instance mask to use.
	        seed: Random seed for inference.
	        simplify: Simplify value forwarded to the inferencer.
	        output_dir_text: Optional output directory; blank means a
	            timestamped directory under ``DEFAULT_OUTPUT_ROOT``.
	        request: Current Gradio request.

	    Returns:
	        ``(scene_pred.ply path for the 3D preview, update enabling the PLY
	        download, update disabling the GLB download, empty progress HTML,
	        DemoRunState for a later GLB export)``.

	    Raises:
	        gr.Error: If the image or mask is missing, or ``scene_pred.ply`` was
	            not produced.
	    """
	    rgb_path = _save_prompt_rgb(image_prompts, request)
	    mask_path = _resolve_mask_path(mask_path)

	    # Use the stripped text for the path as well, so stray whitespace around
	    # a typed directory does not become part of its name.
	    requested_dir = (output_dir_text or "").strip()
	    # NOTE(review): this path comes straight from the web UI and is written
	    # to with overwrite=True; consider restricting it on shared deployments.
	    output_dir = Path(requested_dir).expanduser() if requested_dir else _timestamped_output_dir(request)
	    output_dir.mkdir(parents=True, exist_ok=True)

	    inferencer = _get_inferencer()
	    inferencer.infer_and_save_scene(
	        scene_rgb_path=rgb_path,
	        instance_seg_path=mask_path,
	        output_dir=output_dir,
	        overwrite=True,
	        save_dbg=False,
	        simplify=float(simplify),
	        only_3dgs=True,
	        seed=int(seed),
	    )

	    scene_ply = output_dir / "scene_pred.ply"
	    if not scene_ply.exists():
	        raise gr.Error(f"Generation finished but scene_pred.ply was not found in {output_dir}")

	    state = DemoRunState(
	        rgb_path=str(rgb_path),
	        mask_path=str(mask_path),
	        output_dir=str(output_dir),
	        seed=int(seed),
	        simplify=float(simplify),
	    )
	    torch.cuda.empty_cache()
	    return (
	        str(scene_ply),
	        gr.update(value=str(scene_ply), interactive=True),
	        gr.update(value=None, interactive=False),
	        "",
	        state,
	    )


	def _progress_bar(percent: int) -> str:
	    """Return the HTML for a horizontal progress bar filled to *percent*.

	    The value is truncated to an integer and clamped to the range 0-100.
	    """
	    clamped = min(max(int(percent), 0), 100)
	    return f"""
	<div style="height: 14px; width: 100%; background: #ece7dc; border-radius: 999px; overflow: hidden; border: 1px solid #d8cbb7;">
	<div style="height: 100%; width: {clamped}%; background: linear-gradient(90deg, #b77a2f, #e0b15a); transition: width 0.4s ease;"></div>
	</div>
	"""


	def run_glb_export(
	    state: DemoRunState | dict[str, Any] | None,
	    simplify: float,
	) -> Any:
	    """Export the scene of the last preview run as a GLB mesh, yielding progress.

	    This is a generator. Each yielded tuple is ``(GLB download button update,
	    progress bar HTML, GLB preview value)``.

	    Args:
	        state: State stored by ``run_gaussian_preview``; a plain dict with
	            the same fields is accepted as well.
	        simplify: Simplify value forwarded to the inferencer. The current
	            control value is used, not the one stored in ``state``.

	    Raises:
	        gr.Error: If no preview has been run yet, or ``scene_pred.glb`` was
	            not produced.
	    """
	    if state is None:
	        raise gr.Error("Please run GS preview first so the demo knows which RGB/mask/output directory to use.")
	    if isinstance(state, dict):
	        state = DemoRunState(**state)

	    output_dir = Path(state.output_dir)
	    yield gr.update(value=None, interactive=False), _progress_bar(5), gr.update(value=None)
	    inferencer = _get_inferencer()
	    yield gr.update(value=None, interactive=False), _progress_bar(15), gr.update(value=None)
	    # Same inputs and seed as the preview run, this time with mesh export
	    # enabled (only_3dgs=False).
	    inferencer.infer_and_save_scene(
	        scene_rgb_path=state.rgb_path,
	        instance_seg_path=state.mask_path,
	        output_dir=output_dir,
	        overwrite=True,
	        save_dbg=False,
	        simplify=float(simplify),
	        only_3dgs=False,
	        seed=int(state.seed),
	    )

	    scene_glb = output_dir / "scene_pred.glb"
	    if not scene_glb.exists():
	        raise gr.Error(f"GLB export finished but scene_pred.glb was not found in {output_dir}")

	    torch.cuda.empty_cache()
	    yield gr.update(value=str(scene_glb), interactive=True), _progress_bar(100), str(scene_glb)


	def clear_glb_outputs() -> tuple[dict[str, Any], str, None, dict[str, Any]]:
	    """Return values that reset the GLB-related outputs.

	    The tuple is ``(update clearing and disabling a download button, empty
	    string, None, update clearing a value)``.
	    """
	    disabled_download = gr.update(value=None, interactive=False)
	    cleared_value = gr.update(value=None)
	    return disabled_download, "", None, cleared_value


	def clear_generation_outputs() -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], str, None, dict[str, Any]]:
	    """Return values that reset every generation output before a new run.

	    In the order wired up in ``build_demo``: 3D preview cleared, PLY download
	    cleared and disabled, GLB download cleared and disabled, progress HTML
	    emptied, run state reset to ``None``, GLB preview cleared.
	    """
	    cleared_preview = gr.update(value=None)
	    disabled_ply_download = gr.update(value=None, interactive=False)
	    disabled_glb_download = gr.update(value=None, interactive=False)
	    cleared_glb_preview = gr.update(value=None)
	    return (
	        cleared_preview,
	        disabled_ply_download,
	        disabled_glb_download,
	        "",
	        None,
	        cleared_glb_preview,
	    )


	def build_demo() -> gr.Blocks:
	    """Build the Gradio Blocks UI and wire its event handlers.

	    Left column: image prompter, SAM controls, mask preview and generation
	    settings. Right column: Gaussian-splatting preview and download, then
	    GLB export, preview and download. Bundled examples are listed at the
	    bottom.

	    NOTE(review): the nesting of the layout containers below was
	    reconstructed from flattened source text; confirm the placement of
	    components inside each Row/Column/Accordion against the original file.

	    Returns:
	        The assembled (not yet launched) ``gr.Blocks`` app.
	    """
	    with gr.Blocks(title="I-Scene Interactive Demo", delete_cache=(3600, 3600)) as demo:
	        gr.Markdown(MARKDOWN)

	        # Holds the DemoRunState of the last preview run for the GLB export.
	        run_state = gr.State(None)

	        with gr.Row():
	            with gr.Column(scale=1):
	                image_prompts = ImagePrompter(
	                    label="RGB image (upload, then optionally draw boxes for SAM)",
	                    type="pil",
	                    height=520,
	                )

	                with gr.Row():
	                    segment_button = gr.Button("Run SAM Segmentation", variant="secondary")

	                with gr.Accordion("Segmentation settings", open=False):
	                    sam_model = gr.Dropdown(
	                        choices=list(SAM_MODELS.keys()),
	                        value="sam-vit-huge (best quality, 636M)",
	                        label="SAM model",
	                    )
	                    mask_threshold = gr.Slider(
	                        minimum=-1.0,
	                        maximum=1.0,
	                        value=0.0,
	                        step=0.05,
	                        label="Mask threshold",
	                    )
	                    polygon_refinement = gr.Checkbox(
	                        label="Polygon refinement",
	                        value=False,
	                    )

	                sam_mask_preview = gr.Image(
	                    label="Instance mask",
	                    type="filepath",
	                    format="png",
	                    height=260,
	                )
	                # Hidden field carrying the mask path that generation reads.
	                mask_path_value = gr.Textbox(visible=False)

	                with gr.Accordion("Generation settings", open=False):
	                    seed = gr.Number(label="Seed", value=DEFAULT_SEED, precision=0)
	                    simplify = gr.Slider(
	                        minimum=0.5,
	                        maximum=1.0,
	                        value=DEFAULT_SIMPLIFY,
	                        step=0.01,
	                        label="GLB mesh simplify ratio",
	                    )
	                    output_dir = gr.Textbox(
	                        label="Output directory (optional)",
	                        placeholder="Leave empty to use outputs/demo/<timestamp>_<session>",
	                    )

	                generate_gs_button = gr.Button("Generate Gaussian Splatting Preview", variant="primary", size="lg")

	            with gr.Column(scale=1):
	                preview = LitModel3D(
	                    label="3D preview",
	                    exposure=10.0,
	                    height=520,
	                )
	                download_gs = gr.DownloadButton(
	                    label="Download Gaussian Splatting PLY",
	                    interactive=False,
	                )

	                # NOTE(review): unclear from the flattened source whether the
	                # components after this button also belong inside the Row.
	                with gr.Row():
	                    generate_glb_button = gr.Button("Generate GLB", variant="secondary")
	                glb_progress = gr.HTML(value="")
	                glb_preview = gr.Model3D(
	                    label="GLB mesh preview",
	                    clear_color=(0.98, 0.96, 0.91, 1.0),
	                    display_mode="solid",
	                    height=360,
	                )
	                download_glb = gr.DownloadButton(
	                    label="Download Mesh GLB",
	                    interactive=False,
	                )

	        # A new upload is resized and clears the previous mask preview/path.
	        image_prompts.upload(
	            reset_uploaded_image,
	            inputs=[image_prompts],
	            outputs=[image_prompts, sam_mask_preview, mask_path_value],
	        )

	        segment_button.click(
	            run_segmentation,
	            inputs=[image_prompts, sam_model, polygon_refinement, mask_threshold],
	            outputs=[sam_mask_preview, mask_path_value],
	        )

	        # First clear all previous outputs, then run the preview generation.
	        generate_gs_button.click(
	            clear_generation_outputs,
	            outputs=[preview, download_gs, download_glb, glb_progress, run_state, glb_preview],
	            show_progress="hidden",
	        ).then(
	            run_gaussian_preview,
	            inputs=[
	                image_prompts,
	                mask_path_value,
	                seed,
	                simplify,
	                output_dir,
	            ],
	            outputs=[preview, download_gs, download_glb, glb_progress, run_state],
	            show_progress="full",
	        )

	        # run_glb_export is a generator; the default progress indicator is
	        # hidden and the glb_progress HTML bar is updated instead.
	        generate_glb_button.click(
	            run_glb_export,
	            inputs=[run_state, simplify],
	            outputs=[download_glb, glb_progress, glb_preview],
	            show_progress="hidden",
	        )

	        # Invisible inputs that receive the clicked example's RGB/mask paths.
	        example_rgb = gr.Image(label="RGB", type="filepath", visible=False)
	        example_mask = gr.Image(label="Instance mask", type="filepath", visible=False)

	        with gr.Row():
	            gr.Examples(
	                examples=EXAMPLE_ROWS,
	                inputs=[example_rgb, example_mask],
	                outputs=[image_prompts, sam_mask_preview, mask_path_value],
	                fn=load_example_pair,
	                cache_examples=False,
	                label="Examples",
	                run_on_click=True,
	            )

	    return demo


	def parse_args() -> argparse.Namespace:
	    """Parse the command-line options of the demo.

	    Options: ``--server_name`` (default ``0.0.0.0``), ``--server_port``
	    (default 7860), ``--share`` (flag), ``--model`` (default
	    ``DEFAULT_MODEL``) and ``--base_model`` (default ``None``).
	    """
	    option_specs = (
	        ("--server_name", {"default": "0.0.0.0"}),
	        ("--server_port", {"type": int, "default": 7860}),
	        ("--share", {"action": "store_true"}),
	        (
	            "--model",
	            {"default": DEFAULT_MODEL, "help": "I-Scene model id or local model package path."},
	        ),
	        (
	            "--base_model",
	            {
	                "default": None,
	                "help": "Optional TRELLIS base model id or local mirror path. Defaults to the model package metadata.",
	            },
	        ),
	    )
	    parser = argparse.ArgumentParser(description=__doc__)
	    for flag, options in option_specs:
	        parser.add_argument(flag, **options)
	    return parser.parse_args()


	def main() -> None:
	    """Entry point: apply CLI options, prepare directories and launch the demo."""
	    global MODEL_ID, BASE_MODEL_ID

	    args = parse_args()
	    # The model ids are module globals read later by _get_inferencer().
	    MODEL_ID, BASE_MODEL_ID = args.model, args.base_model

	    for directory in (DEFAULT_OUTPUT_ROOT, UPLOAD_ROOT):
	        directory.mkdir(parents=True, exist_ok=True)

	    demo = build_demo()
	    demo.queue()
	    launch_options = {
	        "server_name": args.server_name,
	        "server_port": args.server_port,
	        "share": args.share,
	    }
	    demo.launch(**launch_options)


	if __name__ == "__main__":
	    main()