from __future__ import annotations import os import sys from pathlib import Path try: import spaces except ImportError: class _SpacesCompat: @staticmethod def GPU(*decorator_args, **decorator_kwargs): if decorator_args and callable(decorator_args[0]) and len(decorator_args) == 1 and not decorator_kwargs: return decorator_args[0] def decorator(fn): return fn return decorator spaces = _SpacesCompat() import gradio as gr import torch CURRENT_FILE = Path(__file__).resolve() PROJECT_ROOT = CURRENT_FILE.parents[1] for candidate in (CURRENT_FILE.parent, CURRENT_FILE.parents[1]): if (candidate / "infer").exists() and (candidate / "models").exists(): PROJECT_ROOT = candidate break if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) from demo.real_world_pipeline import ( # noqa: E402 DEFAULT_BBOX_MODEL, DEFAULT_REAL_CONFIG_PATH, DEFAULT_RUN_NAME, DEFAULT_WORK_DIR, run_real_world_pipeline, ) DEFAULT_EXAMPLE_DIR = Path( os.environ.get( "SYNLAYERS_EXAMPLE_DIR", "/project/llmsvgen/share/data/kmw_layered_dataset/real_world_inference/layers_real_test_1024", ) ) def read_int_env(name: str, default: int) -> int: raw = os.environ.get(name) if raw is None: return default try: return int(raw) except ValueError: return default ZERO_GPU_SIZE = (os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large").lower() ZERO_GPU_DURATION = max(60, read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900)) def list_example_images(limit: int = 6) -> list[list[str]]: if not DEFAULT_EXAMPLE_DIR.exists(): return [] candidates = [] for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp"): candidates.extend(DEFAULT_EXAMPLE_DIR.glob(ext)) candidates = sorted(candidates)[:limit] return [[str(path)] for path in candidates] def build_gallery(result: dict) -> list[tuple[str, str]]: gallery: list[tuple[str, str]] = [] if result.get("whole_image_rgba"): gallery.append((result["whole_image_rgba"], "Whole RGBA")) if result.get("background_rgba"): gallery.append((result["background_rgba"], "Background RGBA")) for idx, path in enumerate(result.get("layer_images", [])): gallery.append((path, f"Layer {idx}")) return gallery def get_gpu_name() -> str: if not torch.cuda.is_available(): return "None" try: return torch.cuda.get_device_name(torch.cuda.current_device()) except Exception as exc: # pragma: no cover - defensive runtime reporting return f"Unavailable ({exc})" def is_zero_gpu_space() -> bool: accelerator = os.environ.get("ACCELERATOR", "").lower() return ( os.environ.get("ZEROGPU_V2", "").lower() == "true" or os.environ.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1" or accelerator == "zerogpu" or accelerator.startswith("zero") ) def get_runtime_status_markdown() -> str: accelerator = os.environ.get("ACCELERATOR", "unknown") space_id = os.environ.get("SPACE_ID", "local") model_repo = os.environ.get("SYNLAYERS_MODEL_REPO", "(unset)") zero_gpu_enabled = is_zero_gpu_space() lines = ["## Runtime Status", f"- `SPACE_ID`: `{space_id}`", f"- `ACCELERATOR`: `{accelerator}`"] if zero_gpu_enabled: lines.extend( [ f"- `ZeroGPU mode`: `True`", f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`", f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds", f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`", f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`", "", "This Space is configured for Hugging Face ZeroGPU.", "A shared H200 GPU is requested on demand when you click `Run Full Pipeline`.", "Queueing and quota are managed by Hugging Face ZeroGPU, not by an in-app GPU selector.", ] ) else: cuda_available = torch.cuda.is_available() lines.extend( [ f"- `CUDA available`: `{cuda_available}`", f"- `GPU device`: `{get_gpu_name()}`", f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`", "", ] ) if accelerator == "none" or not cuda_available: lines.extend( [ "This Space is not currently running with a usable CUDA GPU.", "The GPU type must be chosen by the Space owner in Hugging Face `Settings -> Hardware`.", "Visitors cannot switch GPUs from inside the Gradio app.", ] ) else: lines.append("The CUDA runtime is available and the full SynLayers pipeline can run here.") return "\n".join(lines) @spaces.GPU(duration=ZERO_GPU_DURATION, size=ZERO_GPU_SIZE) def run_demo_inference( image_path: str, sample_name: str, max_new_tokens: int, seed_value: float, ) -> dict: seed = int(seed_value) if seed_value >= 0 else None return run_real_world_pipeline( image_path=image_path, sample_name=sample_name or None, work_dir=DEFAULT_WORK_DIR, bbox_model=DEFAULT_BBOX_MODEL, config_path=DEFAULT_REAL_CONFIG_PATH, max_new_tokens=int(max_new_tokens), seed=seed, run_name=DEFAULT_RUN_NAME, ) def run_demo( image_path: str, sample_name: str, max_new_tokens: int, seed_value: float, ): if not image_path: raise gr.Error("Please upload an input image first.") try: result = run_demo_inference( image_path=image_path, sample_name=sample_name, max_new_tokens=max_new_tokens, seed_value=seed_value, ) except Exception as exc: raise gr.Error(str(exc)) from exc return ( result["bbox_visualization"], result["merged_image"], result["bbox_record"].get("whole_caption", ""), result["bbox_record"], result["metadata"], build_gallery(result), result["archive_path"], result["case_dir"], ) with gr.Blocks(title="SynLayers Real-World Demo") as demo: gr.Markdown( """ # SynLayers Real-World Decomposition Upload a single image and run the full pipeline in one step: 1. VLM for whole-caption + bounding-box detection 2. SynLayers real-image layer decomposition This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU. The first request may take time while model assets are loaded from Hugging Face. In ZeroGPU mode, a shared GPU is requested only while inference is running. """ ) runtime_status = gr.Markdown(get_runtime_status_markdown()) refresh_status_button = gr.Button("Refresh Runtime Status") with gr.Row(): with gr.Column(scale=1): image_input = gr.Image(type="filepath", label="Input Image") sample_name_input = gr.Textbox( label="Optional Sample Name", placeholder="Leave empty to use the uploaded filename", ) max_new_tokens_input = gr.Slider( minimum=128, maximum=2048, value=1024, step=64, label="VLM Max New Tokens", ) seed_input = gr.Number( value=42, precision=0, label="Seed (-1 keeps config default)", ) run_button = gr.Button("Run Full Pipeline", variant="primary") with gr.Column(scale=1): bbox_vis_output = gr.Image(type="filepath", label="Detected Bounding Boxes") merged_output = gr.Image(type="filepath", label="Merged Decomposition") caption_output = gr.Textbox(label="Whole Caption", lines=6) with gr.Row(): bbox_json_output = gr.JSON(label="BBox JSON") meta_json_output = gr.JSON(label="Inference Metadata") layer_gallery = gr.Gallery(label="Predicted Layers", columns=4, height="auto") with gr.Row(): archive_output = gr.File(label="Download Result Bundle") case_dir_output = gr.Textbox(label="Saved Case Directory") examples = list_example_images() if examples: gr.Examples(examples=examples, inputs=[image_input], label="Example Images") refresh_status_button.click( fn=get_runtime_status_markdown, outputs=runtime_status, ) run_button.click( fn=run_demo, inputs=[ image_input, sample_name_input, max_new_tokens_input, seed_input, ], outputs=[ bbox_vis_output, merged_output, caption_output, bbox_json_output, meta_json_output, layer_gallery, archive_output, case_dir_output, ], ) if __name__ == "__main__": demo.queue().launch( server_name="0.0.0.0", server_port=int(os.environ.get("PORT", "7860")), )