| from __future__ import annotations |
|
|
| import os |
| import sys |
| from pathlib import Path |
|
|
try:
    import spaces
except ImportError:

    class _SpacesCompat:
        """Stand-in used when the ``spaces`` package cannot be imported.

        Only the ``GPU`` decorator is emulated, and it is a no-op: the
        decorated function is handed back unchanged.
        """

        @staticmethod
        def GPU(*decorator_args, **decorator_kwargs):
            """Return the decorated function untouched.

            Handles both the bare form (``@spaces.GPU``) and the
            parameterised form (``@spaces.GPU(duration=..., size=...)``).
            """
            # Bare usage means exactly one positional callable and no keywords.
            used_bare = (
                len(decorator_args) == 1
                and not decorator_kwargs
                and callable(decorator_args[0])
            )
            if used_bare:
                return decorator_args[0]
            # Parameterised usage: hand back an identity decorator.
            return lambda fn: fn

    spaces = _SpacesCompat()
|
|
| import gradio as gr |
| import torch |
|
|
# Locate the project root so that project-local packages become importable.
# The root is the first of (this file's directory, its parent) that contains
# both an "infer" and a "models" entry; otherwise the parent directory is used.
CURRENT_FILE = Path(__file__).resolve()
_search_roots = (CURRENT_FILE.parent, CURRENT_FILE.parents[1])
PROJECT_ROOT = next(
    (
        root
        for root in _search_roots
        if (root / "infer").exists() and (root / "models").exists()
    ),
    CURRENT_FILE.parents[1],
)

# Put the root at the front of sys.path, but never add it twice.
_project_root_entry = str(PROJECT_ROOT)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)
|
|
| from demo.real_world_pipeline import ( |
| DEFAULT_BBOX_MODEL, |
| DEFAULT_REAL_CONFIG_PATH, |
| DEFAULT_RUN_NAME, |
| DEFAULT_WORK_DIR, |
| run_real_world_pipeline, |
| ) |
|
|
# Directory scanned for example images. It can be overridden with the
# SYNLAYERS_EXAMPLE_DIR environment variable; the fallback is a fixed path.
_FALLBACK_EXAMPLE_DIR = (
    "/project/llmsvgen/share/data/kmw_layered_dataset/real_world_inference/layers_real_test_1024"
)
DEFAULT_EXAMPLE_DIR = Path(os.getenv("SYNLAYERS_EXAMPLE_DIR", _FALLBACK_EXAMPLE_DIR))
|
|
|
|
def read_int_env(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or its content
            cannot be parsed by ``int``.

    Returns:
        The parsed integer, or *default*.
    """
    try:
        return int(os.environ[name])
    except (KeyError, ValueError):
        # KeyError: variable not set. ValueError: not an integer literal.
        return default
|
|
|
|
# GPU size passed to the spaces.GPU decorator: trimmed and lower-cased, with a
# blank or whitespace-only value treated the same as the default "large".
_requested_gpu_size = os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip()
ZERO_GPU_SIZE = (_requested_gpu_size if _requested_gpu_size else "large").lower()

# Duration passed to the spaces.GPU decorator; values below 60 are raised to 60.
_requested_gpu_duration = read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900)
ZERO_GPU_DURATION = _requested_gpu_duration if _requested_gpu_duration > 60 else 60
|
|
|
|
def list_example_images(limit: int = 6, example_dir: Path | None = None) -> list[list[str]]:
    """List example images as rows suitable for ``gr.Examples``.

    Only regular files directly inside the directory are considered. The
    extension match is case-insensitive, so ``photo.JPG`` is found on
    case-sensitive filesystems as well.

    Args:
        limit: Maximum number of examples to return, applied after sorting
            the matching paths.
        example_dir: Directory to scan. Defaults to ``DEFAULT_EXAMPLE_DIR``.

    Returns:
        One single-element list (the path as a string) per image, or an empty
        list when the directory is missing or unreadable.
    """
    root = DEFAULT_EXAMPLE_DIR if example_dir is None else Path(example_dir)
    if not root.is_dir():
        return []

    try:
        entries = list(root.iterdir())
    except OSError:
        # Examples are optional; an unreadable directory must not break startup.
        return []

    image_suffixes = (".png", ".jpg", ".jpeg", ".webp")
    matches = sorted(
        entry
        for entry in entries
        if entry.is_file() and entry.name.lower().endswith(image_suffixes)
    )
    return [[str(entry)] for entry in matches[:limit]]
|
|
|
|
def build_gallery(result: dict) -> list[tuple[str, str]]:
    """Turn a pipeline result into ``(path, caption)`` pairs for a gallery.

    Args:
        result: Mapping that may contain ``"whole_image_rgba"``,
            ``"background_rgba"`` and ``"layer_images"``. Missing or falsy
            entries are skipped; a ``None`` layer list is treated as empty.

    Returns:
        The whole image first, then the background, then each layer
        captioned ``"Layer <index>"`` starting from 0.
    """
    fixed_entries = (
        ("whole_image_rgba", "Whole RGBA"),
        ("background_rgba", "Background RGBA"),
    )
    gallery: list[tuple[str, str]] = [
        (result[key], caption) for key, caption in fixed_entries if result.get(key)
    ]
    # `or []` also covers the key being present with a None value.
    layer_paths = result.get("layer_images") or []
    gallery.extend((path, f"Layer {idx}") for idx, path in enumerate(layer_paths))
    return gallery
|
|
|
|
def get_gpu_name() -> str:
    """Describe the current CUDA device for display.

    Returns:
        The device name reported by torch, the literal string ``"None"`` when
        CUDA is not available, or ``"Unavailable (<error>)"`` when querying
        the device raises.
    """
    if torch.cuda.is_available():
        try:
            device_index = torch.cuda.current_device()
            return torch.cuda.get_device_name(device_index)
        except Exception as exc:
            # This value is only shown in the status panel, so report the
            # failure as text instead of propagating it.
            return f"Unavailable ({exc})"
    return "None"
|
|
|
|
def is_zero_gpu_space() -> bool:
    """Report whether the environment variables indicate a ZeroGPU Space.

    Returns:
        True when ``ZEROGPU_V2`` equals ``"true"`` (case-insensitive), when
        ``ZERO_GPU_PATCH_TORCH_DEVICE`` is exactly ``"1"``, or when
        ``ACCELERATOR`` starts with ``"zero"`` (case-insensitive).
    """
    env = os.environ
    if env.get("ZEROGPU_V2", "").lower() == "true":
        return True
    if env.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1":
        return True
    # The prefix test also covers the exact value "zerogpu".
    return env.get("ACCELERATOR", "").lower().startswith("zero")
|
|
|
|
def get_runtime_status_markdown() -> str:
    """Build the Markdown text shown in the runtime status panel.

    The report always lists ``SPACE_ID`` and ``ACCELERATOR``. When
    ``is_zero_gpu_space()`` is true it adds the requested ZeroGPU size and
    duration; otherwise it reports CUDA availability, the GPU name, and
    whether the pipeline can run.

    Returns:
        The report lines joined with newlines.
    """
    env = os.environ
    accelerator = env.get("ACCELERATOR", "unknown")
    space_id = env.get("SPACE_ID", "local")
    model_repo = env.get("SYNLAYERS_MODEL_REPO", "(unset)")
    repo_line = f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`"

    report = [
        "## Runtime Status",
        f"- `SPACE_ID`: `{space_id}`",
        f"- `ACCELERATOR`: `{accelerator}`",
    ]

    if is_zero_gpu_space():
        report += [
            "- `ZeroGPU mode`: `True`",
            f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`",
            f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds",
            repo_line,
            f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`",
            "",
            "This Space is configured for Hugging Face ZeroGPU.",
            "A shared H200 GPU is requested on demand when you click `Run Full Pipeline`.",
            "Queueing and quota are managed by Hugging Face ZeroGPU, not by an in-app GPU selector.",
        ]
        return "\n".join(report)

    cuda_available = torch.cuda.is_available()
    report += [
        f"- `CUDA available`: `{cuda_available}`",
        f"- `GPU device`: `{get_gpu_name()}`",
        repo_line,
        "",
    ]

    if cuda_available and accelerator != "none":
        report.append("The CUDA runtime is available and the full SynLayers pipeline can run here.")
    else:
        report += [
            "This Space is not currently running with a usable CUDA GPU.",
            "The GPU type must be chosen by the Space owner in Hugging Face `Settings -> Hardware`.",
            "Visitors cannot switch GPUs from inside the Gradio app.",
        ]

    return "\n".join(report)
|
|
|
|
@spaces.GPU(duration=ZERO_GPU_DURATION, size=ZERO_GPU_SIZE)
def run_demo_inference(
    image_path: str,
    sample_name: str,
    max_new_tokens: int,
    seed_value: float | None,
) -> dict:
    """Run the real-world pipeline on one image inside the GPU-decorated call.

    Args:
        image_path: Path of the input image, forwarded unchanged.
        sample_name: Optional sample name; an empty value is forwarded as
            ``None``.
        max_new_tokens: Token budget, coerced to ``int`` before forwarding.
        seed_value: Seed from the UI. A negative or missing (``None``) value
            is forwarded as ``seed=None``, which the UI labels as keeping the
            config default.

    Returns:
        Whatever ``run_real_world_pipeline`` returns.
    """
    # A cleared Number field arrives as None; comparing None >= 0 would raise
    # TypeError, so treat it like a negative seed.
    has_explicit_seed = seed_value is not None and seed_value >= 0
    seed = int(seed_value) if has_explicit_seed else None
    return run_real_world_pipeline(
        image_path=image_path,
        sample_name=sample_name or None,
        work_dir=DEFAULT_WORK_DIR,
        bbox_model=DEFAULT_BBOX_MODEL,
        config_path=DEFAULT_REAL_CONFIG_PATH,
        max_new_tokens=int(max_new_tokens),
        seed=seed,
        run_name=DEFAULT_RUN_NAME,
    )
|
|
|
|
def run_demo(
    image_path: str,
    sample_name: str,
    max_new_tokens: int,
    seed_value: float,
):
    """Gradio click handler: validate input, run inference, unpack outputs.

    Args:
        image_path: Path of the uploaded image; must be non-empty.
        sample_name: Optional sample name from the UI.
        max_new_tokens: Token budget from the slider.
        seed_value: Seed from the number field.

    Returns:
        An 8-tuple: bbox visualization, merged image, whole caption, bbox
        record, metadata, gallery items, archive path, case directory.

    Raises:
        gr.Error: When no image was supplied, or when inference raises; the
            original exception is chained as the cause.
    """
    if not image_path:
        raise gr.Error("Please upload an input image first.")

    inference_kwargs = {
        "image_path": image_path,
        "sample_name": sample_name,
        "max_new_tokens": max_new_tokens,
        "seed_value": seed_value,
    }
    try:
        result = run_demo_inference(**inference_kwargs)
    except Exception as exc:
        # Re-raise as gr.Error with the exception text as its message.
        raise gr.Error(str(exc)) from exc

    # Lookups stay in output order and outside the try block, so a missing
    # key raises a plain KeyError.
    bbox_visualization = result["bbox_visualization"]
    merged_image = result["merged_image"]
    whole_caption = result["bbox_record"].get("whole_caption", "")
    bbox_record = result["bbox_record"]
    metadata = result["metadata"]
    gallery_items = build_gallery(result)
    archive_path = result["archive_path"]
    case_dir = result["case_dir"]
    return (
        bbox_visualization,
        merged_image,
        whole_caption,
        bbox_record,
        metadata,
        gallery_items,
        archive_path,
        case_dir,
    )
|
|
|
|
# Introductory copy rendered at the top of the page.
_INTRO_MARKDOWN = """
# SynLayers Real-World Decomposition

Upload a single image and run the full pipeline in one step:
1. VLM for whole-caption + bounding-box detection
2. SynLayers real-image layer decomposition

This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
The first request may take time while model assets are loaded from Hugging Face.

In ZeroGPU mode, a shared GPU is requested only while inference is running.
"""

# NOTE(review): the container nesting below is reconstructed from a source
# whose indentation was flattened — confirm it matches the intended layout.
with gr.Blocks(title="SynLayers Real-World Demo") as demo:
    # Header: intro text, runtime status panel and its refresh button.
    gr.Markdown(_INTRO_MARKDOWN)
    runtime_status = gr.Markdown(get_runtime_status_markdown())
    refresh_status_button = gr.Button("Refresh Runtime Status")

    with gr.Row():
        # First column: pipeline inputs and the run button.
        with gr.Column(scale=1):
            image_input = gr.Image(label="Input Image", type="filepath")
            sample_name_input = gr.Textbox(
                label="Optional Sample Name",
                placeholder="Leave empty to use the uploaded filename",
            )
            max_new_tokens_input = gr.Slider(
                label="VLM Max New Tokens",
                minimum=128,
                maximum=2048,
                step=64,
                value=1024,
            )
            seed_input = gr.Number(
                label="Seed (-1 keeps config default)",
                value=42,
                precision=0,
            )
            run_button = gr.Button("Run Full Pipeline", variant="primary")

        # Second column: image outputs.
        with gr.Column(scale=1):
            bbox_vis_output = gr.Image(label="Detected Bounding Boxes", type="filepath")
            merged_output = gr.Image(label="Merged Decomposition", type="filepath")

    # Remaining outputs: caption, JSON records, layer gallery, downloads.
    caption_output = gr.Textbox(label="Whole Caption", lines=6)
    with gr.Row():
        bbox_json_output = gr.JSON(label="BBox JSON")
        meta_json_output = gr.JSON(label="Inference Metadata")
    layer_gallery = gr.Gallery(label="Predicted Layers", columns=4, height="auto")
    with gr.Row():
        archive_output = gr.File(label="Download Result Bundle")
        case_dir_output = gr.Textbox(label="Saved Case Directory")

    # The examples widget is only added when example images were found.
    examples = list_example_images()
    if examples:
        gr.Examples(examples=examples, inputs=[image_input], label="Example Images")

    refresh_status_button.click(fn=get_runtime_status_markdown, outputs=runtime_status)

    # Output order must match the tuple returned by run_demo.
    pipeline_inputs = [
        image_input,
        sample_name_input,
        max_new_tokens_input,
        seed_input,
    ]
    pipeline_outputs = [
        bbox_vis_output,
        merged_output,
        caption_output,
        bbox_json_output,
        meta_json_output,
        layer_gallery,
        archive_output,
        case_dir_output,
    ]
    run_button.click(fn=run_demo, inputs=pipeline_inputs, outputs=pipeline_outputs)
|
|
|
|
if __name__ == "__main__":
    # Enable the request queue, then listen on all interfaces at $PORT
    # (7860 when PORT is unset).
    queued_app = demo.queue()
    listen_port = int(os.environ.get("PORT", "7860"))
    queued_app.launch(server_name="0.0.0.0", server_port=listen_port)
|
|