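"""Gradio demo for SynLayers: single-image real-world layer decomposition on Hugging Face Spaces."""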
from __future__ import annotations
import os
import sys
from pathlib import Path
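
# `spaces` is only installed on Hugging Face Spaces; fall back to a no-op
# GPU decorator so the app also runs locally without ZeroGPU.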
try:
    import spaces
except ImportError:
    class _SpacesCompat:
        @staticmethod
        def GPU(*decorator_args, **decorator_kwargs):
            if decorator_args and callable(decorator_args[0]) and len(decorator_args) == 1 and not decorator_kwargs:
                return decorator_args[0]

            def decorator(fn):
                return fn

            return decorator

    spaces = _SpacesCompat()
import gradio as gr
import torch
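
# Make the repository root importable whether app.py lives at the repo root
# or one level down, by probing for the `infer/` and `models/` packages.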
CURRENT_FILE = Path(__file__).resolve()
PROJECT_ROOT = CURRENT_FILE.parents[1]
for candidate in (CURRENT_FILE.parent, CURRENT_FILE.parents[1]):
    if (candidate / "infer").exists() and (candidate / "models").exists():
        PROJECT_ROOT = candidate
        break
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from demo.real_world_pipeline import (  # noqa: E402
    DEFAULT_BBOX_MODEL,
    DEFAULT_REAL_CONFIG_PATH,
    DEFAULT_RUN_NAME,
    DEFAULT_WORK_DIR,
    run_real_world_pipeline,
)
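
# Sample images shown in the Examples widget; override via SYNLAYERS_EXAMPLE_DIR.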
DEFAULT_EXAMPLE_DIR = Path(
    os.environ.get(
        "SYNLAYERS_EXAMPLE_DIR",
        "/project/llmsvgen/share/data/kmw_layered_dataset/real_world_inference/layers_real_test_1024",
    )
)
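
# Parse an integer environment variable, falling back to `default`
# when the variable is unset or malformed.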
def read_int_env(name: str, default: int) -> int:
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default
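
# ZeroGPU allocation requests, overridable via environment variables;
# the requested duration is clamped to at least 60 seconds.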
ZERO_GPU_SIZE = (os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large").lower()
ZERO_GPU_DURATION = max(60, read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900))
def list_example_images(limit: int = 6) -> list[list[str]]:
    if not DEFAULT_EXAMPLE_DIR.exists():
        return []
    candidates = []
    for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp"):
        candidates.extend(DEFAULT_EXAMPLE_DIR.glob(ext))
    candidates = sorted(candidates)[:limit]
    return [[str(path)] for path in candidates]
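
# Assemble the output gallery: whole RGBA composite first, then the
# background, then each predicted layer in order.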
def build_gallery(result: dict) -> list[tuple[str, str]]:
    gallery: list[tuple[str, str]] = []
    if result.get("whole_image_rgba"):
        gallery.append((result["whole_image_rgba"], "Whole RGBA"))
    if result.get("background_rgba"):
        gallery.append((result["background_rgba"], "Background RGBA"))
    for idx, path in enumerate(result.get("layer_images", [])):
        gallery.append((path, f"Layer {idx}"))
    return gallery
def get_gpu_name() -> str:
    if not torch.cuda.is_available():
        return "None"
    try:
        return torch.cuda.get_device_name(torch.cuda.current_device())
    except Exception as exc:  # pragma: no cover - defensive runtime reporting
        return f"Unavailable ({exc})"
def is_zero_gpu_space() -> bool:
    accelerator = os.environ.get("ACCELERATOR", "").lower()
    return (
        os.environ.get("ZEROGPU_V2", "").lower() == "true"
        or os.environ.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1"
        or accelerator == "zerogpu"
        or accelerator.startswith("zero")
    )
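
# Summarize the runtime environment (Space ID, accelerator, ZeroGPU status,
# CUDA probe) as Markdown for the status panel at the top of the UI.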
def get_runtime_status_markdown() -> str:
    accelerator = os.environ.get("ACCELERATOR", "unknown")
    space_id = os.environ.get("SPACE_ID", "local")
    model_repo = os.environ.get("SYNLAYERS_MODEL_REPO", "(unset)")
    zero_gpu_enabled = is_zero_gpu_space()
    lines = ["## Runtime Status", f"- `SPACE_ID`: `{space_id}`", f"- `ACCELERATOR`: `{accelerator}`"]
    if zero_gpu_enabled:
        lines.extend(
            [
                "- `ZeroGPU mode`: `True`",
                f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`",
                f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds",
                f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
                f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`",
                "",
                "This Space is configured for Hugging Face ZeroGPU.",
                "A shared H200 GPU is requested on demand when you click `Run Full Pipeline`.",
                "Queueing and quota are managed by Hugging Face ZeroGPU, not by an in-app GPU selector.",
            ]
        )
    else:
        cuda_available = torch.cuda.is_available()
        lines.extend(
            [
                f"- `CUDA available`: `{cuda_available}`",
                f"- `GPU device`: `{get_gpu_name()}`",
                f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
                "",
            ]
        )
        if accelerator == "none" or not cuda_available:
            lines.extend(
                [
                    "This Space is not currently running with a usable CUDA GPU.",
                    "The GPU type must be chosen by the Space owner in Hugging Face `Settings -> Hardware`.",
                    "Visitors cannot switch GPUs from inside the Gradio app.",
                ]
            )
        else:
            lines.append("The CUDA runtime is available and the full SynLayers pipeline can run here.")
    return "\n".join(lines)
@spaces.GPU(duration=ZERO_GPU_DURATION, size=ZERO_GPU_SIZE)
def run_demo_inference(
    image_path: str,
    sample_name: str,
    max_new_tokens: int,
    seed_value: float,
) -> dict:
    seed = int(seed_value) if seed_value >= 0 else None
    return run_real_world_pipeline(
        image_path=image_path,
        sample_name=sample_name or None,
        work_dir=DEFAULT_WORK_DIR,
        bbox_model=DEFAULT_BBOX_MODEL,
        config_path=DEFAULT_REAL_CONFIG_PATH,
        max_new_tokens=int(max_new_tokens),
        seed=seed,
        run_name=DEFAULT_RUN_NAME,
    )
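
# Gradio-facing wrapper: validate the upload and surface pipeline failures
# as gr.Error popups instead of raw tracebacks.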
def run_demo(
    image_path: str,
    sample_name: str,
    max_new_tokens: int,
    seed_value: float,
):
    if not image_path:
        raise gr.Error("Please upload an input image first.")
    try:
        result = run_demo_inference(
            image_path=image_path,
            sample_name=sample_name,
            max_new_tokens=max_new_tokens,
            seed_value=seed_value,
        )
    except Exception as exc:
        raise gr.Error(str(exc)) from exc
    return (
        result["bbox_visualization"],
        result["merged_image"],
        result["bbox_record"].get("whole_caption", ""),
        result["bbox_record"],
        result["metadata"],
        build_gallery(result),
        result["archive_path"],
        result["case_dir"],
    )
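
# Assemble the Gradio UI: inputs on the left, pipeline outputs on the
# right, plus runtime status, gallery, and download widgets.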
with gr.Blocks(title="SynLayers Real-World Demo") as demo:
    gr.Markdown(
        """
        # SynLayers Real-World Decomposition

        Upload a single image and run the full pipeline in one step:

        1. VLM for whole-caption + bounding-box detection
        2. SynLayers real-image layer decomposition

        This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
        The first request may take time while model assets are loaded from Hugging Face.
        In ZeroGPU mode, a shared GPU is requested only while inference is running.
        """
    )
    runtime_status = gr.Markdown(get_runtime_status_markdown())
    refresh_status_button = gr.Button("Refresh Runtime Status")

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="filepath", label="Input Image")
            sample_name_input = gr.Textbox(
                label="Optional Sample Name",
                placeholder="Leave empty to use the uploaded filename",
            )
            max_new_tokens_input = gr.Slider(
                minimum=128,
                maximum=2048,
                value=1024,
                step=64,
                label="VLM Max New Tokens",
            )
            seed_input = gr.Number(
                value=42,
                precision=0,
                label="Seed (-1 keeps config default)",
            )
            run_button = gr.Button("Run Full Pipeline", variant="primary")
        with gr.Column(scale=1):
            bbox_vis_output = gr.Image(type="filepath", label="Detected Bounding Boxes")
            merged_output = gr.Image(type="filepath", label="Merged Decomposition")
            caption_output = gr.Textbox(label="Whole Caption", lines=6)

    with gr.Row():
        bbox_json_output = gr.JSON(label="BBox JSON")
        meta_json_output = gr.JSON(label="Inference Metadata")

    layer_gallery = gr.Gallery(label="Predicted Layers", columns=4, height="auto")

    with gr.Row():
        archive_output = gr.File(label="Download Result Bundle")
        case_dir_output = gr.Textbox(label="Saved Case Directory")

    examples = list_example_images()
    if examples:
        gr.Examples(examples=examples, inputs=[image_input], label="Example Images")

    refresh_status_button.click(
        fn=get_runtime_status_markdown,
        outputs=runtime_status,
    )
    run_button.click(
        fn=run_demo,
        inputs=[
            image_input,
            sample_name_input,
            max_new_tokens_input,
            seed_input,
        ],
        outputs=[
            bbox_vis_output,
            merged_output,
            caption_output,
            bbox_json_output,
            meta_json_output,
            layer_gallery,
            archive_output,
            case_dir_output,
        ],
    )
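
# Bind to all interfaces so the app is reachable inside the Spaces
# container; PORT defaults to Gradio's standard 7860.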
if __name__ == "__main__":
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
    )