Spaces:

ideogram-ai
/

ideogram4

Running on Zero

App Files Files Community

multimodalart HF Staff committed on Jun 3

Commit

4c728e2

verified ·

1 Parent(s): e04df5f

Remote Ideogram magic-prompt (default) + local Qwen fallback radio; lazy enhancer; AOTI off (recompiling)

Browse files

Files changed (1) hide show

app.py +70 -46

app.py CHANGED Viewed

@@ -13,9 +13,9 @@ import time
 from threading import Thread
 import gradio as gr
 import spaces
 import torch
-from huggingface_hub import hf_hub_download
 from diffusers import Ideogram4Pipeline
@@ -43,6 +43,11 @@ AOTI_REPO = "multimodalart/i4-block-aoti"
 AOTI_BLOCK_FILE = "Ideogram4TransformerBlock/package.pt2"
 MAX_SEED = 2**31 - 1
 # V4 presets (forward step-order: main CFG 7.0 -> polish 3.0).
 MODES = {
     "Turbo · 12 steps": dict(num_inference_steps=12, guidance_schedule=(7.0,) * 11 + (3.0,) * 1, mu=0.5, std=1.75),
@@ -59,32 +64,12 @@ pipe.unconditional_transformer.dequantize()
 pipe.to("cuda")
 print(f"[timing] pipeline load + dequant: {time.perf_counter() - t:.1f}s", flush=True)
-# --- Native prompt enhancer (grafts the hosted LM head + builds the Outlines processor) at startup. ---
-try:
-    t = time.perf_counter()
-    pipe.load_prompt_enhancer(lm_head_repo_id=LM_HEAD_REPO)
-    pipe._caption_model.lm_head.to("cuda")  # ZeroGPU-deferred move of just the grafted head
-    ENHANCER_OK = True
-    print(f"[timing] load_prompt_enhancer: {time.perf_counter() - t:.1f}s", flush=True)
-except Exception as e:
-    ENHANCER_OK = False
-    print(f"[enhancer] disabled: {e!r}", flush=True)
-# Pre-fetch the AOTI package AND pre-warm torch-inductor's CPU-ISA probe in the PARENT. The probe
-# (valid_vec_isa_list) compiles test programs (~seconds) the first time aoti_blocks_load builds a
-# LazyAOTIModel; doing it here once means every ZeroGPU fork inherits the functools.cache, so the
-# per-worker aoti_blocks_load is just the ~instant block patch instead of a ~20s compile.
-try:
-    hf_hub_download(AOTI_REPO, "package.pt2", subfolder="Ideogram4TransformerBlock")
-    from torch._inductor.cpu_vec_isa import valid_vec_isa_list
-    t = time.perf_counter()
-    valid_vec_isa_list()
-    print(f"[timing] vec-isa prewarm (parent): {time.perf_counter() - t:.1f}s", flush=True)
-    AOTI_OK = True
-except Exception as e:
-    AOTI_OK = False
-    print(f"[aoti] prefetch/prewarm failed, running eager: {e!r}", flush=True)
 _AOTI_APPLIED = False
@@ -107,22 +92,61 @@ def _apply_aoti():
         print(f"[aoti] apply failed, running eager: {e!r}", flush=True)
 @spaces.GPU(duration=240, size="xlarge")
-def generate(prompt, mode, enhance, width, height, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
     t_enter = time.perf_counter()
     if randomize_seed or seed < 0:
         seed = random.randint(0, MAX_SEED)
-    # Overlap the AOTI block-patch with upsampling: the transformer is idle while the text encoder runs.
     aoti_thread = Thread(target=_apply_aoti, daemon=True)
     aoti_thread.start()
     final_prompt = prompt
-    if enhance and ENHANCER_OK:
-        progress(0.0, desc="✍️ Upsampling prompt…")
         t = time.perf_counter()
-        final_prompt = pipe.upsample_prompt(prompt, height=int(height), width=int(width))[0]
-        print(f"[timing] upsample: {time.perf_counter() - t:.2f}s", flush=True)
     aoti_thread.join()  # ensure blocks are patched before the diffusion loop
     print(f"[timing] pre-diffusion (enter -> ready): {time.perf_counter() - t_enter:.2f}s", flush=True)
@@ -143,12 +167,11 @@ def generate(prompt, mode, enhance, width, height, seed, randomize_seed, progres
 @spaces.GPU(size="xlarge")
 def _warmup():
-    """Pay the AOTI patch + warm the upsampler on the startup worker (upsample only, no diffusion)."""
-    _apply_aoti()
-    if ENHANCER_OK:
-        t = time.perf_counter()
-        pipe.upsample_prompt("a red apple on a wooden table", height=1024, width=1024)
-        print(f"[timing] warmup upsample: {time.perf_counter() - t:.2f}s", flush=True)
 try:
@@ -162,9 +185,9 @@ with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4 (NF4) — diffusers p
         "## Ideogram 4 (NF4) — diffusers preview\n"
         f"Private demo of [`{MODEL_ID}`](https://huggingface.co/{MODEL_ID}) on the "
         "[diffusers PR](https://github.com/huggingface/diffusers-new-model-addition-ideogram) branch, on ZeroGPU.\n"
-        "**Prompt upsampling** rewrites your idea into Ideogram's native structured JSON caption "
-        "(the pipeline's own Qwen3-VL encoder + a grafted LM head + Outlines) via the native "
-        "`pipe.upsample_prompt`."
     )
     with gr.Row():
@@ -173,10 +196,11 @@ with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4 (NF4) — diffusers p
             mode = gr.Radio(choices=list(MODES.keys()), value="Default · 20 steps", label="Mode (speed ↔ quality)")
             run = gr.Button("Generate", variant="primary")
             with gr.Accordion("Advanced", open=False):
-                enhance = gr.Checkbox(
-                    label="Prompt upsampling",
-                    value=True,
-                    info="Rewrite the prompt into Ideogram's native JSON caption before generating.",
                 )
                 with gr.Row():
                     width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
@@ -190,7 +214,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4 (NF4) — diffusers p
     run.click(
         generate,
-        inputs=[prompt, mode, enhance, width, height, seed, randomize],
         outputs=[out_image, seed, out_caption],
     )

 from threading import Thread
 import gradio as gr
+import requests
 import spaces
 import torch
 from diffusers import Ideogram4Pipeline
 AOTI_BLOCK_FILE = "Ideogram4TransformerBlock/package.pt2"
 MAX_SEED = 2**31 - 1
+# Prompt upsampling: Ideogram's hosted magic-prompt (default) with the local Qwen graft as fallback.
+IDEOGRAM_MAGIC_PROMPT_URL = "https://api.ideogram.ai/v1/ideogram-v4/magic-prompt"
+IDEOGRAM_API_KEY = os.environ.get("IDEOGRAM_API_KEY")
+UPSAMPLERS = ["Ideogram (remote)", "Qwen (local)"]
 # V4 presets (forward step-order: main CFG 7.0 -> polish 3.0).
 MODES = {
     "Turbo · 12 steps": dict(num_inference_steps=12, guidance_schedule=(7.0,) * 11 + (3.0,) * 1, mu=0.5, std=1.75),
 pipe.to("cuda")
 print(f"[timing] pipeline load + dequant: {time.perf_counter() - t:.1f}s", flush=True)
+# The local prompt-enhancer LM head is grafted lazily by `pipe.upsample_prompt` on first use (onto the worker's
+# GPU), so no explicit load is needed here. Local is only the fallback; Ideogram's remote API is the default.
+# AOTI off: PR #5 changed the block forward (5 flat args -> 4 with a rope tuple), so the compiled .so is
+# stale. Recompiling against the new block; re-enable (prefetch + vec-isa prewarm) once the artifact is rebuilt.
+AOTI_OK = False
 _AOTI_APPLIED = False
         print(f"[aoti] apply failed, running eager: {e!r}", flush=True)
+def remote_upsample(prompt, width, height):
+    """Rewrite the prompt into Ideogram's native JSON caption via the hosted magic-prompt API."""
+    d = math.gcd(width, height) or 1
+    aspect_ratio = f"{width // d}x{height // d}"  # Ideogram's WxH form
+    resp = requests.post(
+        IDEOGRAM_MAGIC_PROMPT_URL,
+        headers={"Api-Key": IDEOGRAM_API_KEY, "Content-Type": "application/json"},
+        json={"text_prompt": prompt, "aspect_ratio": aspect_ratio},
+        timeout=120,
+    )
+    resp.raise_for_status()
+    jp = resp.json().get("json_prompt")
+    if not jp:
+        raise RuntimeError("Ideogram API returned no json_prompt")
+    jp.pop("aspect_ratio", None)
+    for el in jp.get("compositional_deconstruction", {}).get("elements", []):
+        if isinstance(el, dict):
+            el.pop("bbox", None)
+    return json.dumps(jp, ensure_ascii=False, separators=(",", ":"))
 @spaces.GPU(duration=240, size="xlarge")
+def generate(prompt, mode, upsampler, width, height, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
     t_enter = time.perf_counter()
     if randomize_seed or seed < 0:
         seed = random.randint(0, MAX_SEED)
+    # Overlap the AOTI block-patch with upsampling: the transformer is idle while we upsample.
     aoti_thread = Thread(target=_apply_aoti, daemon=True)
     aoti_thread.start()
+    # Always upsample. Prefer Ideogram's hosted magic-prompt; fall back to the local Qwen graft on any failure.
+    use_remote = upsampler == UPSAMPLERS[0] and bool(IDEOGRAM_API_KEY)
     final_prompt = prompt
+    if use_remote:
+        progress(0.0, desc="✍️ Upsampling (Ideogram)…")
+        t = time.perf_counter()
+        try:
+            final_prompt = remote_upsample(prompt, int(width), int(height))
+            print(f"[timing] upsample remote: {time.perf_counter() - t:.2f}s", flush=True)
+        except Exception as e:
+            print(f"[upsample] remote failed, falling back to local: {e!r}", flush=True)
+            gr.Warning("Ideogram API unavailable — using the local Qwen upsampler.")
+            use_remote = False
+    if not use_remote:
+        progress(0.0, desc="✍️ Upsampling (local Qwen)…")
         t = time.perf_counter()
+        try:
+            final_prompt = pipe.upsample_prompt(
+                prompt, height=int(height), width=int(width), lm_head_repo_id=LM_HEAD_REPO
+            )[0]
+            print(f"[timing] upsample local: {time.perf_counter() - t:.2f}s", flush=True)
+        except Exception as e:
+            print(f"[upsample] local failed: {e!r}", flush=True)
+            gr.Warning("Local upsampler unavailable — generating from the raw prompt.")
     aoti_thread.join()  # ensure blocks are patched before the diffusion loop
     print(f"[timing] pre-diffusion (enter -> ready): {time.perf_counter() - t_enter:.2f}s", flush=True)
 @spaces.GPU(size="xlarge")
 def _warmup():
+    """Warm the local upsampler (lazy LM-head graft) on the startup worker (no diffusion)."""
+    _apply_aoti()  # no-op while AOTI is disabled
+    t = time.perf_counter()
+    pipe.upsample_prompt("a red apple on a wooden table", height=1024, width=1024, lm_head_repo_id=LM_HEAD_REPO)
+    print(f"[timing] warmup upsample: {time.perf_counter() - t:.2f}s", flush=True)
 try:
         "## Ideogram 4 (NF4) — diffusers preview\n"
         f"Private demo of [`{MODEL_ID}`](https://huggingface.co/{MODEL_ID}) on the "
         "[diffusers PR](https://github.com/huggingface/diffusers-new-model-addition-ideogram) branch, on ZeroGPU.\n"
+        "**Prompt upsampling** rewrites your idea into Ideogram's native structured JSON caption. "
+        "**Ideogram (remote)** uses the hosted magic-prompt API; **Qwen (local)** uses the pipeline's own "
+        "Qwen3-VL encoder + a grafted LM head + Outlines. Remote is the default; local is the fallback."
     )
     with gr.Row():
             mode = gr.Radio(choices=list(MODES.keys()), value="Default · 20 steps", label="Mode (speed ↔ quality)")
             run = gr.Button("Generate", variant="primary")
             with gr.Accordion("Advanced", open=False):
+                upsampler = gr.Radio(
+                    choices=UPSAMPLERS,
+                    value=UPSAMPLERS[0],
+                    label="Prompt upsampler",
+                    info="Rewrite into Ideogram's native JSON caption. Remote (Ideogram) preferred; falls back to local.",
                 )
                 with gr.Row():
                     width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
     run.click(
         generate,
+        inputs=[prompt, mode, upsampler, width, height, seed, randomize],
         outputs=[out_image, seed, out_caption],
     )