Spaces:

ideogram-ai
/

ideogram4

Running on Zero

App Files Community

multimodalart HF Staff committed 3 days ago

Commit

d3ed6b5

verified ·

1 Parent(s): 31d862b

Re-enable AOTI (PR#6 artifact) + model-card blurb + title

Browse files

Files changed (1) hide show

app.py +26 -10

app.py CHANGED Viewed

@@ -19,6 +19,7 @@ import gradio as gr
 import requests
 import spaces
 import torch
 from diffusers import Ideogram4Pipeline
@@ -58,19 +59,33 @@ MODES = {
     "Quality · 48 steps": dict(num_inference_steps=48, guidance_schedule=(7.0,) * 45 + (3.0,) * 3, mu=0.0, std=1.5),
 }
-# --- Pipeline (nf4). No dequantize: that was only to give AOTI bf16 weights to bind; with AOTI off we run
-# the nf4 transformers directly (less VRAM, faster startup). Re-add dequantize when re-enabling AOTI. ---
 t = time.perf_counter()
 pipe = Ideogram4Pipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
-print(f"[timing] pipeline load: {time.perf_counter() - t:.1f}s", flush=True)
 # The local prompt-enhancer LM head is grafted lazily by `pipe.upsample_prompt` on first use (onto the worker's
 # GPU), so no explicit load is needed here. Local is only the fallback; Ideogram's remote API is the default.
-# AOTI off: PR #5 changed the block forward (5 flat args -> 4 with a rope tuple), so the compiled .so is
-# stale. Recompiling against the new block; re-enable (prefetch + vec-isa prewarm) once the artifact is rebuilt.
-AOTI_OK = False
 _AOTI_APPLIED = False
@@ -181,12 +196,13 @@ except Exception as e:  # a flaky ZeroGPU worker must not take down the Space
     print(f"[warmup] failed (will warm lazily on first request): {e!r}", flush=True)
-with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4 (NF4) — diffusers preview") as demo:
     gr.Markdown(
         "# Ideogram 4\n"
-        "The first open-weights Ideogram model — a 9.3B-parameter text-to-image diffusion model with strong "
-        "prompt adherence and text rendering.\n\n"
-        "[Model](https://huggingface.co/ideogram-ai/ideogram-4-nf4) · Blog (soon)"
     )
     with gr.Row():

 import requests
 import spaces
 import torch
+from huggingface_hub import hf_hub_download
 from diffusers import Ideogram4Pipeline
     "Quality · 48 steps": dict(num_inference_steps=48, guidance_schedule=(7.0,) * 45 + (3.0,) * 3, mu=0.0, std=1.5),
 }
+# --- Pipeline: dequantize both transformers nf4 -> bf16 in the parent (CPU) so AOTI can bind its weight-less
+# graph to real bf16 weights (this is repo cold start, which is fine; function cold start stays fast). ---
 t = time.perf_counter()
 pipe = Ideogram4Pipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
+pipe.transformer.dequantize()
+pipe.unconditional_transformer.dequantize()
 pipe.to("cuda")
+print(f"[timing] pipeline load + dequant: {time.perf_counter() - t:.1f}s", flush=True)
 # The local prompt-enhancer LM head is grafted lazily by `pipe.upsample_prompt` on first use (onto the worker's
 # GPU), so no explicit load is needed here. Local is only the fallback; Ideogram's remote API is the default.
+# Pre-fetch the AOTI package AND pre-warm torch-inductor's CPU-ISA probe in the PARENT (repo cold start). The
+# probe (valid_vec_isa_list) compiles test programs (~20s) the first time aoti_blocks_load builds a LazyAOTIModel;
+# doing it once here means every ZeroGPU fork inherits the functools.cache, so per-worker (function cold start)
+# aoti_blocks_load is just the ~instant block patch instead of a ~20s compile.
+try:
+    hf_hub_download(AOTI_REPO, "package.pt2", subfolder="Ideogram4TransformerBlock")
+    from torch._inductor.cpu_vec_isa import valid_vec_isa_list
+    t = time.perf_counter()
+    valid_vec_isa_list()
+    print(f"[timing] vec-isa prewarm (parent): {time.perf_counter() - t:.1f}s", flush=True)
+    AOTI_OK = True
+except Exception as e:
+    AOTI_OK = False
+    print(f"[aoti] prefetch/prewarm failed, running eager: {e!r}", flush=True)
 _AOTI_APPLIED = False
     print(f"[warmup] failed (will warm lazily on first request): {e!r}", flush=True)
+with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4") as demo:
     gr.Markdown(
         "# Ideogram 4\n"
+        "Ideogram's first open-weights model — a 9.3B-parameter text-to-image foundation model at the "
+        "forefront of design, with best-in-class text rendering.\n\n"
+        "[Model](https://huggingface.co/ideogram-ai/ideogram-4-nf4) · "
+        "[Blog](https://ideogram.ai/blog/ideogram-4.0/)"
     )
     with gr.Row():