Spaces:
Running on Zero
Running on Zero
Re-enable AOTI (PR#6 artifact) + model-card blurb + title
Browse files
app.py
CHANGED
|
@@ -19,6 +19,7 @@ import gradio as gr
|
|
| 19 |
import requests
|
| 20 |
import spaces
|
| 21 |
import torch
|
|
|
|
| 22 |
|
| 23 |
from diffusers import Ideogram4Pipeline
|
| 24 |
|
|
@@ -58,19 +59,33 @@ MODES = {
|
|
| 58 |
"Quality · 48 steps": dict(num_inference_steps=48, guidance_schedule=(7.0,) * 45 + (3.0,) * 3, mu=0.0, std=1.5),
|
| 59 |
}
|
| 60 |
|
| 61 |
-
# --- Pipeline
|
| 62 |
-
#
|
| 63 |
t = time.perf_counter()
|
| 64 |
pipe = Ideogram4Pipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
|
|
|
|
|
|
|
| 65 |
pipe.to("cuda")
|
| 66 |
-
print(f"[timing] pipeline load: {time.perf_counter() - t:.1f}s", flush=True)
|
| 67 |
|
| 68 |
# The local prompt-enhancer LM head is grafted lazily by `pipe.upsample_prompt` on first use (onto the worker's
|
| 69 |
# GPU), so no explicit load is needed here. Local is only the fallback; Ideogram's remote API is the default.
|
| 70 |
|
| 71 |
-
#
|
| 72 |
-
#
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
_AOTI_APPLIED = False
|
| 76 |
|
|
@@ -181,12 +196,13 @@ except Exception as e: # a flaky ZeroGPU worker must not take down the Space
|
|
| 181 |
print(f"[warmup] failed (will warm lazily on first request): {e!r}", flush=True)
|
| 182 |
|
| 183 |
|
| 184 |
-
with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4
|
| 185 |
gr.Markdown(
|
| 186 |
"# Ideogram 4\n"
|
| 187 |
-
"
|
| 188 |
-
"
|
| 189 |
-
"[Model](https://huggingface.co/ideogram-ai/ideogram-4-nf4) ·
|
|
|
|
| 190 |
)
|
| 191 |
|
| 192 |
with gr.Row():
|
|
|
|
| 19 |
import requests
|
| 20 |
import spaces
|
| 21 |
import torch
|
| 22 |
+
from huggingface_hub import hf_hub_download
|
| 23 |
|
| 24 |
from diffusers import Ideogram4Pipeline
|
| 25 |
|
|
|
|
| 59 |
"Quality · 48 steps": dict(num_inference_steps=48, guidance_schedule=(7.0,) * 45 + (3.0,) * 3, mu=0.0, std=1.5),
|
| 60 |
}
|
| 61 |
|
| 62 |
+
# --- Pipeline: dequantize both transformers nf4 -> bf16 in the parent (CPU) so AOTI can bind its weight-less
|
| 63 |
+
# graph to real bf16 weights (this is repo cold start, which is fine; function cold start stays fast). ---
|
| 64 |
t = time.perf_counter()
|
| 65 |
pipe = Ideogram4Pipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
|
| 66 |
+
pipe.transformer.dequantize()
|
| 67 |
+
pipe.unconditional_transformer.dequantize()
|
| 68 |
pipe.to("cuda")
|
| 69 |
+
print(f"[timing] pipeline load + dequant: {time.perf_counter() - t:.1f}s", flush=True)
|
| 70 |
|
| 71 |
# The local prompt-enhancer LM head is grafted lazily by `pipe.upsample_prompt` on first use (onto the worker's
|
| 72 |
# GPU), so no explicit load is needed here. Local is only the fallback; Ideogram's remote API is the default.
|
| 73 |
|
| 74 |
+
# Pre-fetch the AOTI package AND pre-warm torch-inductor's CPU-ISA probe in the PARENT (repo cold start). The
|
| 75 |
+
# probe (valid_vec_isa_list) compiles test programs (~20s) the first time aoti_blocks_load builds a LazyAOTIModel;
|
| 76 |
+
# doing it once here means every ZeroGPU fork inherits the functools.cache, so per-worker (function cold start)
|
| 77 |
+
# aoti_blocks_load is just the ~instant block patch instead of a ~20s compile.
|
| 78 |
+
try:
|
| 79 |
+
hf_hub_download(AOTI_REPO, "package.pt2", subfolder="Ideogram4TransformerBlock")
|
| 80 |
+
from torch._inductor.cpu_vec_isa import valid_vec_isa_list
|
| 81 |
+
|
| 82 |
+
t = time.perf_counter()
|
| 83 |
+
valid_vec_isa_list()
|
| 84 |
+
print(f"[timing] vec-isa prewarm (parent): {time.perf_counter() - t:.1f}s", flush=True)
|
| 85 |
+
AOTI_OK = True
|
| 86 |
+
except Exception as e:
|
| 87 |
+
AOTI_OK = False
|
| 88 |
+
print(f"[aoti] prefetch/prewarm failed, running eager: {e!r}", flush=True)
|
| 89 |
|
| 90 |
_AOTI_APPLIED = False
|
| 91 |
|
|
|
|
| 196 |
print(f"[warmup] failed (will warm lazily on first request): {e!r}", flush=True)
|
| 197 |
|
| 198 |
|
| 199 |
+
with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4") as demo:
|
| 200 |
gr.Markdown(
|
| 201 |
"# Ideogram 4\n"
|
| 202 |
+
"Ideogram's first open-weights model — a 9.3B-parameter text-to-image foundation model at the "
|
| 203 |
+
"forefront of design, with best-in-class text rendering.\n\n"
|
| 204 |
+
"[Model](https://huggingface.co/ideogram-ai/ideogram-4-nf4) · "
|
| 205 |
+
"[Blog](https://ideogram.ai/blog/ideogram-4.0/)"
|
| 206 |
)
|
| 207 |
|
| 208 |
with gr.Row():
|