multimodalart HF Staff committed on
Commit
4c728e2
·
verified ·
1 Parent(s): e04df5f

Remote Ideogram magic-prompt (default) + local Qwen fallback radio; lazy enhancer; AOTI off (recompiling)

Browse files
Files changed (1) hide show
  1. app.py +70 -46
app.py CHANGED
@@ -13,9 +13,9 @@ import time
13
  from threading import Thread
14
 
15
  import gradio as gr
 
16
  import spaces
17
  import torch
18
- from huggingface_hub import hf_hub_download
19
 
20
  from diffusers import Ideogram4Pipeline
21
 
@@ -43,6 +43,11 @@ AOTI_REPO = "multimodalart/i4-block-aoti"
43
  AOTI_BLOCK_FILE = "Ideogram4TransformerBlock/package.pt2"
44
  MAX_SEED = 2**31 - 1
45
 
 
 
 
 
 
46
  # V4 presets (forward step-order: main CFG 7.0 -> polish 3.0).
47
  MODES = {
48
  "Turbo · 12 steps": dict(num_inference_steps=12, guidance_schedule=(7.0,) * 11 + (3.0,) * 1, mu=0.5, std=1.75),
@@ -59,32 +64,12 @@ pipe.unconditional_transformer.dequantize()
59
  pipe.to("cuda")
60
  print(f"[timing] pipeline load + dequant: {time.perf_counter() - t:.1f}s", flush=True)
61
 
62
- # --- Native prompt enhancer (grafts the hosted LM head + builds the Outlines processor) at startup. ---
63
- try:
64
- t = time.perf_counter()
65
- pipe.load_prompt_enhancer(lm_head_repo_id=LM_HEAD_REPO)
66
- pipe._caption_model.lm_head.to("cuda") # ZeroGPU-deferred move of just the grafted head
67
- ENHANCER_OK = True
68
- print(f"[timing] load_prompt_enhancer: {time.perf_counter() - t:.1f}s", flush=True)
69
- except Exception as e:
70
- ENHANCER_OK = False
71
- print(f"[enhancer] disabled: {e!r}", flush=True)
72
-
73
- # Pre-fetch the AOTI package AND pre-warm torch-inductor's CPU-ISA probe in the PARENT. The probe
74
- # (valid_vec_isa_list) compiles test programs (~seconds) the first time aoti_blocks_load builds a
75
- # LazyAOTIModel; doing it here once means every ZeroGPU fork inherits the functools.cache, so the
76
- # per-worker aoti_blocks_load is just the ~instant block patch instead of a ~20s compile.
77
- try:
78
- hf_hub_download(AOTI_REPO, "package.pt2", subfolder="Ideogram4TransformerBlock")
79
- from torch._inductor.cpu_vec_isa import valid_vec_isa_list
80
 
81
- t = time.perf_counter()
82
- valid_vec_isa_list()
83
- print(f"[timing] vec-isa prewarm (parent): {time.perf_counter() - t:.1f}s", flush=True)
84
- AOTI_OK = True
85
- except Exception as e:
86
- AOTI_OK = False
87
- print(f"[aoti] prefetch/prewarm failed, running eager: {e!r}", flush=True)
88
 
89
  _AOTI_APPLIED = False
90
 
@@ -107,22 +92,61 @@ def _apply_aoti():
107
  print(f"[aoti] apply failed, running eager: {e!r}", flush=True)
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  @spaces.GPU(duration=240, size="xlarge")
111
- def generate(prompt, mode, enhance, width, height, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
112
  t_enter = time.perf_counter()
113
  if randomize_seed or seed < 0:
114
  seed = random.randint(0, MAX_SEED)
115
 
116
- # Overlap the AOTI block-patch with upsampling: the transformer is idle while the text encoder runs.
117
  aoti_thread = Thread(target=_apply_aoti, daemon=True)
118
  aoti_thread.start()
119
 
 
 
120
  final_prompt = prompt
121
- if enhance and ENHANCER_OK:
122
- progress(0.0, desc="✍️ Upsampling prompt…")
 
 
 
 
 
 
 
 
 
 
123
  t = time.perf_counter()
124
- final_prompt = pipe.upsample_prompt(prompt, height=int(height), width=int(width))[0]
125
- print(f"[timing] upsample: {time.perf_counter() - t:.2f}s", flush=True)
 
 
 
 
 
 
126
 
127
  aoti_thread.join() # ensure blocks are patched before the diffusion loop
128
  print(f"[timing] pre-diffusion (enter -> ready): {time.perf_counter() - t_enter:.2f}s", flush=True)
@@ -143,12 +167,11 @@ def generate(prompt, mode, enhance, width, height, seed, randomize_seed, progres
143
 
144
  @spaces.GPU(size="xlarge")
145
  def _warmup():
146
- """Pay the AOTI patch + warm the upsampler on the startup worker (upsample only, no diffusion)."""
147
- _apply_aoti()
148
- if ENHANCER_OK:
149
- t = time.perf_counter()
150
- pipe.upsample_prompt("a red apple on a wooden table", height=1024, width=1024)
151
- print(f"[timing] warmup upsample: {time.perf_counter() - t:.2f}s", flush=True)
152
 
153
 
154
  try:
@@ -162,9 +185,9 @@ with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4 (NF4) — diffusers p
162
  "## Ideogram 4 (NF4) — diffusers preview\n"
163
  f"Private demo of [`{MODEL_ID}`](https://huggingface.co/{MODEL_ID}) on the "
164
  "[diffusers PR](https://github.com/huggingface/diffusers-new-model-addition-ideogram) branch, on ZeroGPU.\n"
165
- "**Prompt upsampling** rewrites your idea into Ideogram's native structured JSON caption "
166
- "(the pipeline's own Qwen3-VL encoder + a grafted LM head + Outlines) via the native "
167
- "`pipe.upsample_prompt`."
168
  )
169
 
170
  with gr.Row():
@@ -173,10 +196,11 @@ with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4 (NF4) — diffusers p
173
  mode = gr.Radio(choices=list(MODES.keys()), value="Default · 20 steps", label="Mode (speed ↔ quality)")
174
  run = gr.Button("Generate", variant="primary")
175
  with gr.Accordion("Advanced", open=False):
176
- enhance = gr.Checkbox(
177
- label="Prompt upsampling",
178
- value=True,
179
- info="Rewrite the prompt into Ideogram's native JSON caption before generating.",
 
180
  )
181
  with gr.Row():
182
  width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
@@ -190,7 +214,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4 (NF4) — diffusers p
190
 
191
  run.click(
192
  generate,
193
- inputs=[prompt, mode, enhance, width, height, seed, randomize],
194
  outputs=[out_image, seed, out_caption],
195
  )
196
 
 
13
  from threading import Thread
14
 
15
  import gradio as gr
16
+ import requests
17
  import spaces
18
  import torch
 
19
 
20
  from diffusers import Ideogram4Pipeline
21
 
 
43
  AOTI_BLOCK_FILE = "Ideogram4TransformerBlock/package.pt2"
44
  MAX_SEED = 2**31 - 1
45
 
46
+ # Prompt upsampling: Ideogram's hosted magic-prompt (default) with the local Qwen graft as fallback.
47
+ IDEOGRAM_MAGIC_PROMPT_URL = "https://api.ideogram.ai/v1/ideogram-v4/magic-prompt"
48
+ IDEOGRAM_API_KEY = os.environ.get("IDEOGRAM_API_KEY")
49
+ UPSAMPLERS = ["Ideogram (remote)", "Qwen (local)"]
50
+
51
  # V4 presets (forward step-order: main CFG 7.0 -> polish 3.0).
52
  MODES = {
53
  "Turbo · 12 steps": dict(num_inference_steps=12, guidance_schedule=(7.0,) * 11 + (3.0,) * 1, mu=0.5, std=1.75),
 
64
  pipe.to("cuda")
65
  print(f"[timing] pipeline load + dequant: {time.perf_counter() - t:.1f}s", flush=True)
66
 
67
+ # The local prompt-enhancer LM head is grafted lazily by `pipe.upsample_prompt` on first use (onto the worker's
68
+ # GPU), so no explicit load is needed here. Local is only the fallback; Ideogram's remote API is the default.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ # AOTI off: PR #5 changed the block forward (5 flat args -> 4 with a rope tuple), so the compiled .so is
71
+ # stale. Recompiling against the new block; re-enable (prefetch + vec-isa prewarm) once the artifact is rebuilt.
72
+ AOTI_OK = False
 
 
 
 
73
 
74
  _AOTI_APPLIED = False
75
 
 
92
  print(f"[aoti] apply failed, running eager: {e!r}", flush=True)
93
 
94
 
95
def remote_upsample(prompt, width, height, *, timeout=120):
    """Rewrite ``prompt`` into Ideogram's native JSON caption via the hosted magic-prompt API.

    Args:
        prompt: Free-text prompt, sent to the API as ``text_prompt``.
        width: Target image width; used only to derive the reduced aspect ratio.
        height: Target image height; used only to derive the reduced aspect ratio.
        timeout: Value forwarded to ``requests.post(timeout=...)``. Defaults to 120.

    Returns:
        The API's ``json_prompt`` object serialized as compact JSON text, with the
        top-level ``aspect_ratio`` key and each element's ``bbox`` key removed.

    Raises:
        RuntimeError: If no API key is configured, or the response does not carry a
            non-empty ``json_prompt`` object.
        Exception: Anything raised by ``requests`` (transport failure, non-2xx status
            via ``raise_for_status``, undecodable body) propagates unchanged.
    """
    if not IDEOGRAM_API_KEY:
        # Fail before touching the network: without a key the call cannot authenticate.
        raise RuntimeError("IDEOGRAM_API_KEY is not set")

    divisor = math.gcd(width, height) or 1  # gcd(0, 0) == 0; never divide by zero
    aspect_ratio = f"{width // divisor}x{height // divisor}"  # Ideogram's WxH form

    resp = requests.post(
        IDEOGRAM_MAGIC_PROMPT_URL,
        headers={"Api-Key": IDEOGRAM_API_KEY, "Content-Type": "application/json"},
        json={"text_prompt": prompt, "aspect_ratio": aspect_ratio},
        timeout=timeout,
    )
    resp.raise_for_status()

    payload = resp.json()
    caption = payload.get("json_prompt") if isinstance(payload, dict) else None
    if not caption:
        raise RuntimeError("Ideogram API returned no json_prompt")
    if not isinstance(caption, dict):
        # Report a malformed payload as an API problem rather than an AttributeError below.
        raise RuntimeError(
            f"Ideogram API returned a non-object json_prompt ({type(caption).__name__})"
        )

    # Strip the fields that must not appear in the returned caption.
    caption.pop("aspect_ratio", None)
    # Either level may be missing or explicitly null; treat both as "no elements".
    deconstruction = caption.get("compositional_deconstruction")
    elements = deconstruction.get("elements") if isinstance(deconstruction, dict) else None
    for element in elements if isinstance(elements, list) else ():
        if isinstance(element, dict):
            element.pop("bbox", None)

    return json.dumps(caption, ensure_ascii=False, separators=(",", ":"))
114
+
115
+
116
  @spaces.GPU(duration=240, size="xlarge")
117
+ def generate(prompt, mode, upsampler, width, height, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
118
  t_enter = time.perf_counter()
119
  if randomize_seed or seed < 0:
120
  seed = random.randint(0, MAX_SEED)
121
 
122
+ # Overlap the AOTI block-patch with upsampling: the transformer is idle while we upsample.
123
  aoti_thread = Thread(target=_apply_aoti, daemon=True)
124
  aoti_thread.start()
125
 
126
+ # Always upsample. Prefer Ideogram's hosted magic-prompt; fall back to the local Qwen graft on any failure.
127
+ use_remote = upsampler == UPSAMPLERS[0] and bool(IDEOGRAM_API_KEY)
128
  final_prompt = prompt
129
+ if use_remote:
130
+ progress(0.0, desc="✍️ Upsampling (Ideogram)…")
131
+ t = time.perf_counter()
132
+ try:
133
+ final_prompt = remote_upsample(prompt, int(width), int(height))
134
+ print(f"[timing] upsample remote: {time.perf_counter() - t:.2f}s", flush=True)
135
+ except Exception as e:
136
+ print(f"[upsample] remote failed, falling back to local: {e!r}", flush=True)
137
+ gr.Warning("Ideogram API unavailable — using the local Qwen upsampler.")
138
+ use_remote = False
139
+ if not use_remote:
140
+ progress(0.0, desc="✍️ Upsampling (local Qwen)…")
141
  t = time.perf_counter()
142
+ try:
143
+ final_prompt = pipe.upsample_prompt(
144
+ prompt, height=int(height), width=int(width), lm_head_repo_id=LM_HEAD_REPO
145
+ )[0]
146
+ print(f"[timing] upsample local: {time.perf_counter() - t:.2f}s", flush=True)
147
+ except Exception as e:
148
+ print(f"[upsample] local failed: {e!r}", flush=True)
149
+ gr.Warning("Local upsampler unavailable — generating from the raw prompt.")
150
 
151
  aoti_thread.join() # ensure blocks are patched before the diffusion loop
152
  print(f"[timing] pre-diffusion (enter -> ready): {time.perf_counter() - t_enter:.2f}s", flush=True)
 
167
 
@spaces.GPU(size="xlarge")
def _warmup():
    """Run one throwaway local upsample on the startup worker; no diffusion is performed.

    The first `pipe.upsample_prompt` call is what grafts the LM head lazily, so making
    that call here pays the cost before any user request.
    """
    _apply_aoti()  # no-op while AOTI is disabled
    started = time.perf_counter()
    pipe.upsample_prompt(
        "a red apple on a wooden table",
        height=1024,
        width=1024,
        lm_head_repo_id=LM_HEAD_REPO,
    )
    print(f"[timing] warmup upsample: {time.perf_counter() - started:.2f}s", flush=True)
 
175
 
176
 
177
  try:
 
185
  "## Ideogram 4 (NF4) — diffusers preview\n"
186
  f"Private demo of [`{MODEL_ID}`](https://huggingface.co/{MODEL_ID}) on the "
187
  "[diffusers PR](https://github.com/huggingface/diffusers-new-model-addition-ideogram) branch, on ZeroGPU.\n"
188
+ "**Prompt upsampling** rewrites your idea into Ideogram's native structured JSON caption. "
189
+ "**Ideogram (remote)** uses the hosted magic-prompt API; **Qwen (local)** uses the pipeline's own "
190
+ "Qwen3-VL encoder + a grafted LM head + Outlines. Remote is the default; local is the fallback."
191
  )
192
 
193
  with gr.Row():
 
196
  mode = gr.Radio(choices=list(MODES.keys()), value="Default · 20 steps", label="Mode (speed ↔ quality)")
197
  run = gr.Button("Generate", variant="primary")
198
  with gr.Accordion("Advanced", open=False):
199
+ upsampler = gr.Radio(
200
+ choices=UPSAMPLERS,
201
+ value=UPSAMPLERS[0],
202
+ label="Prompt upsampler",
203
+ info="Rewrite into Ideogram's native JSON caption. Remote (Ideogram) preferred; falls back to local.",
204
  )
205
  with gr.Row():
206
  width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
 
214
 
215
  run.click(
216
  generate,
217
+ inputs=[prompt, mode, upsampler, width, height, seed, randomize],
218
  outputs=[out_image, seed, out_caption],
219
  )
220