multimodalart HF Staff committed on
Commit
d00ab3b
·
verified ·
1 Parent(s): 223d5e9

Dynamic GPU duration (per-step model); move remote upsample off-GPU

Browse files
Files changed (1) hide show
  1. app.py +63 -40
app.py CHANGED
@@ -126,52 +126,48 @@ def remote_upsample(prompt, width, height):
126
  return json.dumps(jp, ensure_ascii=False, separators=(",", ":"))
127
 
128
 
129
- @spaces.GPU(duration=120, size="xlarge")
130
- def generate(
131
- prompt,
132
- mode="Default · 20 steps",
133
- upsampler=UPSAMPLERS[0],
134
- width=1024,
135
- height=1024,
136
- seed=0,
137
- randomize_seed=False,
138
- progress=gr.Progress(track_tqdm=True),
139
- ):
140
- t_enter = time.perf_counter()
141
- if randomize_seed or seed < 0:
142
- seed = random.randint(0, MAX_SEED)
143
-
144
- # Overlap the AOTI block-patch with upsampling: the transformer is idle while we upsample.
 
 
 
 
 
 
 
 
 
 
 
 
145
  aoti_thread = Thread(target=_apply_aoti, daemon=True)
146
  aoti_thread.start()
147
-
148
- # Always upsample. Prefer Ideogram's hosted magic-prompt; fall back to the local Qwen graft on any failure.
149
- use_remote = upsampler == UPSAMPLERS[0] and bool(IDEOGRAM_API_KEY)
150
- final_prompt = prompt
151
- if use_remote:
152
- progress(0.0, desc="✍️ Upsampling (Ideogram)…")
153
- t = time.perf_counter()
154
- try:
155
- final_prompt = remote_upsample(prompt, int(width), int(height))
156
- print(f"[timing] upsample remote: {time.perf_counter() - t:.2f}s", flush=True)
157
- except Exception as e:
158
- print(f"[upsample] remote failed, falling back to local: {e!r}", flush=True)
159
- gr.Warning("Ideogram API unavailable — using the local Qwen upsampler.")
160
- use_remote = False
161
- if not use_remote:
162
  progress(0.0, desc="✍️ Upsampling (local Qwen)…")
163
  t = time.perf_counter()
164
  try:
165
  final_prompt = pipe.upsample_prompt(
166
- prompt, height=int(height), width=int(width), lm_head_repo_id=LM_HEAD_REPO
167
  )[0]
168
  print(f"[timing] upsample local: {time.perf_counter() - t:.2f}s", flush=True)
169
  except Exception as e:
170
  print(f"[upsample] local failed: {e!r}", flush=True)
171
  gr.Warning("Local upsampler unavailable — generating from the raw prompt.")
172
-
173
  aoti_thread.join() # ensure blocks are patched before the diffusion loop
174
- print(f"[timing] pre-diffusion (enter -> ready): {time.perf_counter() - t_enter:.2f}s", flush=True)
175
 
176
  progress(0.0, desc="🎨 Generating image…")
177
  generator = torch.Generator(device="cuda").manual_seed(int(seed))
@@ -184,7 +180,36 @@ def generate(
184
  caption = json.loads(final_prompt)
185
  except Exception:
186
  caption = {"prompt": final_prompt}
187
- return image, seed, caption
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
 
190
  @spaces.GPU(size="xlarge")
@@ -201,10 +226,8 @@ try:
201
  except Exception as e: # a flaky ZeroGPU worker must not take down the Space
202
  print(f"[warmup] failed (will warm lazily on first request): {e!r}", flush=True)
203
 
204
- CSS='''
205
- .dark .gradio-container { color: var(--body-text-color); }
206
- '''
207
- with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4", css=CSS) as demo:
208
  gr.Markdown(
209
  "# Ideogram 4\n"
210
  "Ideogram's first open-weights model — a 9.3B-parameter text-to-image foundation model at the "
@@ -255,4 +278,4 @@ with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4", css=CSS) as demo:
255
  outputs=[out_image, seed, out_caption],
256
  )
257
 
258
- demo.launch()
 
126
  return json.dumps(jp, ensure_ascii=False, separators=(",", ":"))
127
 
128
 
129
# --- Dynamic GPU duration ---------------------------------------------------------------------------------
# Per-step diffusion time is modelled as linear in the image-token count, fitted through the two measured
# anchors (1024x1024 @ 1.10 it/s, 2048x2048 @ 6 s/it). Between the anchors the chord is treated as an
# overestimate, i.e. a safe budget; below the 1024 anchor the line is only an extrapolation, so the
# per-step estimate is clamped to a small floor for small images.
# Remote upsample is a network call done OFF the GPU (in `generate`), so it isn't budgeted here.
_TOK_1024, _TOK_2048 = (1024 // 16) ** 2, (2048 // 16) ** 2  # 4096, 16384 image tokens
_PS_1024, _PS_2048 = 1.0 / 1.10, 6.0  # measured seconds/iteration at each anchor
_PS_B = (_PS_2048 - _PS_1024) / (_TOK_2048 - _TOK_1024)  # slope: extra seconds/step per image token
_PS_A = _PS_1024 - _PS_B * _TOK_1024  # intercept; negative, so the line crosses zero below the 1024 anchor
LOCAL_UPSAMPLE_S = 15  # local Qwen graft+generate (~12s) with headroom
DIFFUSION_OVERHEAD_S = 8  # .so dlopen + block patch + cudnn setup on a cold worker's first forward
DURATION_MARGIN = 1.3  # multiplicative safety factor applied to the summed budget
140
+
141
+
142
def _per_step(width, height):
    """Estimate the seconds one diffusion step takes for a ``width`` x ``height`` image.

    The image-token count is taken as ``(width // 16) * (height // 16)`` and fed
    through the line ``_PS_A + _PS_B * tokens``. The result is floored at 0.2 s,
    so the estimate never drops below that for small images.

    NOTE(review): whether 0.2 s/step is a safe lower bound for images smaller than
    the 1024 anchor is not established here -- confirm against measurements.
    """
    tokens_wide = int(width) // 16
    tokens_high = int(height) // 16
    linear_estimate = _PS_A + _PS_B * (tokens_wide * tokens_high)
    return max(0.2, linear_estimate)
144
+
145
+
146
def _gpu_duration(final_prompt, mode, width, height, seed, do_local, progress=None):
    """Return the GPU time budget, in whole seconds, for one `_gpu_generate` call.

    This function is handed to ``spaces.GPU(duration=...)`` on `_gpu_generate` and
    takes the same parameters; ``final_prompt``, ``seed`` and ``progress`` are
    accepted only to match that signature and are not used.

    The budget is ``steps * _per_step(width, height)`` plus the fixed diffusion
    overhead, plus the local-upsample allowance when ``do_local`` is truthy. The sum
    is scaled by ``DURATION_MARGIN``, rounded up, and never reported below 30.
    An unknown ``mode`` falls back to the "Default · 20 steps" entry of ``MODES``.
    """
    fallback_mode = MODES["Default · 20 steps"]
    num_steps = MODES.get(mode, fallback_mode)["num_inference_steps"]

    seconds = num_steps * _per_step(width, height) + DIFFUSION_OVERHEAD_S
    if do_local:
        # The local upsampler runs inside the GPU call, so its time must be budgeted too.
        seconds = seconds + LOCAL_UPSAMPLE_S

    padded = int(math.ceil(seconds * DURATION_MARGIN))
    return max(30, padded)
152
+
153
+
154
+ @spaces.GPU(duration=_gpu_duration, size="xlarge")
155
+ def _gpu_generate(final_prompt, mode, width, height, seed, do_local, progress=gr.Progress(track_tqdm=True)):
156
+ # Overlap the AOTI block-patch with the (transformer-idle) local upsample, if any.
157
  aoti_thread = Thread(target=_apply_aoti, daemon=True)
158
  aoti_thread.start()
159
+ if do_local:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  progress(0.0, desc="✍️ Upsampling (local Qwen)…")
161
  t = time.perf_counter()
162
  try:
163
  final_prompt = pipe.upsample_prompt(
164
+ final_prompt, height=int(height), width=int(width), lm_head_repo_id=LM_HEAD_REPO
165
  )[0]
166
  print(f"[timing] upsample local: {time.perf_counter() - t:.2f}s", flush=True)
167
  except Exception as e:
168
  print(f"[upsample] local failed: {e!r}", flush=True)
169
  gr.Warning("Local upsampler unavailable — generating from the raw prompt.")
 
170
  aoti_thread.join() # ensure blocks are patched before the diffusion loop
 
171
 
172
  progress(0.0, desc="🎨 Generating image…")
173
  generator = torch.Generator(device="cuda").manual_seed(int(seed))
 
180
  caption = json.loads(final_prompt)
181
  except Exception:
182
  caption = {"prompt": final_prompt}
183
+ return image, int(seed), caption
184
+
185
+
186
def generate(
    prompt,
    mode="Default · 20 steps",
    upsampler=UPSAMPLERS[0],
    width=1024,
    height=1024,
    seed=0,
    randomize_seed=False,
    progress=gr.Progress(track_tqdm=True),
):
    """Resolve the seed, try the remote upsampler, then delegate to `_gpu_generate`.

    A fresh seed in ``[0, MAX_SEED]`` is drawn when ``randomize_seed`` is set or
    ``seed`` is negative.

    The remote (Ideogram) upsample is attempted only when ``upsampler`` is the first
    entry of ``UPSAMPLERS`` and ``IDEOGRAM_API_KEY`` is truthy. It runs here, before
    the GPU-decorated call, because it is a network call. If it is not attempted, or
    it raises, the raw ``prompt`` is forwarded with ``do_local=True`` so that
    `_gpu_generate` upsamples locally.

    Returns whatever `_gpu_generate` returns.
    """
    if randomize_seed or seed < 0:
        seed = random.randint(0, MAX_SEED)

    # Start from the local-upsample path; a successful remote call overrides both values.
    prompt_for_gpu = prompt
    needs_local_upsample = True

    remote_selected = upsampler == UPSAMPLERS[0] and IDEOGRAM_API_KEY
    if remote_selected:
        progress(0.0, desc="✍️ Upsampling (Ideogram)…")
        started = time.perf_counter()
        try:
            prompt_for_gpu = remote_upsample(prompt, int(width), int(height))
            needs_local_upsample = False
            elapsed = time.perf_counter() - started
            print(f"[timing] upsample remote (off-GPU): {elapsed:.2f}s", flush=True)
        except Exception as exc:
            # Best-effort: any remote failure degrades to the on-GPU local upsampler.
            print(f"[upsample] remote failed, falling back to local: {exc!r}", flush=True)
            gr.Warning("Ideogram API unavailable — using the local Qwen upsampler.")

    return _gpu_generate(prompt_for_gpu, mode, width, height, seed, needs_local_upsample)
213
 
214
 
215
  @spaces.GPU(size="xlarge")
 
226
  except Exception as e: # a flaky ZeroGPU worker must not take down the Space
227
  print(f"[warmup] failed (will warm lazily on first request): {e!r}", flush=True)
228
 
229
+
230
+ with gr.Blocks(theme=gr.themes.Citrus(), title="Ideogram 4") as demo:
 
 
231
  gr.Markdown(
232
  "# Ideogram 4\n"
233
  "Ideogram's first open-weights model — a 9.3B-parameter text-to-image foundation model at the "
 
278
  outputs=[out_image, seed, out_caption],
279
  )
280
 
281
+ demo.queue().launch()