sabannna committed on
Commit
cc874ea
·
verified ·
1 Parent(s): 93e2bb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -42
app.py CHANGED
@@ -1,9 +1,13 @@
1
  import os
 
 
 
2
  import gradio as gr
3
  import numpy as np
4
  import spaces
5
  import torch
6
  import random
 
7
  from PIL import Image
8
  from typing import Iterable
9
  from gradio.themes import Soft
@@ -131,6 +135,16 @@ pipe.load_lora_weights("vafipas663/Qwen-Edit-2509-Upscale-LoRA",
131
 
132
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
133
 
 
 
 
 
 
 
 
 
 
 
134
  MAX_SEED = np.iinfo(np.int32).max
135
 
136
  def _round8(x: int) -> int:
@@ -138,7 +152,6 @@ def _round8(x: int) -> int:
138
  return max(8, (x // 8) * 8)
139
 
140
  def fit_long_side(image: Image.Image, long_side: int):
141
- """Keep aspect ratio; set long side; round down to mult of 8."""
142
  w0, h0 = image.size
143
  long_side = _round8(long_side)
144
  if w0 >= h0:
@@ -180,6 +193,7 @@ def upload_image_to_hub(image, dataset_id, folder_prefix="images"):
180
  print(f"Yükleme hatası ({dataset_id}): {e}")
181
  # -----------------
182
 
 
183
  SIZE_PRESETS = [
184
  "Smart Auto (closest base + scale)",
185
  "Auto (fit long side to 1024)",
@@ -191,17 +205,17 @@ SIZE_PRESETS = [
191
  ]
192
 
193
  SCALE_CHOICES = ["Auto", "0.5x", "0.75x", "1.0x", "1.25x", "1.5x"]
194
-
195
- SMART_BASE_LONG_SIDES = [512, 768, 1024, 1280, 1536] # “元に一番近い base”
196
- SMART_SCALE_CANDIDATES = [0.5, 0.75, 1.0, 1.25, 1.5] # “縮尺指定”
197
- SMART_MAX_LONG_SIDE = 1536 # 自動はここまで(手動はスライダで2048までOK)
198
 
199
  def parse_scale(scale_choice: str):
200
  if scale_choice == "Auto":
201
  return None
202
  return float(scale_choice.replace("x", "").strip())
203
 
204
- def smart_auto_size(image: Image.Image, scale_choice: str):
205
  if image is None:
206
  return 1024, 1024, "No image"
207
 
@@ -209,55 +223,55 @@ def smart_auto_size(image: Image.Image, scale_choice: str):
209
  w0, h0 = img.size
210
  long0 = max(w0, h0)
211
 
212
- # 1) base: 元画像の長辺に最も近いもの
213
- # タイブレークは「小さい方」優先(無駄なアップスケールを避ける)
214
  base = min(
215
  SMART_BASE_LONG_SIDES,
216
  key=lambda b: (abs(b - long0), 0 if b <= long0 else 1, b)
217
  )
218
 
219
- # 2) scale: “いい感じ”に近づく縮尺を選ぶ(Auto の場合)
220
  s_user = parse_scale(scale_choice)
 
 
221
  if s_user is not None:
222
  cand_long = int(base * s_user)
223
  cand_long = max(256, min(cand_long, 2048))
 
224
  w, h = fit_long_side(img, cand_long)
225
- info = f"Smart(base={base}, scale={s_user}x) -> {w}x{h} (orig {w0}x{h0})"
226
  return w, h, info
227
 
228
- # Auto scale selection with mild anti-upscale penalty
229
  best = None
230
  for s in SMART_SCALE_CANDIDATES:
231
  cand_long = int(base * s)
232
  if cand_long < 256:
233
  continue
234
- if cand_long > SMART_MAX_LONG_SIDE:
235
  continue
236
 
237
  diff = abs(cand_long - long0)
238
  upscale_penalty = 0
239
  if cand_long > long0:
240
- upscale_penalty = (cand_long - long0) * 2.5 # アップスケールはやや不利に
241
  cost = diff + upscale_penalty
242
 
243
  if best is None or cost < best[0]:
244
  best = (cost, s, cand_long)
245
 
246
- # フォールバック
247
  if best is None:
248
- cand_long = min(max(256, base), SMART_MAX_LONG_SIDE)
249
  w, h = fit_long_side(img, cand_long)
250
- info = f"Smart(base={base}, scale=Fallback) -> {w}x{h} (orig {w0}x{h0})"
251
  return w, h, info
252
 
253
  _, s_best, long_best = best
254
  w, h = fit_long_side(img, long_best)
255
- info = f"Smart(base={base}, scale={s_best}x Auto) -> {w}x{h} (orig {w0}x{h0})"
256
  return w, h, info
257
 
258
- def apply_size_controls(preset, image, scale_choice, cur_w, cur_h):
 
 
259
  if preset == "Smart Auto (closest base + scale)":
260
- w, h, info = smart_auto_size(image, scale_choice)
261
  return w, h, info
262
 
263
  if preset == "Auto (fit long side to 1024)":
@@ -275,9 +289,9 @@ def apply_size_controls(preset, image, scale_choice, cur_w, cur_h):
275
  if preset == "512 x 512 (Fast)":
276
  return 512, 512, "Fixed 512x512"
277
 
278
- # Custom
279
  return _round8(cur_w), _round8(cur_h), f"Custom -> {_round8(cur_w)}x{_round8(cur_h)}"
280
 
 
281
  def set_adapter(lora_adapter: str):
282
  if lora_adapter == "Photo-to-Anime":
283
  pipe.set_adapters(["anime"], adapter_weights=[1.0])
@@ -296,7 +310,12 @@ def set_adapter(lora_adapter: str):
296
  elif lora_adapter == "Upscale-Image":
297
  pipe.set_adapters(["upscale-image"], adapter_weights=[1.0])
298
 
299
- @spaces.GPU(duration=60)
 
 
 
 
 
300
  def infer_6pack(
301
  input_image,
302
  prompt1,
@@ -305,6 +324,7 @@ def infer_6pack(
305
  lora_adapter,
306
  size_preset,
307
  scale_choice,
 
308
  width,
309
  height,
310
  seed,
@@ -325,7 +345,7 @@ def infer_6pack(
325
 
326
  prompts = [prompt1, prompt2, prompt3]
327
 
328
- # Seeds (2 per prompt => 6)
329
  seeds = []
330
  if randomize_seed:
331
  for _ in range(6):
@@ -335,13 +355,21 @@ def infer_6pack(
335
  for i in range(6):
336
  seeds.append((base + i) % MAX_SEED)
337
 
338
- negative_prompt = (
339
- "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
340
- "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
341
- )
 
 
 
 
342
 
343
  original_image = input_image.convert("RGB")
344
 
 
 
 
 
345
  outputs = []
346
  seed_idx = 0
347
  for p_i, p in enumerate(prompts):
@@ -350,22 +378,32 @@ def infer_6pack(
350
  seed_idx += 1
351
 
352
  generator = torch.Generator(device=device).manual_seed(int(s))
353
- result = pipe(
 
354
  image=original_image,
355
  prompt=p,
356
- negative_prompt=negative_prompt,
357
  height=int(height),
358
  width=int(width),
359
  num_inference_steps=int(steps),
360
  generator=generator,
361
  true_cfg_scale=float(guidance_scale),
362
- ).images[0]
 
 
 
 
363
 
364
  upload_image_to_hub(result, OUTPUT_DATASET_ID, folder_prefix="generated")
365
 
366
  caption = f"prompt{p_i+1} var{v+1} | seed={s} | {width}x{height}"
367
  outputs.append((result, caption))
368
 
 
 
 
 
 
 
369
  seeds_text = "\n".join([f"{i+1}: {s}" for i, s in enumerate(seeds)])
370
  return outputs, seeds_text
371
 
@@ -398,15 +436,22 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
398
  value="Auto",
399
  )
400
 
 
 
 
 
 
 
401
  with gr.Row():
402
  width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
403
  height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
404
 
405
  size_info = gr.Textbox(label="Size Decision Info", lines=2)
406
 
 
407
  prompt1 = gr.Text(
408
  label="Prompt 1",
409
- value="move camera to below floor, make this girl to another standing pose, dynamic camera anble from below",
410
  )
411
  prompt2 = gr.Text(
412
  label="Prompt 2",
@@ -417,6 +462,26 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
417
  value="make this girl to another standing pose with hand sign",
418
  )
419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  run_button = gr.Button("Generate 6 Images (3 prompts x 2 seeds)", variant="primary")
421
 
422
  with gr.Column():
@@ -451,26 +516,31 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
451
 
452
  seeds_box = gr.Textbox(label="Used Seeds (1..6)", lines=6)
453
 
454
- # サイズUI更新:preset/scale/画像アップロードで追従
455
- for evt in (size_preset.change, scale_choice.change, input_image.change):
 
 
 
456
  evt(
457
- fn=apply_size_controls,
458
- inputs=[size_preset, input_image, scale_choice, width, height],
459
  outputs=[width, height, size_info],
460
  )
461
 
 
 
 
 
 
 
 
 
462
  run_button.click(
463
  fn=infer_6pack,
464
  inputs=[
465
  input_image,
466
  prompt1, prompt2, prompt3,
467
  lora_adapter,
468
- size_preset, scale_choice,
469
  width, height,
470
- seed, randomize_seed, guidance_scale, steps,
471
- ],
472
- outputs=[output_gallery, seeds_box],
473
- )
474
-
475
- if __name__ == "__main__":
476
- demo.queue(max_size=30).launch(mcp_server=True, ssr_mode=False, show_error=True)
 
1
  import os
2
+ # ★ torch import 前に allocator 設定(ZeroGPU/断片化対策)
3
+ os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:128")
4
+
5
  import gradio as gr
6
  import numpy as np
7
  import spaces
8
  import torch
9
  import random
10
+ import gc
11
  from PIL import Image
12
  from typing import Iterable
13
  from gradio.themes import Soft
 
135
 
136
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
137
 
138
+ # ★ メモリ節約(対応していれば効く)
139
+ try:
140
+ pipe.enable_vae_slicing()
141
+ except Exception:
142
+ pass
143
+ try:
144
+ pipe.enable_attention_slicing("auto")
145
+ except Exception:
146
+ pass
147
+
148
  MAX_SEED = np.iinfo(np.int32).max
149
 
150
  def _round8(x: int) -> int:
 
152
  return max(8, (x // 8) * 8)
153
 
154
  def fit_long_side(image: Image.Image, long_side: int):
 
155
  w0, h0 = image.size
156
  long_side = _round8(long_side)
157
  if w0 >= h0:
 
193
  print(f"Yükleme hatası ({dataset_id}): {e}")
194
  # -----------------
195
 
196
+ # ===== Size logic =====
197
  SIZE_PRESETS = [
198
  "Smart Auto (closest base + scale)",
199
  "Auto (fit long side to 1024)",
 
205
  ]
206
 
207
  SCALE_CHOICES = ["Auto", "0.5x", "0.75x", "1.0x", "1.25x", "1.5x"]
208
+ SMART_BASE_LONG_SIDES = [512, 768, 1024, 1280, 1536]
209
+ SMART_SCALE_CANDIDATES = [0.5, 0.75, 1.0, 1.25, 1.5]
210
+ SMART_MAX_CHOICES = [768, 1024, 1280, 1536]
211
+ SMART_MAX_LONG_SIDE_DEFAULT = 1024 # ★安全側デフォルト
212
 
213
  def parse_scale(scale_choice: str):
214
  if scale_choice == "Auto":
215
  return None
216
  return float(scale_choice.replace("x", "").strip())
217
 
218
+ def smart_auto_size(image: Image.Image, scale_choice: str, smart_max_long: int):
219
  if image is None:
220
  return 1024, 1024, "No image"
221
 
 
223
  w0, h0 = img.size
224
  long0 = max(w0, h0)
225
 
 
 
226
  base = min(
227
  SMART_BASE_LONG_SIDES,
228
  key=lambda b: (abs(b - long0), 0 if b <= long0 else 1, b)
229
  )
230
 
 
231
  s_user = parse_scale(scale_choice)
232
+ smart_max_long = int(smart_max_long)
233
+
234
  if s_user is not None:
235
  cand_long = int(base * s_user)
236
  cand_long = max(256, min(cand_long, 2048))
237
+ cand_long = min(cand_long, smart_max_long)
238
  w, h = fit_long_side(img, cand_long)
239
+ info = f"Smart(base={base}, scale={s_user}x, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
240
  return w, h, info
241
 
 
242
  best = None
243
  for s in SMART_SCALE_CANDIDATES:
244
  cand_long = int(base * s)
245
  if cand_long < 256:
246
  continue
247
+ if cand_long > smart_max_long:
248
  continue
249
 
250
  diff = abs(cand_long - long0)
251
  upscale_penalty = 0
252
  if cand_long > long0:
253
+ upscale_penalty = (cand_long - long0) * 2.5
254
  cost = diff + upscale_penalty
255
 
256
  if best is None or cost < best[0]:
257
  best = (cost, s, cand_long)
258
 
 
259
  if best is None:
260
+ cand_long = min(max(256, base), smart_max_long)
261
  w, h = fit_long_side(img, cand_long)
262
+ info = f"Smart(base={base}, scale=Fallback, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
263
  return w, h, info
264
 
265
  _, s_best, long_best = best
266
  w, h = fit_long_side(img, long_best)
267
+ info = f"Smart(base={base}, scale={s_best}x Auto, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
268
  return w, h, info
269
 
270
+ def apply_size_controls(preset, image, scale_choice, smart_max_long, cur_w, cur_h):
271
+ smart_max_long = int(smart_max_long)
272
+
273
  if preset == "Smart Auto (closest base + scale)":
274
+ w, h, info = smart_auto_size(image, scale_choice, smart_max_long)
275
  return w, h, info
276
 
277
  if preset == "Auto (fit long side to 1024)":
 
289
  if preset == "512 x 512 (Fast)":
290
  return 512, 512, "Fixed 512x512"
291
 
 
292
  return _round8(cur_w), _round8(cur_h), f"Custom -> {_round8(cur_w)}x{_round8(cur_h)}"
293
 
294
+ # ===== LoRA =====
295
  def set_adapter(lora_adapter: str):
296
  if lora_adapter == "Photo-to-Anime":
297
  pipe.set_adapters(["anime"], adapter_weights=[1.0])
 
310
  elif lora_adapter == "Upscale-Image":
311
  pipe.set_adapters(["upscale-image"], adapter_weights=[1.0])
312
 
313
+ # ===== Prompt swap =====
314
+ def swap_prompt_sets(p1, p2, p3, p4, p5, p6):
315
+ return p4, p5, p6, p1, p2, p3
316
+
317
+ # ===== Inference (6 images) =====
318
+ @spaces.GPU(duration=120)
319
  def infer_6pack(
320
  input_image,
321
  prompt1,
 
324
  lora_adapter,
325
  size_preset,
326
  scale_choice,
327
+ smart_max_long,
328
  width,
329
  height,
330
  seed,
 
345
 
346
  prompts = [prompt1, prompt2, prompt3]
347
 
348
+ # seeds: 2 per prompt => 6
349
  seeds = []
350
  if randomize_seed:
351
  for _ in range(6):
 
355
  for i in range(6):
356
  seeds.append((base + i) % MAX_SEED)
357
 
358
+ # true_cfg_scale<=1 のときは negative_prompt 渡さない(警告&無駄回避)
359
+ guidance_scale = float(guidance_scale)
360
+ negative_prompt = None
361
+ if guidance_scale > 1.0:
362
+ negative_prompt = (
363
+ "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
364
+ "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
365
+ )
366
 
367
  original_image = input_image.convert("RGB")
368
 
369
+ if torch.cuda.is_available():
370
+ torch.cuda.empty_cache()
371
+ gc.collect()
372
+
373
  outputs = []
374
  seed_idx = 0
375
  for p_i, p in enumerate(prompts):
 
378
  seed_idx += 1
379
 
380
  generator = torch.Generator(device=device).manual_seed(int(s))
381
+
382
+ call_kwargs = dict(
383
  image=original_image,
384
  prompt=p,
 
385
  height=int(height),
386
  width=int(width),
387
  num_inference_steps=int(steps),
388
  generator=generator,
389
  true_cfg_scale=float(guidance_scale),
390
+ )
391
+ if negative_prompt is not None:
392
+ call_kwargs["negative_prompt"] = negative_prompt
393
+
394
+ result = pipe(**call_kwargs).images[0]
395
 
396
  upload_image_to_hub(result, OUTPUT_DATASET_ID, folder_prefix="generated")
397
 
398
  caption = f"prompt{p_i+1} var{v+1} | seed={s} | {width}x{height}"
399
  outputs.append((result, caption))
400
 
401
+ # ★ 連続生成のメモリ圧を下げる
402
+ del generator
403
+ if torch.cuda.is_available():
404
+ torch.cuda.empty_cache()
405
+ gc.collect()
406
+
407
  seeds_text = "\n".join([f"{i+1}: {s}" for i, s in enumerate(seeds)])
408
  return outputs, seeds_text
409
 
 
436
  value="Auto",
437
  )
438
 
439
+ smart_max_long = gr.Dropdown(
440
+ label="Smart Max Long Side (Safe default 1024)",
441
+ choices=[str(x) for x in SMART_MAX_CHOICES],
442
+ value=str(SMART_MAX_LONG_SIDE_DEFAULT),
443
+ )
444
+
445
  with gr.Row():
446
  width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
447
  height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
448
 
449
  size_info = gr.Textbox(label="Size Decision Info", lines=2)
450
 
451
+ # ---- main prompts (1-3) ----
452
  prompt1 = gr.Text(
453
  label="Prompt 1",
454
+ value="move camera to below floor, make this girl to another standing pose, dynamic camera angle from below",
455
  )
456
  prompt2 = gr.Text(
457
  label="Prompt 2",
 
462
  value="make this girl to another standing pose with hand sign",
463
  )
464
 
465
+ # ---- swap buttons ----
466
+ with gr.Row():
467
+ swap_left = gr.Button("◀", variant="secondary")
468
+ swap_right = gr.Button("▶", variant="secondary")
469
+
470
+ # ---- alt prompts (4-6) ----
471
+ with gr.Accordion("Alt Prompts (4-6)", open=False):
472
+ prompt4 = gr.Text(
473
+ label="Prompt 4",
474
+ value="camera zoom in to her face, cute face with smiling, aesthetics image film,",
475
+ )
476
+ prompt5 = gr.Text(
477
+ label="Prompt 5",
478
+ value="camera zoom out and she split legs, cute posing",
479
+ )
480
+ prompt6 = gr.Text(
481
+ label="Prompt 6",
482
+ value="camera move to up, she look at another, and sitting,",
483
+ )
484
+
485
  run_button = gr.Button("Generate 6 Images (3 prompts x 2 seeds)", variant="primary")
486
 
487
  with gr.Column():
 
516
 
517
  seeds_box = gr.Textbox(label="Used Seeds (1..6)", lines=6)
518
 
519
+ # サイズUI更新:preset/scale/max/画像アップロードで追従
520
+ def _size_update(preset, img, scale, mx, w, h):
521
+ return apply_size_controls(preset, img, scale, mx, w, h)
522
+
523
+ for evt in (size_preset.change, scale_choice.change, smart_max_long.change, input_image.change):
524
  evt(
525
+ fn=_size_update,
526
+ inputs=[size_preset, input_image, scale_choice, smart_max_long, width, height],
527
  outputs=[width, height, size_info],
528
  )
529
 
530
+ # 左右ボタン:prompt1-3 <-> prompt4-6 を swap
531
+ for btn in (swap_left, swap_right):
532
+ btn.click(
533
+ fn=swap_prompt_sets,
534
+ inputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
535
+ outputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
536
+ )
537
+
538
  run_button.click(
539
  fn=infer_6pack,
540
  inputs=[
541
  input_image,
542
  prompt1, prompt2, prompt3,
543
  lora_adapter,
544
+ size_preset, scale_choice, smart_max_long,
545
  width, height,
546
+ seed, randomize_seed, guidance_scale,