Fabrice-TIERCELIN commited on
Commit
f3f56fc
·
verified ·
1 Parent(s): 95653cd

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +8 -17
  2. app.py +491 -1057
  3. optimization.py +2 -0
  4. requirements.txt +10 -42
README.md CHANGED
@@ -1,21 +1,12 @@
1
  ---
2
- title: SUPIR Image Upscaler
 
 
 
3
  sdk: gradio
4
- emoji: 📷
5
  sdk_version: 5.29.1
6
  app_file: app.py
7
- license: mit
8
- colorFrom: blue
9
- colorTo: pink
10
- tags:
11
- - Upscaling
12
- - Restoring
13
- - Image-to-Image
14
- - Image-2-Image
15
- - Img-to-Img
16
- - Img-2-Img
17
- - language models
18
- - LLMs
19
- short_description: Restore blurred or small images with prompt
20
- suggested_hardware: zero-a10g
21
- ---
 
1
  ---
2
+ title: Wan 2 2 First Last Frame
3
+ emoji: 💻
4
+ colorFrom: purple
5
+ colorTo: gray
6
  sdk: gradio
 
7
  sdk_version: 5.29.1
8
  app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,1057 +1,491 @@
1
- import os
2
- import gradio as gr
3
- import argparse
4
- import numpy as np
5
- import torch
6
- import einops
7
- import copy
8
- import math
9
- import time
10
- import random
11
- from datetime import datetime
12
-
13
- try:
14
- import spaces
15
- except:
16
- class spaces():
17
- def GPU(*args, **kwargs):
18
- def decorator(function):
19
- return lambda *dummy_args, **dummy_kwargs: function(*dummy_args, **dummy_kwargs)
20
- return decorator
21
-
22
- import re
23
- import uuid
24
-
25
- from gradio_imageslider import ImageSlider
26
- from PIL import Image
27
- import imageio.v3 as iio
28
- from SUPIR.util import HWC3, upscale_image, fix_resize, convert_dtype, create_SUPIR_model, load_QF_ckpt
29
- from huggingface_hub import hf_hub_download
30
- import pillow_heif
31
-
32
- pillow_heif.register_heif_opener()
33
-
34
- max_64_bit_int = np.iinfo(np.int32).max
35
-
36
- hf_hub_download(repo_id="laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", filename="open_clip_pytorch_model.bin", local_dir="laion_CLIP-ViT-bigG-14-laion2B-39B-b160k")
37
- hf_hub_download(repo_id="camenduru/SUPIR", filename="sd_xl_base_1.0_0.9vae.safetensors", local_dir="yushan777_SUPIR")
38
- hf_hub_download(repo_id="camenduru/SUPIR", filename="SUPIR-v0F.ckpt", local_dir="yushan777_SUPIR")
39
- hf_hub_download(repo_id="camenduru/SUPIR", filename="SUPIR-v0Q.ckpt", local_dir="yushan777_SUPIR")
40
- hf_hub_download(repo_id="RunDiffusion/Juggernaut-XL-Lightning", filename="Juggernaut_RunDiffusionPhoto2_Lightning_4Steps.safetensors", local_dir="RunDiffusion_Juggernaut-XL-Lightning")
41
-
42
- parser = argparse.ArgumentParser()
43
- parser.add_argument("--opt", type=str, default='options/SUPIR_v0.yaml')
44
- parser.add_argument("--ip", type=str, default='127.0.0.1')
45
- parser.add_argument("--port", type=int, default='6688')
46
- parser.add_argument("--no_llava", action='store_true', default=True)#False
47
- parser.add_argument("--use_image_slider", action='store_true', default=False)#False
48
- parser.add_argument("--log_history", action='store_true', default=False)
49
- parser.add_argument("--loading_half_params", action='store_true', default=False)#False
50
- parser.add_argument("--use_tile_vae", action='store_true', default=True)#False
51
- parser.add_argument("--encoder_tile_size", type=int, default=512)
52
- parser.add_argument("--decoder_tile_size", type=int, default=64)
53
- parser.add_argument("--load_8bit_llava", action='store_true', default=False)
54
- args = parser.parse_args()
55
-
56
- input_image_debug_value = [None]
57
- prompt_debug_value = [None]
58
- upscale_debug_value = [None]
59
-
60
- if torch.cuda.device_count() > 0:
61
- SUPIR_device = 'cuda:0'
62
-
63
- # Load SUPIR
64
- model, default_setting = create_SUPIR_model(args.opt, SUPIR_sign='Q', load_default_setting=True)
65
- if args.loading_half_params:
66
- model = model.half()
67
- if args.use_tile_vae:
68
- model.init_tile_vae(encoder_tile_size=args.encoder_tile_size, decoder_tile_size=args.decoder_tile_size)
69
- model = model.to(SUPIR_device)
70
- model.first_stage_model.denoise_encoder_s1 = copy.deepcopy(model.first_stage_model.denoise_encoder)
71
- model.current_model = 'v0-Q'
72
- ckpt_Q, ckpt_F = load_QF_ckpt(args.opt)
73
-
74
- def check_upload(input_image):
75
- if input_image is None:
76
- raise gr.Error("Please provide an image to restore.")
77
- return gr.update(visible = True)
78
-
79
- def update_seed(is_randomize_seed, seed):
80
- if is_randomize_seed:
81
- return random.randint(0, max_64_bit_int)
82
- return seed
83
-
84
- def reset():
85
- return [
86
- None,
87
- 0,
88
- None,
89
- None,
90
- "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
91
- "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
92
- 1,
93
- 1024,
94
- 1,
95
- 2,
96
- 50,
97
- -1.0,
98
- 1.,
99
- default_setting.s_cfg_Quality if torch.cuda.device_count() > 0 else 1.0,
100
- True,
101
- random.randint(0, max_64_bit_int),
102
- 5,
103
- 1.003,
104
- "Wavelet",
105
- "fp32",
106
- "fp32",
107
- 1.0,
108
- True,
109
- default_setting.spt_linear_CFG_Quality if torch.cuda.device_count() > 0 else 1.0,
110
- False,
111
- 0.,
112
- "v0-Q",
113
- "input",
114
- 179
115
- ]
116
-
117
- def check_and_update(input_image):
118
- if input_image is None:
119
- raise gr.Error("Please provide an image to restore.")
120
- return [gr.update(visible = True), gr.update(interactive = True)]
121
-
122
- @spaces.GPU(duration=180)
123
- def stage1_process(
124
- input_image,
125
- gamma_correction,
126
- diff_dtype,
127
- ae_dtype
128
- ):
129
- print('stage1_process ==>>')
130
- if torch.cuda.device_count() == 0:
131
- gr.Warning('Set this space to GPU config to make it work.')
132
- return None, None, gr.update(interactive = False)
133
- torch.cuda.set_device(SUPIR_device)
134
- LQ = HWC3(np.array(Image.open(input_image)))
135
- LQ = fix_resize(LQ, 512)
136
- # stage1
137
- LQ = np.array(LQ) / 255 * 2 - 1
138
- LQ = torch.tensor(LQ, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0).to(SUPIR_device)[:, :3, :, :]
139
-
140
- model.ae_dtype = convert_dtype(ae_dtype)
141
- model.model.dtype = convert_dtype(diff_dtype)
142
-
143
- LQ = model.batchify_denoise(LQ, is_stage1=True)
144
- LQ = (LQ[0].permute(1, 2, 0) * 127.5 + 127.5).cpu().numpy().round().clip(0, 255).astype(np.uint8)
145
- # gamma correction
146
- LQ = LQ / 255.0
147
- LQ = np.power(LQ, gamma_correction)
148
- LQ *= 255.0
149
- LQ = LQ.round().clip(0, 255).astype(np.uint8)
150
- print('<<== stage1_process')
151
- return LQ, gr.update(visible = True)
152
-
153
- def stage2_process_example(*args, **kwargs):
154
- [result_slider, result_gallery, restore_information, reset_btn, warning, dummy_button] = restore_in_Xmin(*args, **kwargs)
155
- #outputs_folder = './outputs/'
156
- outputs_folder = './tmp/'
157
- os.makedirs(outputs_folder, exist_ok=True)
158
- output_filename = os.path.join(outputs_folder, datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + '.png')
159
- print(output_filename)
160
- iio.imwrite(output_filename, result_slider[1], format="png")
161
- return [gr.update(visible = True, value=output_filename), warning, dummy_button, gr.skip()]
162
-
163
- def stage2_process(*args, **kwargs):
164
- try:
165
- return restore_in_Xmin(*args, **kwargs)
166
- except Exception as e:
167
- # NO_GPU_MESSAGE_INQUEUE
168
- print("gradio.exceptions.Error 'No GPU is currently available for you after 60s'")
169
- print('str(type(e)): ' + str(type(e))) # <class 'gradio.exceptions.Error'>
170
- print('str(e): ' + str(e)) # You have exceeded your GPU quota...
171
- try:
172
- print('e.message: ' + e.message) # No GPU is currently available for you after 60s
173
- except Exception as e2:
174
- print('Failure')
175
- if str(e).startswith("No GPU is currently available for you after 60s"):
176
- print('Exception identified!!!')
177
- #if str(type(e)) == "<class 'gradio.exceptions.Error'>":
178
- #print('Exception of name ' + type(e).__name__)
179
- raise e
180
-
181
- def restore_in_Xmin(
182
- noisy_image,
183
- rotation,
184
- denoise_image,
185
- prompt,
186
- a_prompt,
187
- n_prompt,
188
- num_samples,
189
- min_size,
190
- downscale,
191
- upscale,
192
- edm_steps,
193
- s_stage1,
194
- s_stage2,
195
- s_cfg,
196
- randomize_seed,
197
- seed,
198
- s_churn,
199
- s_noise,
200
- color_fix_type,
201
- diff_dtype,
202
- ae_dtype,
203
- gamma_correction,
204
- linear_CFG,
205
- spt_linear_CFG,
206
- linear_s_stage2,
207
- spt_linear_s_stage2,
208
- model_select,
209
- output_format,
210
- allocation
211
- ):
212
- print("noisy_image:\n" + str(noisy_image))
213
- print("denoise_image:\n" + str(denoise_image))
214
- print("rotation: " + str(rotation))
215
- print("prompt: " + str(prompt))
216
- print("a_prompt: " + str(a_prompt))
217
- print("n_prompt: " + str(n_prompt))
218
- print("num_samples: " + str(num_samples))
219
- print("min_size: " + str(min_size))
220
- print("downscale: " + str(downscale))
221
- print("upscale: " + str(upscale))
222
- print("edm_steps: " + str(edm_steps))
223
- print("s_stage1: " + str(s_stage1))
224
- print("s_stage2: " + str(s_stage2))
225
- print("s_cfg: " + str(s_cfg))
226
- print("randomize_seed: " + str(randomize_seed))
227
- print("seed: " + str(seed))
228
- print("s_churn: " + str(s_churn))
229
- print("s_noise: " + str(s_noise))
230
- print("color_fix_type: " + str(color_fix_type))
231
- print("diff_dtype: " + str(diff_dtype))
232
- print("ae_dtype: " + str(ae_dtype))
233
- print("gamma_correction: " + str(gamma_correction))
234
- print("linear_CFG: " + str(linear_CFG))
235
- print("linear_s_stage2: " + str(linear_s_stage2))
236
- print("spt_linear_CFG: " + str(spt_linear_CFG))
237
- print("spt_linear_s_stage2: " + str(spt_linear_s_stage2))
238
- print("model_select: " + str(model_select))
239
- print("GPU time allocation: " + str(allocation) + " min")
240
- print("output_format: " + str(output_format))
241
-
242
- if input_image_debug_value[0] is not None or prompt_debug_value[0] is not None or upscale_debug_value[0] is not None:
243
- denoise_image = noisy_image = input_image_debug_value[0]
244
- a_prompt = prompt_debug_value[0]
245
- upscale = upscale_debug_value[0]
246
- allocation = min(allocation * 60 * 100, 600)
247
- seed = random.randint(0, max_64_bit_int)
248
-
249
- input_format = re.sub(r"^.*\.([^\.]+)$", r"\1", noisy_image)
250
-
251
- if input_format not in ['png', 'webp', 'jpg', 'jpeg', 'gif', 'bmp', 'avif']:
252
- gr.Warning('Invalid image format. Please first convert into *.png, *.webp, *.jpg, *.jpeg, *.gif, *.bmp, *.heic or *.avif.')
253
- return None, None, None, None, None, gr.update(interactive = False)
254
-
255
- if output_format == "input":
256
- if noisy_image is None:
257
- output_format = "png"
258
- else:
259
- output_format = input_format
260
- print("final output_format: " + str(output_format))
261
-
262
- if prompt is None:
263
- prompt = ""
264
-
265
- if a_prompt is None:
266
- a_prompt = ""
267
-
268
- if n_prompt is None:
269
- n_prompt = ""
270
-
271
- if prompt != "" and a_prompt != "":
272
- a_prompt = prompt + ", " + a_prompt
273
- else:
274
- a_prompt = prompt + a_prompt
275
- print("Final prompt: " + str(a_prompt))
276
-
277
- denoise_image = np.array(Image.open(noisy_image if denoise_image is None else denoise_image))
278
-
279
- if rotation == 90:
280
- denoise_image = np.array(list(zip(*denoise_image[::-1])))
281
- elif rotation == 180:
282
- denoise_image = np.array(list(zip(*denoise_image[::-1])))
283
- denoise_image = np.array(list(zip(*denoise_image[::-1])))
284
- elif rotation == -90:
285
- denoise_image = np.array(list(zip(*denoise_image))[::-1])
286
-
287
- if 1 < downscale:
288
- input_height, input_width, input_channel = denoise_image.shape
289
- denoise_image = np.array(Image.fromarray(denoise_image).resize((input_width // downscale, input_height // downscale), Image.LANCZOS))
290
-
291
- denoise_image = HWC3(denoise_image)
292
-
293
- if torch.cuda.device_count() == 0:
294
- gr.Warning('Set this space to GPU config to make it work.')
295
- return [noisy_image, denoise_image], gr.update(label="Downloadable results in *." + output_format + " format", format = output_format, value = [denoise_image]), None, gr.update(visible=True), gr.skip(), gr.skip()
296
-
297
- if model_select != model.current_model:
298
- print('load ' + model_select)
299
- if model_select == 'v0-Q':
300
- model.load_state_dict(ckpt_Q, strict=False)
301
- elif model_select == 'v0-F':
302
- model.load_state_dict(ckpt_F, strict=False)
303
- model.current_model = model_select
304
-
305
- model.ae_dtype = convert_dtype(ae_dtype)
306
- model.model.dtype = convert_dtype(diff_dtype)
307
-
308
- return restore_on_gpu(
309
- noisy_image, denoise_image, prompt, a_prompt, n_prompt, num_samples, min_size, downscale, upscale, edm_steps, s_stage1, s_stage2, s_cfg, randomize_seed, seed, s_churn, s_noise, color_fix_type, diff_dtype, ae_dtype, gamma_correction, linear_CFG, linear_s_stage2, spt_linear_CFG, spt_linear_s_stage2, model_select, output_format, allocation
310
- )
311
-
312
- def get_duration(
313
- noisy_image,
314
- input_image,
315
- prompt,
316
- a_prompt,
317
- n_prompt,
318
- num_samples,
319
- min_size,
320
- downscale,
321
- upscale,
322
- edm_steps,
323
- s_stage1,
324
- s_stage2,
325
- s_cfg,
326
- randomize_seed,
327
- seed,
328
- s_churn,
329
- s_noise,
330
- color_fix_type,
331
- diff_dtype,
332
- ae_dtype,
333
- gamma_correction,
334
- linear_CFG,
335
- spt_linear_CFG,
336
- linear_s_stage2,
337
- spt_linear_s_stage2,
338
- model_select,
339
- output_format,
340
- allocation
341
- ):
342
- return allocation
343
-
344
- @spaces.GPU(duration=get_duration)
345
- def restore_on_gpu(
346
- noisy_image,
347
- input_image,
348
- prompt,
349
- a_prompt,
350
- n_prompt,
351
- num_samples,
352
- min_size,
353
- downscale,
354
- upscale,
355
- edm_steps,
356
- s_stage1,
357
- s_stage2,
358
- s_cfg,
359
- randomize_seed,
360
- seed,
361
- s_churn,
362
- s_noise,
363
- color_fix_type,
364
- diff_dtype,
365
- ae_dtype,
366
- gamma_correction,
367
- linear_CFG,
368
- spt_linear_CFG,
369
- linear_s_stage2,
370
- spt_linear_s_stage2,
371
- model_select,
372
- output_format,
373
- allocation
374
- ):
375
- start = time.time()
376
- print('restore ==>>')
377
-
378
- torch.cuda.set_device(SUPIR_device)
379
-
380
- with torch.no_grad():
381
- input_image = upscale_image(input_image, upscale, unit_resolution=32, min_size=min_size)
382
- LQ = np.array(input_image) / 255.0
383
- LQ = np.power(LQ, gamma_correction)
384
- LQ *= 255.0
385
- LQ = LQ.round().clip(0, 255).astype(np.uint8)
386
- LQ = LQ / 255 * 2 - 1
387
- LQ = torch.tensor(LQ, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0).to(SUPIR_device)[:, :3, :, :]
388
- captions = ['']
389
-
390
- samples = model.batchify_sample(LQ, captions, num_steps=edm_steps, restoration_scale=s_stage1, s_churn=s_churn,
391
- s_noise=s_noise, cfg_scale=s_cfg, control_scale=s_stage2, seed=seed,
392
- num_samples=num_samples, p_p=a_prompt, n_p=n_prompt, color_fix_type=color_fix_type,
393
- use_linear_CFG=linear_CFG, use_linear_control_scale=linear_s_stage2,
394
- cfg_scale_start=spt_linear_CFG, control_scale_start=spt_linear_s_stage2)
395
-
396
- x_samples = (einops.rearrange(samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().round().clip(
397
- 0, 255).astype(np.uint8)
398
- results = [x_samples[i] for i in range(num_samples)]
399
- torch.cuda.empty_cache()
400
-
401
- # All the results have the same size
402
- input_height, input_width, input_channel = np.array(input_image).shape
403
- result_height, result_width, result_channel = np.array(results[0]).shape
404
-
405
- print('<<== restore')
406
- end = time.time()
407
- secondes = int(end - start)
408
- minutes = math.floor(secondes / 60)
409
- secondes = secondes - (minutes * 60)
410
- hours = math.floor(minutes / 60)
411
- minutes = minutes - (hours * 60)
412
- information = ("Start the process again if you want a different result. " if randomize_seed else "") + \
413
- "If you don't get the image you wanted, add more details in the « Image description ». " + \
414
- "The image" + (" has" if len(results) == 1 else "s have") + " been generated in " + \
415
- ((str(hours) + " h, ") if hours != 0 else "") + \
416
- ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
417
- str(secondes) + " sec. " + \
418
- "The new image resolution is " + str(result_width) + \
419
- " pixels large and " + str(result_height) + \
420
- " pixels high, so a resolution of " + f'{result_width * result_height:,}' + " pixels."
421
- print(information)
422
- try:
423
- print("Initial resolution: " + f'{input_width * input_height:,}')
424
- print("Final resolution: " + f'{result_width * result_height:,}')
425
- print("edm_steps: " + str(edm_steps))
426
- print("num_samples: " + str(num_samples))
427
- print("downscale: " + str(downscale))
428
- print("Estimated minutes: " + f'{(((result_width * result_height**(1/1.75)) * input_width * input_height * (edm_steps**(1/2)) * (num_samples**(1/2.5)))**(1/2.5)) / 25000:,}')
429
- except Exception as e:
430
- print('Exception of Estimation')
431
-
432
- # Only one image can be shown in the slider
433
- return [noisy_image] + [results[0]], gr.update(label="Downloadable results in *." + output_format + " format", format = output_format, value = results), gr.update(value = information, visible = True), gr.update(visible=True), gr.update(visible=False), gr.update(interactive = False)
434
-
435
- def load_and_reset(param_setting):
436
- print('load_and_reset ==>>')
437
- if torch.cuda.device_count() == 0:
438
- gr.Warning('Set this space to GPU config to make it work.')
439
- return None, None, None, None, None, None, None, None, None, None, None, None, None, None
440
- edm_steps = default_setting.edm_steps
441
- s_stage2 = 1.0
442
- s_stage1 = -1.0
443
- s_churn = 5
444
- s_noise = 1.003
445
- a_prompt = 'Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - ' \
446
- 'realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore ' \
447
- 'detailing, hyper sharpness, perfect without deformations.'
448
- n_prompt = 'painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, ' \
449
- '3D render, unreal engine, blurring, dirty, messy, worst quality, low quality, frames, watermark, ' \
450
- 'signature, jpeg artifacts, deformed, lowres, over-smooth'
451
- color_fix_type = 'Wavelet'
452
- spt_linear_s_stage2 = 0.0
453
- linear_s_stage2 = False
454
- linear_CFG = True
455
- if param_setting == "Quality":
456
- s_cfg = default_setting.s_cfg_Quality
457
- spt_linear_CFG = default_setting.spt_linear_CFG_Quality
458
- model_select = "v0-Q"
459
- elif param_setting == "Fidelity":
460
- s_cfg = default_setting.s_cfg_Fidelity
461
- spt_linear_CFG = default_setting.spt_linear_CFG_Fidelity
462
- model_select = "v0-F"
463
- else:
464
- raise NotImplementedError
465
- gr.Info('The parameters are reset.')
466
- print('<<== load_and_reset')
467
- return edm_steps, s_cfg, s_stage2, s_stage1, s_churn, s_noise, a_prompt, n_prompt, color_fix_type, linear_CFG, \
468
- spt_linear_CFG, linear_s_stage2, spt_linear_s_stage2, model_select
469
-
470
- def log_information(result_gallery):
471
- print('log_information')
472
- if result_gallery is not None:
473
- for i, result in enumerate(result_gallery):
474
- print(result[0])
475
-
476
- def on_select_result(result_slider, result_gallery, evt: gr.SelectData):
477
- print('on_select_result')
478
- if result_gallery is not None:
479
- for i, result in enumerate(result_gallery):
480
- print(result[0])
481
- return [result_slider[0], result_gallery[evt.index][0]]
482
-
483
- def on_render_image_example(result_example):
484
- print('on_render_image_example')
485
- return gr.update(value = result_example, visible = True)
486
-
487
- title_html = """
488
- <h1><center>SUPIR</center></h1>
489
- <big><center>Upscale your images up to x10 freely, without account, without watermark and download it</center></big>
490
- <center><big><big>🤸<big><big><big><big><big><big>🤸</big></big></big></big></big></big></big></big></center>
491
-
492
- <p>This is an online demo of SUPIR, a practicing model scaling for photo-realistic image restoration.
493
- The content added by SUPIR is <b><u>imagination, not real-world information</u></b>.
494
- SUPIR is for beauty and illustration only.
495
- Most of the processes last few minutes.
496
- If you want to upscale AI-generated images, be noticed that <i>PixArt Sigma</i> space can directly generate 5984x5984 images.
497
- Due to Gradio issues, the generated image is slightly less satured than the original.
498
- Please leave a <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR/discussions/new">message in discussion</a> if you encounter issues.
499
- You can also use <a href="https://huggingface.co/spaces/gokaygokay/AuraSR">AuraSR</a> to upscale x4.
500
-
501
- <p><center><a href="https://arxiv.org/abs/2401.13627">Paper</a> &emsp; <a href="http://supir.xpixel.group/">Project Page</a> &emsp; <a href="https://huggingface.co/blog/MonsterMMORPG/supir-sota-image-upscale-better-than-magnific-ai">Local Install Guide</a></center></p>
502
- <p><center><a style="display:inline-block" href='https://github.com/Fanghua-Yu/SUPIR'><img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/Fanghua-Yu/SUPIR?style=social"></a></center></p>
503
- """
504
-
505
-
506
- claim_md = """
507
- ## **Piracy**
508
- The images are not stored but the logs are saved during a month.
509
- ## **How to get SUPIR**
510
- You can get SUPIR on HuggingFace by [duplicating this space](https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR?duplicate=true) and set GPU.
511
- You can also install SUPIR on your computer following [this tutorial](https://huggingface.co/blog/MonsterMMORPG/supir-sota-image-upscale-better-than-magnific-ai).
512
- You can install _Pinokio_ on your computer and then install _SUPIR_ into it. It should be quite easy if you have an Nvidia GPU.
513
- ## **Terms of use**
514
- By using this service, users are required to agree to the following terms: The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research. Please submit a feedback to us if you get any inappropriate answer! We will collect those to keep improving our models. For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
515
- ## **License**
516
- The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/Fanghua-Yu/SUPIR) of SUPIR.
517
- """
518
-
519
- js = """
520
- function createGradioAnimation() {
521
- window.addEventListener("beforeunload", function(e) {
522
- if (document.getElementById('dummy_button_id') && !document.getElementById('dummy_button_id').disabled) {
523
- var confirmationMessage = 'A process is still running. '
524
- + 'If you leave before saving, your changes will be lost.';
525
-
526
- (e || window.event).returnValue = confirmationMessage;
527
- }
528
- return confirmationMessage;
529
- });
530
- return 'Animation created';
531
- }
532
- """
533
-
534
- # Gradio interface
535
- with gr.Blocks(js=js) as interface:
536
- if torch.cuda.device_count() == 0:
537
- with gr.Row():
538
- gr.HTML("""
539
- <p style="background-color: red;"><big><big><big><b>⚠️To use SUPIR, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR?duplicate=true">duplicate this space</a> and set a GPU with 30 GB VRAM.</b>
540
-
541
- You can't use SUPIR directly here because this space runs on a CPU, which is not enough for SUPIR. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR/discussions/new">feedback</a> if you have issues.
542
- </big></big></big></p>
543
- """)
544
- gr.HTML(title_html)
545
-
546
- input_image = gr.Image(label="Input (*.png, *.webp, *.jpeg, *.jpg, *.gif, *.bmp, *.avif)", show_label=True, type="filepath", height=600, elem_id="image-input")
547
- rotation = gr.Radio([["No rotation", 0], ["⤵ Rotate +90°", 90], ["↩ Return 180°", 180], ["⤴ Rotate -90°", -90]], label="Orientation correction", info="Will apply the following rotation before restoring the image; the AI needs a good orientation to understand the content", value=0, visible=False)
548
- with gr.Group():
549
- prompt = gr.Textbox(label="Image description", info="Help the AI understand what the image represents; describe as much as possible, especially the details we can't see on the original image; you can write in any language", value="", placeholder="A 33 years old man, walking, in the street, Santiago, morning, Summer, photorealistic", lines=3)
550
- prompt_hint = gr.HTML("You can use a <a href='"'https://huggingface.co/spaces/badayvedat/LLaVA'"'>LlaVa space</a> to auto-generate the description of your image.")
551
- upscale = gr.Radio([["x1", 1], ["x2", 2], ["x3", 3], ["x4", 4], ["x5", 5], ["x6", 6], ["x7", 7], ["x8", 8], ["x9", 9], ["x10", 10]], label="Upscale factor", info="Resolution x1 to x10", value=2)
552
- output_format = gr.Radio([["As input", "input"], ["*.png", "png"], ["*.webp", "webp"], ["*.jpeg", "jpeg"], ["*.gif", "gif"], ["*.bmp", "bmp"]], label="Image format for result", info="File extention", value="input")
553
- allocation = gr.Slider(label="GPU allocation time (in seconds)", info='lower=May abort run, higher=Quota penalty for next runs; only useful for ZeroGPU; for instance set to 88 when you have the message "You have exceeded your GPU quota (180s requested vs. 89s left)."', value=180, minimum=60, maximum=320, step=1)
554
-
555
- with gr.Accordion("Pre-denoising (optional)", open=False):
556
- gamma_correction = gr.Slider(label="Gamma Correction", info = "lower=lighter, higher=darker", minimum=0.1, maximum=2.0, value=1.0, step=0.1)
557
- denoise_button = gr.Button(value="Pre-denoise")
558
- denoise_image = gr.Image(label="Denoised image", show_label=True, type="filepath", sources=[], interactive = False, height=600, elem_id="image-s1")
559
- denoise_information = gr.HTML(value="If present, the denoised image will be used for the restoration instead of the input image.", visible=False)
560
-
561
- with gr.Accordion("Advanced options", open=False):
562
- a_prompt = gr.Textbox(label="Additional image description",
563
- info="Completes the main image description",
564
- value='Cinematic, High Contrast, highly detailed, taken using a Canon EOS R '
565
- 'camera, hyper detailed photo - realistic maximum detail, 32k, Color '
566
- 'Grading, ultra HD, extreme meticulous detailing, skin pore detailing, clothing fabric detailing, '
567
- 'hyper sharpness, perfect without deformations.',
568
- lines=3)
569
- n_prompt = gr.Textbox(label="Negative image description",
570
- info="Disambiguate by listing what the image does NOT represent",
571
- value='painting, oil painting, illustration, drawing, art, sketch, anime, '
572
- 'cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, '
573
- 'worst quality, low quality, frames, watermark, signature, jpeg artifacts, '
574
- 'deformed, lowres, over-smooth',
575
- lines=3)
576
- edm_steps = gr.Slider(label="Steps", info="lower=faster, higher=more details; too many steps create a checker effect", minimum=1, maximum=200, value=default_setting.edm_steps if torch.cuda.device_count() > 0 else 1, step=1)
577
- num_samples = gr.Slider(label="Num Samples", info="Number of generated results", minimum=1, maximum=4 if not args.use_image_slider else 1
578
- , value=1, step=1)
579
- min_size = gr.Slider(label="Minimum size", info="Minimum height, minimum width of the result", minimum=32, maximum=4096, value=1024, step=32)
580
- downscale = gr.Radio([["/1", 1], ["/2", 2], ["/3", 3], ["/4", 4], ["/5", 5], ["/6", 6], ["/7", 7], ["/8", 8], ["/9", 9], ["/10", 10]], label="Pre-downscale factor", info="Reducing blurred image reduce the process time", value=1)
581
- with gr.Row():
582
- with gr.Column():
583
- model_select = gr.Radio([["💃 Quality (v0-Q)", "v0-Q"], ["🎯 Fidelity (v0-F)", "v0-F"]], label="Model Selection", info="Pretrained model", value="v0-Q")
584
- with gr.Column():
585
- color_fix_type = gr.Radio([["None", "None"], ["AdaIn (improve as a photo)", "AdaIn"], ["Wavelet (for JPEG artifacts)", "Wavelet"]], label="Color-Fix Type", info="AdaIn=Improve following a style, Wavelet=For JPEG artifacts", value="AdaIn")
586
- s_cfg = gr.Slider(label="Text Guidance Scale", info="lower=follow the image, higher=follow the prompt", minimum=1.0, maximum=15.0,
587
- value=default_setting.s_cfg_Quality if torch.cuda.device_count() > 0 else 1.0, step=0.1)
588
- s_stage2 = gr.Slider(label="Restoring Guidance Strength", minimum=0., maximum=1., value=1., step=0.05)
589
- s_stage1 = gr.Slider(label="Pre-denoising Guidance Strength", minimum=-1.0, maximum=6.0, value=-1.0, step=1.0)
590
- s_churn = gr.Slider(label="S-Churn", minimum=0, maximum=40, value=5, step=1)
591
- s_noise = gr.Slider(label="S-Noise", minimum=1.0, maximum=1.1, value=1.003, step=0.001)
592
- with gr.Row():
593
- with gr.Column():
594
- linear_CFG = gr.Checkbox(label="Linear CFG", value=True)
595
- spt_linear_CFG = gr.Slider(label="CFG Start", minimum=1.0,
596
- maximum=9.0, value=default_setting.spt_linear_CFG_Quality if torch.cuda.device_count() > 0 else 1.0, step=0.5)
597
- with gr.Column():
598
- linear_s_stage2 = gr.Checkbox(label="Linear Restoring Guidance", value=False)
599
- spt_linear_s_stage2 = gr.Slider(label="Guidance Start", minimum=0.,
600
- maximum=1., value=0., step=0.05)
601
- with gr.Column():
602
- diff_dtype = gr.Radio([["fp32 (precision)", "fp32"], ["fp16 (medium)", "fp16"], ["bf16 (speed)", "bf16"]], label="Diffusion Data Type", value="fp32")
603
- with gr.Column():
604
- ae_dtype = gr.Radio([["fp32 (precision)", "fp32"], ["bf16 (speed)", "bf16"]], label="Auto-Encoder Data Type", value="fp32")
605
- randomize_seed = gr.Checkbox(label = "\U0001F3B2 Randomize seed", value = True, info = "If checked, result is always different")
606
- seed = gr.Slider(label="Seed", minimum=0, maximum=max_64_bit_int, step=1, randomize=True)
607
- with gr.Group():
608
- param_setting = gr.Radio(["Quality", "Fidelity"], label="Presetting", value = "Quality")
609
- restart_button = gr.Button(value="Apply presetting")
610
-
611
- with gr.Column():
612
- diffusion_button = gr.Button(value="🚀 Upscale/Restore", variant = "primary", elem_id = "process_button")
613
- reset_btn = gr.Button(value="🧹 Reinit page", variant="stop", elem_id="reset_button", visible = False)
614
- dummy_button = gr.Button(elem_id = "dummy_button_id", visible = False, interactive = False)
615
-
616
- warning = gr.HTML(elem_id="warning", value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
617
- restore_information = gr.HTML(value = "Restart the process to get another result.", visible = False)
618
- result_slider = ImageSlider(label = 'Comparator', show_label = False, interactive = False, elem_id = "slider1", show_download_button = False, visible = False)
619
- result_gallery = gr.Gallery(label = 'Downloadable results', show_label = True, interactive = False, elem_id = "gallery1")
620
- result_example = gr.HTML(elem_id="result_example", visible = False)
621
- result_image_example = gr.Image(label="Example Image", visible = False)
622
-
623
- with gr.Row(elem_id="cache", visible=False):
624
- input_image_debug=gr.Image(label="Image Debug", type="filepath")
625
- prompt_debug=gr.Textbox(label="Prompt Debug", value='')
626
- upscale_debug=gr.Radio([["x1", 1], ["x2", 2], ["x3", 3], ["x4", 4], ["x5", 5], ["x6", 6], ["x7", 7], ["x8", 8], ["x9", 9], ["x10", 10]], label="Upscale factor Debug", info="Resolution x1 to x10", value=2)
627
-
628
- gr.Examples(
629
- label = "Examples for cache",
630
- examples = [
631
- [
632
- "./Examples/Example2.jpeg",
633
- 0,
634
- "./Examples/Example2.jpeg",
635
- "La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
636
- "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
637
- "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
638
- 1, # num_samples
639
- 32, # min_size
640
- 1, # downscale
641
- 1, # upscale
642
- 100, # edm_steps
643
- -1, # s_stage1
644
- 1, # s_stage2
645
- 7.5, # s_cfg
646
- True, # randomize_seed
647
- 42, # seed
648
- 5, # s_churn
649
- 1.003, # s_noise
650
- "Wavelet", # color_fix_type
651
- "fp16", # diff_dtype
652
- "bf16", # ae_dtype
653
- 1.0, # gamma_correction
654
- True, # linear_CFG
655
- 4, # spt_linear_CFG
656
- False, # linear_s_stage2
657
- 0., # spt_linear_s_stage2
658
- "v0-Q", # model_select
659
- "input", # output_format
660
- 60 # allocation
661
- ],
662
- [
663
- "./Examples/Example2.jpeg",
664
- 0,
665
- "./Examples/Example2.jpeg",
666
- "La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
667
- "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
668
- "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
669
- 4, # num_samples
670
- 32, # min_size
671
- 1, # downscale
672
- 1, # upscale
673
- 100, # edm_steps
674
- -1, # s_stage1
675
- 1, # s_stage2
676
- 7.5, # s_cfg
677
- True, # randomize_seed
678
- 42, # seed
679
- 5, # s_churn
680
- 1.003, # s_noise
681
- "Wavelet", # color_fix_type
682
- "fp16", # diff_dtype
683
- "bf16", # ae_dtype
684
- 1.0, # gamma_correction
685
- True, # linear_CFG
686
- 4, # spt_linear_CFG
687
- False, # linear_s_stage2
688
- 0., # spt_linear_s_stage2
689
- "v0-Q", # model_select
690
- "input", # output_format
691
- 60 # allocation
692
- ]
693
- ],
694
- run_on_click = True,
695
- fn = stage2_process_example,
696
- inputs = [
697
- input_image,
698
- rotation,
699
- denoise_image,
700
- prompt,
701
- a_prompt,
702
- n_prompt,
703
- num_samples,
704
- min_size,
705
- downscale,
706
- upscale,
707
- edm_steps,
708
- s_stage1,
709
- s_stage2,
710
- s_cfg,
711
- randomize_seed,
712
- seed,
713
- s_churn,
714
- s_noise,
715
- color_fix_type,
716
- diff_dtype,
717
- ae_dtype,
718
- gamma_correction,
719
- linear_CFG,
720
- spt_linear_CFG,
721
- linear_s_stage2,
722
- spt_linear_s_stage2,
723
- model_select,
724
- output_format,
725
- allocation
726
- ],
727
- outputs = [
728
- result_example,
729
- warning,
730
- dummy_button,
731
- prompt_hint
732
- ],
733
- cache_examples = True,
734
- )
735
-
736
- gr.Examples(
737
- label = "Examples for demo",
738
- examples = [
739
- [
740
- "./Examples/Example1.png",
741
- 0,
742
- "./Examples/Example1.png",
743
- "Group of people, walking, happy, in the street, photorealistic, 8k, extremely detailled",
744
- "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
745
- "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
746
- 2, # num_samples
747
- 1024, # min_size
748
- 1, # downscale
749
- 8, # upscale
750
- 100, # edm_steps
751
- -1, # s_stage1
752
- 1, # s_stage2
753
- 7.5, # s_cfg
754
- False, # randomize_seed
755
- 42, # seed
756
- 5, # s_churn
757
- 1.003, # s_noise
758
- "AdaIn", # color_fix_type
759
- "fp16", # diff_dtype
760
- "bf16", # ae_dtype
761
- 1.0, # gamma_correction
762
- True, # linear_CFG
763
- 4, # spt_linear_CFG
764
- False, # linear_s_stage2
765
- 0., # spt_linear_s_stage2
766
- "v0-Q", # model_select
767
- "input", # output_format
768
- 180 # allocation
769
- ],
770
- [
771
- "./Examples/Example2.jpeg",
772
- 0,
773
- "./Examples/Example2.jpeg",
774
- "La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
775
- "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
776
- "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
777
- 1, # num_samples
778
- 1024, # min_size
779
- 1, # downscale
780
- 1, # upscale
781
- 100, # edm_steps
782
- -1, # s_stage1
783
- 1, # s_stage2
784
- 7.5, # s_cfg
785
- False, # randomize_seed
786
- 42, # seed
787
- 5, # s_churn
788
- 1.003, # s_noise
789
- "Wavelet", # color_fix_type
790
- "fp16", # diff_dtype
791
- "bf16", # ae_dtype
792
- 1.0, # gamma_correction
793
- True, # linear_CFG
794
- 4, # spt_linear_CFG
795
- False, # linear_s_stage2
796
- 0., # spt_linear_s_stage2
797
- "v0-Q", # model_select
798
- "input", # output_format
799
- 60 # allocation
800
- ],
801
- [
802
- "./Examples/Example3.webp",
803
- 0,
804
- "./Examples/Example3.webp",
805
- "A red apple",
806
- "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
807
- "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
808
- 1, # num_samples
809
- 1024, # min_size
810
- 1, # downscale
811
- 1, # upscale
812
- 200, # edm_steps
813
- -1, # s_stage1
814
- 1, # s_stage2
815
- 7.5, # s_cfg
816
- False, # randomize_seed
817
- 42, # seed
818
- 5, # s_churn
819
- 1.003, # s_noise
820
- "Wavelet", # color_fix_type
821
- "fp16", # diff_dtype
822
- "bf16", # ae_dtype
823
- 1.0, # gamma_correction
824
- True, # linear_CFG
825
- 4, # spt_linear_CFG
826
- False, # linear_s_stage2
827
- 0., # spt_linear_s_stage2
828
- "v0-Q", # model_select
829
- "input", # output_format
830
- 180 # allocation
831
- ],
832
- [
833
- "./Examples/Example3.webp",
834
- 0,
835
- "./Examples/Example3.webp",
836
- "A red marble",
837
- "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
838
- "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
839
- 1, # num_samples
840
- 1024, # min_size
841
- 1, # downscale
842
- 1, # upscale
843
- 200, # edm_steps
844
- -1, # s_stage1
845
- 1, # s_stage2
846
- 7.5, # s_cfg
847
- False, # randomize_seed
848
- 42, # seed
849
- 5, # s_churn
850
- 1.003, # s_noise
851
- "Wavelet", # color_fix_type
852
- "fp16", # diff_dtype
853
- "bf16", # ae_dtype
854
- 1.0, # gamma_correction
855
- True, # linear_CFG
856
- 4, # spt_linear_CFG
857
- False, # linear_s_stage2
858
- 0., # spt_linear_s_stage2
859
- "v0-Q", # model_select
860
- "input", # output_format
861
- 180 # allocation
862
- ],
863
- ],
864
- run_on_click = True,
865
- fn = stage2_process,
866
- inputs = [
867
- input_image,
868
- rotation,
869
- denoise_image,
870
- prompt,
871
- a_prompt,
872
- n_prompt,
873
- num_samples,
874
- min_size,
875
- downscale,
876
- upscale,
877
- edm_steps,
878
- s_stage1,
879
- s_stage2,
880
- s_cfg,
881
- randomize_seed,
882
- seed,
883
- s_churn,
884
- s_noise,
885
- color_fix_type,
886
- diff_dtype,
887
- ae_dtype,
888
- gamma_correction,
889
- linear_CFG,
890
- spt_linear_CFG,
891
- linear_s_stage2,
892
- spt_linear_s_stage2,
893
- model_select,
894
- output_format,
895
- allocation
896
- ],
897
- outputs = [
898
- result_slider,
899
- result_gallery,
900
- restore_information,
901
- reset_btn,
902
- warning,
903
- dummy_button
904
- ],
905
- cache_examples = False,
906
- )
907
-
908
- with gr.Row():
909
- gr.Markdown(claim_md)
910
-
911
- input_image.upload(fn = check_upload, inputs = [
912
- input_image
913
- ], outputs = [
914
- rotation
915
- ], queue = False, show_progress = False)
916
-
917
- denoise_button.click(fn = check_and_update, inputs = [
918
- input_image
919
- ], outputs = [warning, dummy_button], queue = False, show_progress = False).success(fn = stage1_process, inputs = [
920
- input_image,
921
- gamma_correction,
922
- diff_dtype,
923
- ae_dtype
924
- ], outputs=[
925
- denoise_image,
926
- denoise_information,
927
- dummy_button
928
- ])
929
-
930
- diffusion_button.click(fn = update_seed, inputs = [
931
- randomize_seed,
932
- seed
933
- ], outputs = [
934
- seed
935
- ], queue = False, show_progress = False).then(fn = check_and_update, inputs = [
936
- input_image
937
- ], outputs = [warning, dummy_button], queue = False, show_progress = False).success(fn=stage2_process, inputs = [
938
- input_image,
939
- rotation,
940
- denoise_image,
941
- prompt,
942
- a_prompt,
943
- n_prompt,
944
- num_samples,
945
- min_size,
946
- downscale,
947
- upscale,
948
- edm_steps,
949
- s_stage1,
950
- s_stage2,
951
- s_cfg,
952
- randomize_seed,
953
- seed,
954
- s_churn,
955
- s_noise,
956
- color_fix_type,
957
- diff_dtype,
958
- ae_dtype,
959
- gamma_correction,
960
- linear_CFG,
961
- spt_linear_CFG,
962
- linear_s_stage2,
963
- spt_linear_s_stage2,
964
- model_select,
965
- output_format,
966
- allocation
967
- ], outputs = [
968
- result_slider,
969
- result_gallery,
970
- restore_information,
971
- reset_btn,
972
- warning,
973
- dummy_button
974
- ]).success(fn = log_information, inputs = [
975
- result_gallery
976
- ], outputs = [], queue = False, show_progress = False)
977
-
978
- result_gallery.change(on_select_result, [result_slider, result_gallery], result_slider)
979
- result_gallery.select(on_select_result, [result_slider, result_gallery], result_slider)
980
- result_example.change(on_render_image_example, result_example, result_image_example)
981
-
982
- restart_button.click(fn = load_and_reset, inputs = [
983
- param_setting
984
- ], outputs = [
985
- edm_steps,
986
- s_cfg,
987
- s_stage2,
988
- s_stage1,
989
- s_churn,
990
- s_noise,
991
- a_prompt,
992
- n_prompt,
993
- color_fix_type,
994
- linear_CFG,
995
- spt_linear_CFG,
996
- linear_s_stage2,
997
- spt_linear_s_stage2,
998
- model_select
999
- ])
1000
-
1001
- reset_btn.click(fn = reset, inputs = [], outputs = [
1002
- input_image,
1003
- rotation,
1004
- denoise_image,
1005
- prompt,
1006
- a_prompt,
1007
- n_prompt,
1008
- num_samples,
1009
- min_size,
1010
- downscale,
1011
- upscale,
1012
- edm_steps,
1013
- s_stage1,
1014
- s_stage2,
1015
- s_cfg,
1016
- randomize_seed,
1017
- seed,
1018
- s_churn,
1019
- s_noise,
1020
- color_fix_type,
1021
- diff_dtype,
1022
- ae_dtype,
1023
- gamma_correction,
1024
- linear_CFG,
1025
- spt_linear_CFG,
1026
- linear_s_stage2,
1027
- spt_linear_s_stage2,
1028
- model_select,
1029
- output_format,
1030
- allocation
1031
- ], queue = False, show_progress = False)
1032
-
1033
- def handle_field_debug_change(input_image_debug_data, prompt_debug_data, upscale_debug_data):
1034
- input_image_debug_value[0] = input_image_debug_data
1035
- prompt_debug_value[0] = prompt_debug_data
1036
- upscale_debug_value[0] = upscale_debug_data
1037
- return []
1038
-
1039
- input_image_debug.upload(
1040
- fn=handle_field_debug_change,
1041
- inputs=[input_image_debug, prompt_debug, upscale_debug],
1042
- outputs=[]
1043
- )
1044
-
1045
- prompt_debug.change(
1046
- fn=handle_field_debug_change,
1047
- inputs=[input_image_debug, prompt_debug, upscale_debug],
1048
- outputs=[]
1049
- )
1050
-
1051
- upscale_debug.change(
1052
- fn=handle_field_debug_change,
1053
- inputs=[input_image_debug, prompt_debug, upscale_debug],
1054
- outputs=[]
1055
- )
1056
-
1057
- interface.queue(10).launch(mcp_server=True, ssr_mode=False)
 
1
import os

# PyTorch 2.8 (temporary hack): upgrade torch in-place at startup because the
# Space image ships an older build.
# NOTE(review): running pip via os.system at import time is fragile and slows
# every cold start — confirm this is still required.
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')

# --- 1. Model Download and Setup (Diffusers Backend) ---
try:
    import spaces
except Exception:  # was a bare `except:`; Exception keeps the fallback without masking SystemExit/KeyboardInterrupt
    class spaces():
        # Minimal stand-in for the Hugging Face `spaces` API when running
        # outside a Space: spaces.GPU(...) returns a decorator that invokes
        # the wrapped function unchanged.
        def GPU(*args, **kwargs):
            def decorator(function):
                return lambda *dummy_args, **dummy_kwargs: function(*dummy_args, **dummy_kwargs)
            return decorator

import torch
from diffusers import FlowMatchEulerDiscreteScheduler
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils.export_utils import export_to_video
import gradio as gr
import tempfile
import time
from datetime import datetime
import numpy as np
from PIL import Image
import random
import math
import gc
from gradio_client import Client, handle_file  # Import for API call

# Import the optimization function from the separate file
from optimization import optimize_pipeline_

# --- Constants and Model Loading ---
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

# --- Flexible Dimension Constants ---
MAX_DIMENSION = 832        # longest side fed to the model
MIN_DIMENSION = 480        # shortest side fed to the model
DIMENSION_MULTIPLE = 16    # both sides are rounded to multiples of this
SQUARE_SIZE = 480          # fixed output size for square inputs

MAX_SEED = np.iinfo(np.int32).max

FIXED_FPS = 24
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81

# Duration bounds (seconds) derived from the model's frame-count limits.
MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS, 1)

# Single-element lists used as mutable cells: the hidden debug widgets write
# into index 0 and generate_video() reads them to override its inputs.
input_image_debug_value = [None]
end_image_debug_value = [None]
prompt_debug_value = [None]
total_second_length_debug_value = [None]

default_negative_prompt = "Vibrant colors, overexposure, static, blurred details, subtitles, error, style, artwork, painting, image, still, overall gray, worst quality, low quality, JPEG compression residue, ugly, mutilated, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, malformed limbs, fused fingers, still image, cluttered background, three legs, many people in the background, walking backwards, overexposure, jumpcut, crossfader, "
59
print("Loading transformer...")

# High-noise expert (first denoising stage of the two-expert Wan 2.2 pipeline).
transformer = WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
    subfolder='transformer',
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

print("Loading transformer 2...")  # fixed typo: was "Loadingtransformer 2..."

# Low-noise expert (second denoising stage).
transformer_2 = WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
    subfolder='transformer_2',
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

print("Loading models into memory. This may take a few minutes...")

pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer = transformer,
    transformer_2 = transformer_2,
    torch_dtype=torch.bfloat16,
)
print("Loading scheduler...")
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config, shift=8.0)
pipe.to('cuda')

print("Clean cache...")
# Repeated GC + CUDA cache flush to reclaim load-time fragmentation before optimization.
for i in range(3):
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

print("Optimizing pipeline...")

# Dummy call at the largest supported landscape shape and max frame count
# (presumably to warm up / compile the pipeline — see optimization.py).
optimize_pipeline_(pipe,
    image=Image.new('RGB', (MAX_DIMENSION, MIN_DIMENSION)),
    prompt='prompt',
    height=MIN_DIMENSION,
    width=MAX_DIMENSION,
    num_frames=MAX_FRAMES_MODEL,
)
print("All models loaded and optimized. Gradio app is ready.")
104
+
105
+ # --- 2. Image Processing and Application Logic ---
106
def generate_end_frame(start_img, gen_prompt, progress=gr.Progress(track_tqdm=True)):
    """Call an external Gradio API to generate an end-frame image from the start frame.

    Saves `start_img` to a temporary PNG, sends it to the
    `multimodalart/nano-banana-private` Space with `gen_prompt`, and returns
    the API result (a path/file handle consumed by the `end_image` component).

    Raises:
        gr.Error: if no start frame was given or HF_TOKEN is missing.
    """
    if start_img is None:
        raise gr.Error("Please provide a Start Frame first.")

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise gr.Error("HF_TOKEN not found in environment variables. Please set it in your Space secrets.")

    # delete=False so the file survives the `with`; we remove it ourselves below.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        start_img.save(tmpfile.name)
        tmp_path = tmpfile.name

    # Bug fix: the cleanup previously only covered client.predict(); if
    # Client(...) construction raised, the temp file leaked. The whole
    # remote interaction is now inside the try/finally.
    try:
        progress(0.1, desc="Connecting to image generation API...")
        client = Client("multimodalart/nano-banana-private")

        progress(0.5, desc=f"Generating with prompt: '{gen_prompt}'...")
        result = client.predict(
            prompt=gen_prompt,
            images=[
                {"image": handle_file(tmp_path)}
            ],
            manual_token=hf_token,
            api_name="/unified_image_generator"
        )
    finally:
        os.remove(tmp_path)

    progress(1.0, desc="Done!")
    print(result)
    return result
138
+
139
def switch_to_upload_tab():
    """Return a gr.Tabs update that selects the 'Upload' end-frame tab."""
    # The tab id must match the TabItem id declared in the UI layout.
    upload_tab_id = "upload_tab"
    return gr.Tabs(selected=upload_tab_id)
142
+
143
+
144
def process_image_for_video(image: Image.Image) -> Image.Image:
    """
    Resize an image for video generation.

    Rules:
    1. The longest side is scaled down to MAX_DIMENSION if it's larger.
    2. The shortest side is scaled up to MIN_DIMENSION if it's smaller.
    3. The final dimensions are rounded to the nearest multiple of DIMENSION_MULTIPLE.
    4. Square images are resized to a fixed SQUARE_SIZE.

    The aspect ratio is preserved as closely as possible. Bug fix: both sides
    are now also clamped to at most MAX_DIMENSION — previously rule 2 could
    push the long side past MAX_DIMENSION for wide aspect ratios (e.g. a
    1920x1080 input rounded to 848x480, exceeding the 832 limit).
    """
    width, height = image.size

    # Rule 4: square images get the fixed square size.
    if width == height:
        return image.resize((SQUARE_SIZE, SQUARE_SIZE), Image.Resampling.LANCZOS)

    aspect_ratio = width / height
    new_width, new_height = width, height

    # Rule 1: scale down so the longest side is MAX_DIMENSION.
    if new_width > MAX_DIMENSION or new_height > MAX_DIMENSION:
        if aspect_ratio > 1:  # Landscape
            scale = MAX_DIMENSION / new_width
        else:  # Portrait
            scale = MAX_DIMENSION / new_height
        new_width *= scale
        new_height *= scale

    # Rule 2: scale up so the shortest side is MIN_DIMENSION.
    if new_width < MIN_DIMENSION or new_height < MIN_DIMENSION:
        if aspect_ratio > 1:  # Landscape
            scale = MIN_DIMENSION / new_height
        else:  # Portrait
            scale = MIN_DIMENSION / new_width
        new_width *= scale
        new_height *= scale

    # Rule 3: round to the nearest multiple of DIMENSION_MULTIPLE.
    final_width = int(round(new_width / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
    final_height = int(round(new_height / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)

    # Lower clamp: never below the model minimum for the short side.
    final_width = max(final_width, MIN_DIMENSION if aspect_ratio < 1 else SQUARE_SIZE)
    final_height = max(final_height, MIN_DIMENSION if aspect_ratio > 1 else SQUARE_SIZE)
    # Upper clamp (bug fix): never above MAX_DIMENSION (itself a multiple of 16).
    final_width = min(final_width, MAX_DIMENSION)
    final_height = min(final_height, MAX_DIMENSION)

    return image.resize((final_width, final_height), Image.Resampling.LANCZOS)
191
+
192
def resize_and_crop_to_match(target_image, reference_image):
    """Resize then center-crop target_image so it exactly matches reference_image's size."""
    ref_w, ref_h = reference_image.size
    src_w, src_h = target_image.size
    # Scale so the target fully covers the reference rectangle.
    scale = max(ref_w / src_w, ref_h / src_h)
    scaled_w = int(src_w * scale)
    scaled_h = int(src_h * scale)
    scaled = target_image.resize((scaled_w, scaled_h), Image.Resampling.LANCZOS)
    # Center-crop the excess down to the exact reference dimensions.
    off_x = (scaled_w - ref_w) // 2
    off_y = (scaled_h - ref_h) // 2
    return scaled.crop((off_x, off_y, off_x + ref_w, off_y + ref_h))
201
+
202
def generate_video(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt=default_negative_prompt,
    duration_seconds=2.1,
    steps=8,
    guidance_scale=1,
    guidance_scale_2=1,
    seed=42,
    randomize_seed=True,
    progress=gr.Progress(track_tqdm=True)
):
    """
    CPU-side entry point: validates inputs, preprocesses both frames, then
    delegates the actual diffusion to generate_video_on_gpu().

    Returns [video_path, download-button update, seed used, info-HTML update].

    Raises:
        gr.Error: if either frame is missing.
    """
    start = time.time()
    allocation_time = 120  # default ZeroGPU budget (seconds) for get_duration()
    factor = 1             # frame-count multiplier (only changed in debug mode)

    # Hidden debug widgets override the visible inputs when any of them is set.
    if input_image_debug_value[0] is not None or end_image_debug_value[0] is not None or prompt_debug_value[0] is not None or total_second_length_debug_value[0] is not None:
        start_image_pil = input_image_debug_value[0]
        end_image_pil = end_image_debug_value[0]
        prompt = prompt_debug_value[0]
        duration_seconds = total_second_length_debug_value[0]
        # NOTE(review): duration_seconds * 60 * 100 looks like it should be
        # duration_seconds * 60 — as written the min() always yields 600. Confirm intent.
        allocation_time = min(duration_seconds * 60 * 100, 10 * 60)
        factor = 3.1

    if start_image_pil is None or end_image_pil is None:
        raise gr.Error("Please upload both a start and an end image.")

    # Bug fix: this 0.1 update used to fire AFTER the 0.2 update below,
    # making the progress bar jump backwards; it now precedes the work it describes.
    progress(0.1, desc="Preprocessing images...")

    # Step 1: Process the start image to get our target dimensions.
    processed_start_image = process_image_for_video(start_image_pil)

    # Step 2: Make the end image match the *exact* dimensions of the processed start image.
    processed_end_image = resize_and_crop_to_match(end_image_pil, processed_start_image)

    target_height, target_width = processed_start_image.height, processed_start_image.width

    # Handle seed and frame count.
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)

    progress(0.2, desc=f"Generating {num_frames} frames at {target_width}x{target_height} (seed: {current_seed})...")
    output_video, download_button, seed_input = generate_video_on_gpu(
        start_image_pil,
        end_image_pil,
        prompt,
        negative_prompt,
        duration_seconds,
        steps,
        guidance_scale,
        guidance_scale_2,
        seed,
        randomize_seed,
        progress,
        allocation_time,
        factor,
        target_height,
        target_width,
        current_seed,
        num_frames,
        processed_start_image,
        processed_end_image
    )
    progress(1.0, desc="Done!")

    # Build the human-readable generation report.
    end = time.time()
    secondes = int(end - start)
    minutes = math.floor(secondes / 60)
    secondes = secondes - (minutes * 60)
    hours = math.floor(minutes / 60)
    minutes = minutes - (hours * 60)
    information = ("Start the process again if you want a different result. " if randomize_seed else "") + \
        "The video has been generated in " + \
        ((str(hours) + " h, ") if hours != 0 else "") + \
        ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
        str(secondes) + " sec. " + \
        "The video resolution is " + str(target_width) + \
        " pixels large and " + str(target_height) + \
        " pixels high, so a resolution of " + f'{target_width * target_height:,}' + " pixels."
    return [output_video, download_button, seed_input, gr.update(value = information, visible = True)]
282
+
283
def get_duration(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,
    seed,
    randomize_seed,
    progress,
    allocation_time,
    factor,
    target_height,
    target_width,
    current_seed,
    num_frames,
    processed_start_image,
    processed_end_image
):
    """ZeroGPU duration hook for @spaces.GPU.

    The signature deliberately mirrors generate_video_on_gpu() because the
    spaces runtime calls it with the same arguments; only the pre-computed
    allocation budget (in seconds) is actually used.
    """
    return allocation_time
305
+
306
@spaces.GPU(duration=get_duration)
def generate_video_on_gpu(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,
    seed,
    randomize_seed,
    progress,
    allocation_time,
    factor,
    target_height,
    target_width,
    current_seed,
    num_frames,
    processed_start_image,
    processed_end_image
):
    """
    GPU stage: interpolate between the processed start and end frames with the
    diffusers Wan2.2 pipeline, guided by the text prompt, then encode the
    frames to an .mp4 file and return its path plus UI updates.
    """
    print("Generate a video with the prompt: " + prompt)

    # Deterministic sampling for the chosen seed.
    rng = torch.Generator(device="cuda").manual_seed(current_seed)

    pipe_result = pipe(
        image=processed_start_image,
        last_image=processed_end_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=target_height,
        width=target_width,
        num_frames=int(num_frames * factor),
        guidance_scale=float(guidance_scale),
        guidance_scale_2=float(guidance_scale_2),
        num_inference_steps=int(steps),
        generator=rng,
    )
    frames = pipe_result.frames[0]

    progress(0.9, desc="Encoding and saving video...")

    # Timestamped output name avoids collisions between concurrent runs.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S.%f")
    video_path = 'wan_' + timestamp + '.mp4'
    print("Exporting video: " + video_path)

    export_to_video(frames, video_path, fps=FIXED_FPS)
    print("Video exported: " + video_path)

    return video_path, gr.update(value = video_path, visible = True), current_seed
357
+
358
+
359
+ # --- 3. Gradio User Interface ---
360
+
361
# --- Gradio UI. NOTE(review): component nesting reconstructed from the diff
# rendering (indentation was lost) — verify layout against the deployed Space.
with gr.Blocks() as app:
    gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
    gr.Markdown("Based on the [Wan 2.2 First/Last Frame workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/), applied to 🧨 Diffusers + [lightx2v/Wan2.2-Lightning](https://huggingface.co/lightx2v/Wan2.2-Lightning) 8-step LoRA")

    with gr.Row(elem_id="general_items"):
        with gr.Column():
            with gr.Group(elem_id="group_all"):
                with gr.Row():
                    start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                    # Capture the Tabs component in a variable and assign IDs to tabs
                    # so switch_to_upload_tab() can select one programmatically.
                    with gr.Tabs(elem_id="group_tabs") as tabs:
                        with gr.TabItem("Upload", id="upload_tab"):
                            end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
                        with gr.TabItem("Generate", id="generate_tab"):
                            generate_5seconds = gr.Button("Generate scene 5 seconds in the future", elem_id="fivesec")
                            gr.Markdown("Generate a custom end-frame with an edit model like [Nano Banana](https://huggingface.co/spaces/multimodalart/nano-banana) or [Qwen Image Edit](https://huggingface.co/spaces/multimodalart/Qwen-Image-Edit-Fast)", elem_id="or_item")
            prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images")

            with gr.Accordion("Advanced Settings", open=False):
                duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=2.1, label="Video Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=8, label="Inference Steps")
                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - high noise")
                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - low noise")
                with gr.Row():
                    seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
                    randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True)

            generate_button = gr.Button("Generate Video", variant="primary")

        with gr.Column():
            output_video = gr.Video(label="Generated Video", autoplay = True, loop = True)
            download_button = gr.DownloadButton(label="Download", visible = True)
            video_information = gr.HTML(value = "", visible = True)

    # Shared input/output lists for the main button, the 5-seconds flow and the examples.
    ui_inputs = [
        start_image,
        end_image,
        prompt,
        negative_prompt_input,
        duration_seconds_input,
        steps_slider,
        guidance_scale_input,
        guidance_scale_2_input,
        seed_input,
        randomize_seed_checkbox
    ]
    ui_outputs = [output_video, download_button, seed_input, video_information]

    # Main video generation button.
    generate_button.click(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=ui_outputs
    )

    # "5 seconds in the future" flow: switch to the Upload tab, generate an
    # end frame via the external API, then run the normal video generation.
    generate_5seconds.click(
        fn=switch_to_upload_tab,
        inputs=None,
        outputs=[tabs]
    ).then(
        fn=lambda img: generate_end_frame(img, "this image is a still frame from a movie. generate a new frame with what happens on this scene 5 seconds in the future"),
        inputs=[start_image],
        outputs=[end_image]
    ).success(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=ui_outputs
    )

    # Hidden debug widgets; their values are mirrored into the module-level
    # *_debug_value cells and override the visible inputs in generate_video().
    with gr.Row(visible=False):
        input_image_debug=gr.Image(type="pil", label="Image Debug")
        end_image_debug=gr.Image(type="pil", label="End Image Debug")
        prompt_debug=gr.Textbox(label="Prompt Debug")
        total_second_length_debug=gr.Slider(label="Additional Video Length to Generate (seconds) Debug", minimum=1, maximum=120, value=10, step=0.1)
    # Lazily-cached example (pre-rendered result served without a GPU run).
    gr.Examples(
        examples=[["ugly_sonic.jpeg", "squatting_sonic.png", "the character dodges the missiles"]],
        inputs=[start_image, end_image, prompt],
        outputs=ui_outputs,
        fn=generate_video,
        run_on_click=True,
        cache_examples=True,
        cache_mode='lazy',
    )

    # Uncached demo examples (clicking them runs a real generation).
    gr.Examples(
        label = "Examples from demo",
        examples = [
            ["poli_tower.png", "tower_takes_off.png", "the man turns around"],
            ["ugly_sonic.jpeg", "squatting_sonic.png", "the character dodges the missiles"],
            ["capyabara_zoomed.png", "capyabara.webp", "a dramatic dolly zoom"],
        ],
        inputs = [start_image, end_image, prompt],
        outputs = ui_outputs,
        fn = generate_video,
        cache_examples = False,
    )

    def handle_field_debug_change(input_image_debug_data, end_image_debug_data, prompt_debug_data, total_second_length_debug_data):
        # Copy the current debug-widget values into the module-level cells.
        input_image_debug_value[0] = input_image_debug_data
        end_image_debug_value[0] = end_image_debug_data
        prompt_debug_value[0] = prompt_debug_data
        total_second_length_debug_value[0] = total_second_length_debug_data
        return []

    # Keep the debug cells in sync with every debug widget.
    input_image_debug.upload(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    end_image_debug.upload(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    prompt_debug.change(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    total_second_length_debug.change(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )
489
+
490
+ if __name__ == "__main__":
491
+ app.launch(mcp_server=True, share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
optimization.py CHANGED
@@ -65,6 +65,7 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
65
 
66
  @spaces.GPU(duration=1500)
67
  def compile_transformer():
 
68
 
69
  # This LoRA fusion part remains the same
70
  pipeline.load_lora_weights(
@@ -115,6 +116,7 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
115
  compiled_2 = aoti_compile(exported_2, INDUCTOR_CONFIGS)
116
 
117
  # Return the two compiled models
 
118
  return compiled_1, compiled_2
119
 
120
 
 
65
 
66
  @spaces.GPU(duration=1500)
67
  def compile_transformer():
68
+ print("Start compile_transformer()")
69
 
70
  # This LoRA fusion part remains the same
71
  pipeline.load_lora_weights(
 
116
  compiled_2 = aoti_compile(exported_2, INDUCTOR_CONFIGS)
117
 
118
  # Return the two compiled models
119
+ print("End compile_transformer()")
120
  return compiled_1, compiled_2
121
 
122
 
requirements.txt CHANGED
@@ -1,43 +1,11 @@
1
- pydantic==2.10.6 # To avoid the message "No API found" or "Internal server error"
2
 
3
- fastapi==0.115.13
4
- gradio_imageslider==0.0.20
5
- gradio_client==1.10.3
6
- numpy==1.26.4
7
- requests==2.32.4
8
- sentencepiece==0.2.0
9
- tokenizers==0.19.1
10
- torchvision==0.22.0
11
- uvicorn==0.34.3
12
- wandb==0.20.1
13
- httpx==0.28.1
14
- transformers==4.43.0
15
- accelerate==1.8.0
16
- scikit-learn==1.7.0
17
- einops==0.8.1
18
- einops-exts==0.0.4
19
- timm==1.0.15
20
- openai-clip==1.0.1
21
- fsspec==2025.5.1
22
- kornia==0.8.1
23
- matplotlib==3.10.3
24
- ninja==1.11.1.4
25
- omegaconf==2.3.0
26
- opencv-python==4.11.0.86
27
- pandas==2.3.0
28
- pillow==11.2.1
29
- pytorch-lightning==2.5.1.post0
30
- PyYAML==6.0.2
31
- scipy==1.15.3
32
- tqdm==4.67.1
33
- triton==3.3.0
34
- urllib3==2.4.0
35
- webdataset==0.2.111
36
- xformers==0.0.30
37
- facexlib==0.3.0
38
- k-diffusion==0.1.1.post1
39
- diffusers==0.33.1
40
- imageio==2.37.0
41
- pillow-heif==0.22.0
42
-
43
- open-clip-torch==2.24.0
 
1
+ git+https://github.com/linoytsaban/diffusers.git@wan22-loras
2
 
3
+ transformers
4
+ accelerate
5
+ safetensors==0.6.2
6
+ sentencepiece==0.2.1
7
+ peft==0.17.1
8
+ ftfy==6.3.1
9
+ imageio-ffmpeg==0.6.0
10
+ opencv-python==4.12.0.88
11
+ torchao==0.11.0