Spaces:
Running on Zero
Running on Zero
update app
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import os
|
| 3 |
import gc
|
| 4 |
import gradio as gr
|
|
@@ -6,7 +5,6 @@ import numpy as np
|
|
| 6 |
import random
|
| 7 |
import spaces
|
| 8 |
import torch
|
| 9 |
-
import time
|
| 10 |
from diffusers import Flux2KleinPipeline, AutoencoderKLFlux2
|
| 11 |
from PIL import Image
|
| 12 |
from pathlib import Path
|
|
@@ -87,8 +85,8 @@ orange_red_theme = OrangeRedTheme()
|
|
| 87 |
|
| 88 |
# ── Config ────────────────────────────────────────────────────────────────────
|
| 89 |
|
| 90 |
-
dtype
|
| 91 |
-
device
|
| 92 |
|
| 93 |
MAX_SEED = np.iinfo(np.int32).max
|
| 94 |
MAX_IMAGE_SIZE = 1024
|
|
@@ -166,15 +164,6 @@ def parse_input_images(input_images):
|
|
| 166 |
return None
|
| 167 |
|
| 168 |
|
| 169 |
-
def format_time(seconds: float) -> str:
|
| 170 |
-
"""Format seconds into a human-readable string."""
|
| 171 |
-
if seconds < 60:
|
| 172 |
-
return f"{seconds:.2f}s"
|
| 173 |
-
minutes = int(seconds // 60)
|
| 174 |
-
secs = seconds % 60
|
| 175 |
-
return f"{minutes}m {secs:.2f}s"
|
| 176 |
-
|
| 177 |
-
|
| 178 |
# ── Inference ─────────────────────────────────────────────────────────────────
|
| 179 |
|
| 180 |
@spaces.GPU(duration=240)
|
|
@@ -214,15 +203,11 @@ def infer(
|
|
| 214 |
progress(0.05, desc="🟦 Running Standard VAE generation...")
|
| 215 |
print("Starting Standard VAE generation...")
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
out_standard = pipe_standard(**shared_kwargs, generator=gen_std).images[0]
|
| 220 |
-
t1_std = time.perf_counter()
|
| 221 |
-
time_std = t1_std - t0_std
|
| 222 |
-
time_std_str = format_time(time_std)
|
| 223 |
|
| 224 |
-
print(
|
| 225 |
-
progress(0.55, desc=
|
| 226 |
|
| 227 |
gc.collect()
|
| 228 |
torch.cuda.empty_cache()
|
|
@@ -230,24 +215,16 @@ def infer(
|
|
| 230 |
# ── Step 2: Small Decoder VAE ─────────────────────────────────────────────
|
| 231 |
print("Starting Small Decoder VAE generation...")
|
| 232 |
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
out_small = pipe_small_decoder(**shared_kwargs, generator=gen_small).images[0]
|
| 236 |
-
t1_small = time.perf_counter()
|
| 237 |
-
time_small = t1_small - t0_small
|
| 238 |
-
time_small_str = format_time(time_small)
|
| 239 |
|
| 240 |
-
print(
|
| 241 |
-
progress(1.0, desc=
|
| 242 |
|
| 243 |
gc.collect()
|
| 244 |
torch.cuda.empty_cache()
|
| 245 |
|
| 246 |
-
|
| 247 |
-
label_std = f"🟦 Standard VAE — ⏱ {time_std_str}"
|
| 248 |
-
label_small = f"🟩 Small Decoder VAE — ⏱ {time_small_str}"
|
| 249 |
-
|
| 250 |
-
return out_standard, out_small, seed, label_std, label_small
|
| 251 |
|
| 252 |
|
| 253 |
@spaces.GPU(duration=240)
|
|
@@ -259,7 +236,7 @@ def infer_example(images, prompt):
|
|
| 259 |
else:
|
| 260 |
images_list = images
|
| 261 |
|
| 262 |
-
out_std, out_small, seed_used
|
| 263 |
prompt=prompt,
|
| 264 |
input_images=images_list,
|
| 265 |
seed=0,
|
|
@@ -269,7 +246,7 @@ def infer_example(images, prompt):
|
|
| 269 |
num_inference_steps=4,
|
| 270 |
guidance_scale=1.0,
|
| 271 |
)
|
| 272 |
-
return out_std, out_small, seed_used
|
| 273 |
|
| 274 |
|
| 275 |
# ── CSS ───────────────────────────────────────────────────────────────────────
|
|
@@ -291,15 +268,13 @@ css = """
|
|
| 291 |
display: block;
|
| 292 |
margin-bottom: 6px;
|
| 293 |
}
|
| 294 |
-
.
|
| 295 |
text-align: center;
|
| 296 |
-
font-size: 0.
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
margin-top:
|
| 300 |
-
|
| 301 |
-
border-radius: 12px;
|
| 302 |
-
background: rgba(255,255,255,0.6);
|
| 303 |
}
|
| 304 |
"""
|
| 305 |
|
|
@@ -310,12 +285,13 @@ with gr.Blocks() as demo:
|
|
| 310 |
with gr.Column(elem_id="col-container"):
|
| 311 |
|
| 312 |
gr.Markdown(
|
| 313 |
-
"# **Flux.2-4B-Encoder-Comparator**",
|
| 314 |
elem_id="main-title",
|
| 315 |
)
|
| 316 |
gr.Markdown(
|
| 317 |
-
"Compare **FLUX.2-klein-4B** side-by-side with two VAE decoders — generated **sequentially** from the **same seed**.
|
| 318 |
-
"🟦 **Standard VAE**
|
|
|
|
| 319 |
)
|
| 320 |
|
| 321 |
# ── Main two-column row ───────────────────────────────────────────────
|
|
@@ -343,12 +319,14 @@ with gr.Blocks() as demo:
|
|
| 343 |
# ── Right: outputs ────────────────────────────────────────────────
|
| 344 |
with gr.Column():
|
| 345 |
with gr.Row():
|
| 346 |
-
|
|
|
|
| 347 |
with gr.Column():
|
| 348 |
gr.HTML(
|
| 349 |
'<span class="vae-badge" '
|
| 350 |
'style="background:#FFE0CC;color:#CC3700;">'
|
| 351 |
-
'🟦 Standard VAE
|
|
|
|
| 352 |
)
|
| 353 |
result_standard = gr.Image(
|
| 354 |
label="Standard VAE",
|
|
@@ -357,17 +335,14 @@ with gr.Blocks() as demo:
|
|
| 357 |
format="png",
|
| 358 |
height=280,
|
| 359 |
)
|
| 360 |
-
timing_standard = gr.Markdown(
|
| 361 |
-
value="⏱ Waiting...",
|
| 362 |
-
elem_classes=["timing-label"],
|
| 363 |
-
)
|
| 364 |
|
| 365 |
-
# Small Decoder VAE
|
| 366 |
with gr.Column():
|
| 367 |
gr.HTML(
|
| 368 |
'<span class="vae-badge" '
|
| 369 |
'style="background:#FFF0E5;color:#E63E00;">'
|
| 370 |
-
'🟩 Small Decoder VAE
|
|
|
|
| 371 |
)
|
| 372 |
result_small = gr.Image(
|
| 373 |
label="Small Decoder VAE",
|
|
@@ -376,10 +351,6 @@ with gr.Blocks() as demo:
|
|
| 376 |
format="png",
|
| 377 |
height=280,
|
| 378 |
)
|
| 379 |
-
timing_small = gr.Markdown(
|
| 380 |
-
value="⏱ Waiting...",
|
| 381 |
-
elem_classes=["timing-label"],
|
| 382 |
-
)
|
| 383 |
|
| 384 |
with gr.Accordion("Advanced Settings", open=False):
|
| 385 |
seed_output = gr.Number(label="Seed Used", precision=0)
|
|
@@ -431,10 +402,9 @@ with gr.Blocks() as demo:
|
|
| 431 |
[["examples/2.jpg"], "Transform the scene into a snowy winter day while preserving the original subject identity, framing, and composition."],
|
| 432 |
[["examples/3.jpg"], "Relight the image with soft golden sunset lighting while keeping all structures and subject details consistent."],
|
| 433 |
[["examples/4.jpg"], "Make the texture high-resolution."],
|
| 434 |
-
[None, "A beautiful cyberpunk cityscape at night, neon lights, highly detailed."],
|
| 435 |
],
|
| 436 |
inputs=[input_images, prompt],
|
| 437 |
-
outputs=[result_standard, result_small, seed_output
|
| 438 |
fn=infer_example,
|
| 439 |
cache_examples=False,
|
| 440 |
label="Examples",
|
|
@@ -466,7 +436,7 @@ with gr.Blocks() as demo:
|
|
| 466 |
num_inference_steps,
|
| 467 |
guidance_scale,
|
| 468 |
],
|
| 469 |
-
outputs=[result_standard, result_small, seed_output
|
| 470 |
)
|
| 471 |
|
| 472 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import gc
|
| 3 |
import gradio as gr
|
|
|
|
| 5 |
import random
|
| 6 |
import spaces
|
| 7 |
import torch
|
|
|
|
| 8 |
from diffusers import Flux2KleinPipeline, AutoencoderKLFlux2
|
| 9 |
from PIL import Image
|
| 10 |
from pathlib import Path
|
|
|
|
| 85 |
|
| 86 |
# ── Config ────────────────────────────────────────────────────────────────────
|
| 87 |
|
| 88 |
+
dtype = torch.bfloat16
|
| 89 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 90 |
|
| 91 |
MAX_SEED = np.iinfo(np.int32).max
|
| 92 |
MAX_IMAGE_SIZE = 1024
|
|
|
|
| 164 |
return None
|
| 165 |
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
# ── Inference ─────────────────────────────────────────────────────────────────
|
| 168 |
|
| 169 |
@spaces.GPU(duration=240)
|
|
|
|
| 203 |
progress(0.05, desc="🟦 Running Standard VAE generation...")
|
| 204 |
print("Starting Standard VAE generation...")
|
| 205 |
|
| 206 |
+
gen_std = torch.Generator(device="cpu").manual_seed(seed)
|
| 207 |
+
out_standard = pipe_standard(**shared_kwargs, generator=gen_std).images[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
+
print("Standard VAE generation complete.")
|
| 210 |
+
progress(0.55, desc="🟦 Standard VAE done — now running 🟩 Small Decoder VAE...")
|
| 211 |
|
| 212 |
gc.collect()
|
| 213 |
torch.cuda.empty_cache()
|
|
|
|
| 215 |
# ── Step 2: Small Decoder VAE ─────────────────────────────────────────────
|
| 216 |
print("Starting Small Decoder VAE generation...")
|
| 217 |
|
| 218 |
+
gen_small = torch.Generator(device="cpu").manual_seed(seed)
|
| 219 |
+
out_small = pipe_small_decoder(**shared_kwargs, generator=gen_small).images[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
+
print("Small Decoder VAE generation complete.")
|
| 222 |
+
progress(1.0, desc="✅ Both generations complete!")
|
| 223 |
|
| 224 |
gc.collect()
|
| 225 |
torch.cuda.empty_cache()
|
| 226 |
|
| 227 |
+
return out_standard, out_small, seed
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
|
| 230 |
@spaces.GPU(duration=240)
|
|
|
|
| 236 |
else:
|
| 237 |
images_list = images
|
| 238 |
|
| 239 |
+
out_std, out_small, seed_used = infer(
|
| 240 |
prompt=prompt,
|
| 241 |
input_images=images_list,
|
| 242 |
seed=0,
|
|
|
|
| 246 |
num_inference_steps=4,
|
| 247 |
guidance_scale=1.0,
|
| 248 |
)
|
| 249 |
+
return out_std, out_small, seed_used
|
| 250 |
|
| 251 |
|
| 252 |
# ── CSS ───────────────────────────────────────────────────────────────────────
|
|
|
|
| 268 |
display: block;
|
| 269 |
margin-bottom: 6px;
|
| 270 |
}
|
| 271 |
+
.output-order-note {
|
| 272 |
text-align: center;
|
| 273 |
+
font-size: 0.85em;
|
| 274 |
+
color: #777;
|
| 275 |
+
font-style: italic;
|
| 276 |
+
margin-top: 2px;
|
| 277 |
+
margin-bottom: 4px;
|
|
|
|
|
|
|
| 278 |
}
|
| 279 |
"""
|
| 280 |
|
|
|
|
| 285 |
with gr.Column(elem_id="col-container"):
|
| 286 |
|
| 287 |
gr.Markdown(
|
| 288 |
+
"# ⚡ **Flux.2-4B-Encoder-Comparator**",
|
| 289 |
elem_id="main-title",
|
| 290 |
)
|
| 291 |
gr.Markdown(
|
| 292 |
+
"Compare **FLUX.2-klein-4B** side-by-side with two VAE decoders — generated **sequentially** from the **same seed**.\n\n"
|
| 293 |
+
"🟦 **Standard VAE** is generated **first**, then 🟩 **Small Decoder VAE** ([FLUX.2-small-decoder](https://huggingface.co/black-forest-labs/FLUX.2-small-decoder)) · "
|
| 294 |
+
"[[model](https://huggingface.co/black-forest-labs/FLUX.2-klein-4B)]"
|
| 295 |
)
|
| 296 |
|
| 297 |
# ── Main two-column row ───────────────────────────────────────────────
|
|
|
|
| 319 |
# ── Right: outputs ────────────────────────────────────────────────
|
| 320 |
with gr.Column():
|
| 321 |
with gr.Row():
|
| 322 |
+
|
| 323 |
+
# Standard VAE — Generated First
|
| 324 |
with gr.Column():
|
| 325 |
gr.HTML(
|
| 326 |
'<span class="vae-badge" '
|
| 327 |
'style="background:#FFE0CC;color:#CC3700;">'
|
| 328 |
+
'🟦 Standard VAE</span>'
|
| 329 |
+
'<p class="output-order-note">① Generated First</p>'
|
| 330 |
)
|
| 331 |
result_standard = gr.Image(
|
| 332 |
label="Standard VAE",
|
|
|
|
| 335 |
format="png",
|
| 336 |
height=280,
|
| 337 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
|
| 339 |
+
# Small Decoder VAE — Generated Second
|
| 340 |
with gr.Column():
|
| 341 |
gr.HTML(
|
| 342 |
'<span class="vae-badge" '
|
| 343 |
'style="background:#FFF0E5;color:#E63E00;">'
|
| 344 |
+
'🟩 Small Decoder VAE</span>'
|
| 345 |
+
'<p class="output-order-note">② Generated Second</p>'
|
| 346 |
)
|
| 347 |
result_small = gr.Image(
|
| 348 |
label="Small Decoder VAE",
|
|
|
|
| 351 |
format="png",
|
| 352 |
height=280,
|
| 353 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
with gr.Accordion("Advanced Settings", open=False):
|
| 356 |
seed_output = gr.Number(label="Seed Used", precision=0)
|
|
|
|
| 402 |
[["examples/2.jpg"], "Transform the scene into a snowy winter day while preserving the original subject identity, framing, and composition."],
|
| 403 |
[["examples/3.jpg"], "Relight the image with soft golden sunset lighting while keeping all structures and subject details consistent."],
|
| 404 |
[["examples/4.jpg"], "Make the texture high-resolution."],
|
|
|
|
| 405 |
],
|
| 406 |
inputs=[input_images, prompt],
|
| 407 |
+
outputs=[result_standard, result_small, seed_output],
|
| 408 |
fn=infer_example,
|
| 409 |
cache_examples=False,
|
| 410 |
label="Examples",
|
|
|
|
| 436 |
num_inference_steps,
|
| 437 |
guidance_scale,
|
| 438 |
],
|
| 439 |
+
outputs=[result_standard, result_small, seed_output],
|
| 440 |
)
|
| 441 |
|
| 442 |
if __name__ == "__main__":
|