Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -316,34 +316,11 @@ clip_tok = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
|
|
| 316 |
clip_enc = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=DTYPE)
|
| 317 |
print("✓ Text encoders loaded")
|
| 318 |
|
| 319 |
-
# VAE (local
|
| 320 |
print("Loading VAE...")
|
| 321 |
-
|
| 322 |
-
# Flux VAE config (16 latent channels, not standard SD 8)
|
| 323 |
-
vae_config = {
|
| 324 |
-
"in_channels": 3,
|
| 325 |
-
"out_channels": 3,
|
| 326 |
-
"latent_channels": 16,
|
| 327 |
-
"block_out_channels": [128, 256, 512, 512],
|
| 328 |
-
"down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"],
|
| 329 |
-
"up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"],
|
| 330 |
-
"layers_per_block": 2,
|
| 331 |
-
"norm_num_groups": 32,
|
| 332 |
-
"act_fn": "silu",
|
| 333 |
-
"scaling_factor": 0.3611,
|
| 334 |
-
"shift_factor": 0.1159,
|
| 335 |
-
"use_quant_conv": False,
|
| 336 |
-
"use_post_quant_conv": False,
|
| 337 |
-
"mid_block_add_attention": True,
|
| 338 |
-
}
|
| 339 |
-
|
| 340 |
-
vae = AutoencoderKL.from_single_file(
|
| 341 |
-
"ae.safetensors",
|
| 342 |
-
config=vae_config,
|
| 343 |
-
torch_dtype=DTYPE,
|
| 344 |
-
)
|
| 345 |
vae.eval()
|
| 346 |
-
VAE_SCALE =
|
| 347 |
print("✓ VAE loaded")
|
| 348 |
|
| 349 |
|
|
|
|
| 316 |
clip_enc = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=DTYPE)
|
| 317 |
print("✓ Text encoders loaded")
|
| 318 |
|
| 319 |
+
# VAE (local diffusers format)
|
| 320 |
print("Loading VAE...")
|
| 321 |
+
vae = AutoencoderKL.from_pretrained("./vae", torch_dtype=DTYPE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
vae.eval()
|
| 323 |
+
VAE_SCALE = vae.config.scaling_factor
|
| 324 |
print("✓ VAE loaded")
|
| 325 |
|
| 326 |
|