2511
Browse files- samples/sample_decoded_0.jpg +2 -2
- samples/sample_decoded_1.jpg +2 -2
- samples/sample_decoded_2.jpg +2 -2
- samples/sample_real_0.jpg +2 -2
- samples/sample_real_1.jpg +2 -2
- samples/sample_real_2.jpg +2 -2
- train_vae_fdl.py +14 -14
- vae5/config.json +48 -0
- vae5/diffusion_pytorch_model.safetensors +3 -0
samples/sample_decoded_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/sample_decoded_1.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/sample_decoded_2.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/sample_real_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/sample_real_1.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/sample_real_2.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
train_vae_fdl.py
CHANGED
|
@@ -29,11 +29,11 @@ from collections import deque
|
|
| 29 |
|
| 30 |
# --------------------------- Параметры ---------------------------
|
| 31 |
ds_path = "/workspace/d23"
|
| 32 |
-
project = "
|
| 33 |
batch_size = 2
|
| 34 |
base_learning_rate = 5e-5
|
| 35 |
min_learning_rate = 1e-5
|
| 36 |
-
num_epochs =
|
| 37 |
sample_interval_share = 5
|
| 38 |
use_wandb = True
|
| 39 |
save_model = True
|
|
@@ -41,8 +41,8 @@ use_decay = True
|
|
| 41 |
optimizer_type = "adam8bit"
|
| 42 |
dtype = torch.float32
|
| 43 |
|
| 44 |
-
model_resolution =
|
| 45 |
-
high_resolution =
|
| 46 |
limit = 0
|
| 47 |
save_barrier = 1.3
|
| 48 |
warmup_percent = 0.005
|
|
@@ -51,9 +51,9 @@ beta2 = 0.997
|
|
| 51 |
eps = 1e-8
|
| 52 |
clip_grad_norm = 1.0
|
| 53 |
mixed_precision = "no"
|
| 54 |
-
gradient_accumulation_steps =
|
| 55 |
generated_folder = "samples"
|
| 56 |
-
save_as = "
|
| 57 |
num_workers = 0
|
| 58 |
device = None
|
| 59 |
|
|
@@ -67,10 +67,10 @@ kl_ratio = 0.00
|
|
| 67 |
# Доли лоссов
|
| 68 |
loss_ratios = {
|
| 69 |
"lpips": 0.70,#0.50,
|
| 70 |
-
"fdl" : 0.
|
| 71 |
"edge": 0.05,
|
| 72 |
"mse": 0.10,
|
| 73 |
-
"mae": 0.
|
| 74 |
"kl": 0.00, # активируем при full_training=True
|
| 75 |
}
|
| 76 |
median_coeff_steps = 1000
|
|
@@ -422,8 +422,8 @@ def generate_and_save_samples(step=None):
|
|
| 422 |
rec = temp_vae.decode(latents_mean).sample
|
| 423 |
|
| 424 |
# Подгон размеров, если надо
|
| 425 |
-
if rec.shape[-2:] != orig_high.shape[-2:]:
|
| 426 |
-
|
| 427 |
|
| 428 |
# Сохраняем все real/decoded
|
| 429 |
for i in range(rec.shape[0]):
|
|
@@ -437,8 +437,8 @@ def generate_and_save_samples(step=None):
|
|
| 437 |
for i in range(rec.shape[0]):
|
| 438 |
orig_full = orig_high[i:i+1].to(torch.float32)
|
| 439 |
rec_full = rec[i:i+1].to(torch.float32)
|
| 440 |
-
if rec_full.shape[-2:] != orig_full.shape[-2:]:
|
| 441 |
-
|
| 442 |
lpips_val = lpips_net(orig_full, rec_full).item()
|
| 443 |
lpips_scores.append(lpips_val)
|
| 444 |
avg_lpips = float(np.mean(lpips_scores))
|
|
@@ -497,8 +497,8 @@ for epoch in range(num_epochs):
|
|
| 497 |
latents = enc.latent_dist.mean if train_decoder_only else enc.latent_dist.sample()
|
| 498 |
rec = vae.decode(latents).sample
|
| 499 |
|
| 500 |
-
if rec.shape[-2:] != imgs.shape[-2:]:
|
| 501 |
-
|
| 502 |
|
| 503 |
rec_f32 = rec.to(torch.float32)
|
| 504 |
imgs_f32 = imgs.to(torch.float32)
|
|
|
|
| 29 |
|
| 30 |
# --------------------------- Параметры ---------------------------
|
| 31 |
ds_path = "/workspace/d23"
|
| 32 |
+
project = "vae4"
|
| 33 |
batch_size = 2
|
| 34 |
base_learning_rate = 5e-5
|
| 35 |
min_learning_rate = 1e-5
|
| 36 |
+
num_epochs = 25
|
| 37 |
sample_interval_share = 5
|
| 38 |
use_wandb = True
|
| 39 |
save_model = True
|
|
|
|
| 41 |
optimizer_type = "adam8bit"
|
| 42 |
dtype = torch.float32
|
| 43 |
|
| 44 |
+
model_resolution = 288
|
| 45 |
+
high_resolution = 576
|
| 46 |
limit = 0
|
| 47 |
save_barrier = 1.3
|
| 48 |
warmup_percent = 0.005
|
|
|
|
| 51 |
eps = 1e-8
|
| 52 |
clip_grad_norm = 1.0
|
| 53 |
mixed_precision = "no"
|
| 54 |
+
gradient_accumulation_steps = 4
|
| 55 |
generated_folder = "samples"
|
| 56 |
+
save_as = "vae5"
|
| 57 |
num_workers = 0
|
| 58 |
device = None
|
| 59 |
|
|
|
|
| 67 |
# Доли лоссов
|
| 68 |
loss_ratios = {
|
| 69 |
"lpips": 0.70,#0.50,
|
| 70 |
+
"fdl" : 0.10,#0.25,
|
| 71 |
"edge": 0.05,
|
| 72 |
"mse": 0.10,
|
| 73 |
+
"mae": 0.05,
|
| 74 |
"kl": 0.00, # активируем при full_training=True
|
| 75 |
}
|
| 76 |
median_coeff_steps = 1000
|
|
|
|
| 422 |
rec = temp_vae.decode(latents_mean).sample
|
| 423 |
|
| 424 |
# Подгон размеров, если надо
|
| 425 |
+
#if rec.shape[-2:] != orig_high.shape[-2:]:
|
| 426 |
+
# rec = F.interpolate(rec, size=orig_high.shape[-2:], mode="bilinear", align_corners=False)
|
| 427 |
|
| 428 |
# Сохраняем все real/decoded
|
| 429 |
for i in range(rec.shape[0]):
|
|
|
|
| 437 |
for i in range(rec.shape[0]):
|
| 438 |
orig_full = orig_high[i:i+1].to(torch.float32)
|
| 439 |
rec_full = rec[i:i+1].to(torch.float32)
|
| 440 |
+
#if rec_full.shape[-2:] != orig_full.shape[-2:]:
|
| 441 |
+
# rec_full = F.interpolate(rec_full, size=orig_full.shape[-2:], mode="bilinear", align_corners=False)
|
| 442 |
lpips_val = lpips_net(orig_full, rec_full).item()
|
| 443 |
lpips_scores.append(lpips_val)
|
| 444 |
avg_lpips = float(np.mean(lpips_scores))
|
|
|
|
| 497 |
latents = enc.latent_dist.mean if train_decoder_only else enc.latent_dist.sample()
|
| 498 |
rec = vae.decode(latents).sample
|
| 499 |
|
| 500 |
+
#if rec.shape[-2:] != imgs.shape[-2:]:
|
| 501 |
+
# rec = F.interpolate(rec, size=imgs.shape[-2:], mode="bilinear", align_corners=False)
|
| 502 |
|
| 503 |
rec_f32 = rec.to(torch.float32)
|
| 504 |
imgs_f32 = imgs.to(torch.float32)
|
vae5/config.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "AsymmetricAutoencoderKL",
|
| 3 |
+
"_diffusers_version": "0.35.2",
|
| 4 |
+
"_name_or_path": "vae4",
|
| 5 |
+
"act_fn": "silu",
|
| 6 |
+
"block_out_channels": [
|
| 7 |
+
128,
|
| 8 |
+
128,
|
| 9 |
+
256,
|
| 10 |
+
512,
|
| 11 |
+
512
|
| 12 |
+
],
|
| 13 |
+
"down_block_out_channels": [
|
| 14 |
+
128,
|
| 15 |
+
256,
|
| 16 |
+
512,
|
| 17 |
+
512
|
| 18 |
+
],
|
| 19 |
+
"down_block_types": [
|
| 20 |
+
"DownEncoderBlock2D",
|
| 21 |
+
"DownEncoderBlock2D",
|
| 22 |
+
"DownEncoderBlock2D",
|
| 23 |
+
"DownEncoderBlock2D"
|
| 24 |
+
],
|
| 25 |
+
"force_upcast": false,
|
| 26 |
+
"in_channels": 3,
|
| 27 |
+
"latent_channels": 16,
|
| 28 |
+
"layers_per_down_block": 2,
|
| 29 |
+
"layers_per_up_block": 2,
|
| 30 |
+
"norm_num_groups": 32,
|
| 31 |
+
"out_channels": 3,
|
| 32 |
+
"sample_size": 1024,
|
| 33 |
+
"scaling_factor": 1.0,
|
| 34 |
+
"up_block_out_channels": [
|
| 35 |
+
128,
|
| 36 |
+
128,
|
| 37 |
+
256,
|
| 38 |
+
512,
|
| 39 |
+
512
|
| 40 |
+
],
|
| 41 |
+
"up_block_types": [
|
| 42 |
+
"UpDecoderBlock2D",
|
| 43 |
+
"UpDecoderBlock2D",
|
| 44 |
+
"UpDecoderBlock2D",
|
| 45 |
+
"UpDecoderBlock2D",
|
| 46 |
+
"UpDecoderBlock2D"
|
| 47 |
+
]
|
| 48 |
+
}
|
vae5/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f63f8b5ed49e521d3f9967b5e4ac91d2dbec34e0841a8b322f7deb56fa07463e
|
| 3 |
+
size 382598708
|