recoilme committed on
Commit
7663918
·
1 Parent(s): d47f187
samples/sample_decoded_0.jpg CHANGED

Git LFS Details

  • SHA256: bd7d390faad799536a62643cd7f02bc0fbb2d4aff89b03d7324ebb311d4f655e
  • Pointer size: 131 Bytes
  • Size of remote file: 140 kB

Git LFS Details

  • SHA256: f5e3e5ffee1ec3eee39b4d598e81ea88adf5da797f8ef39977ccd0f8e305c749
  • Pointer size: 131 Bytes
  • Size of remote file: 127 kB
samples/sample_decoded_1.jpg CHANGED

Git LFS Details

  • SHA256: 78db533a49865fe85bdfdd2c10202b49217d3db814bf9c708121b7c82ef1089e
  • Pointer size: 130 Bytes
  • Size of remote file: 69.6 kB

Git LFS Details

  • SHA256: 99035f8b2b03102dc827377da6ccdc471e589784d8c10d45d1f0ea86266b08c6
  • Pointer size: 130 Bytes
  • Size of remote file: 68.7 kB
samples/sample_decoded_2.jpg CHANGED

Git LFS Details

  • SHA256: 740fdbebb80e4c539cfc8f82cde2c6578f29f02ff1b0a71fb454f2af7d1c82a0
  • Pointer size: 130 Bytes
  • Size of remote file: 83.4 kB

Git LFS Details

  • SHA256: e84abbd76a57f5fa7ae3a8dc0a26646962ae863115f507fe57d42338b07d3ab3
  • Pointer size: 130 Bytes
  • Size of remote file: 71.1 kB
samples/sample_real_0.jpg CHANGED

Git LFS Details

  • SHA256: c43d71f0fd394f8b8708151c7486348e154ec2f2832cd319f608d6e9d2c324f9
  • Pointer size: 131 Bytes
  • Size of remote file: 151 kB

Git LFS Details

  • SHA256: 52e7815134a168f129ffd380eb2c6c3c8f02c6cb32889aa6314999b09df2fdd9
  • Pointer size: 131 Bytes
  • Size of remote file: 134 kB
samples/sample_real_1.jpg CHANGED

Git LFS Details

  • SHA256: 91bc92c30f5ca06e9c34c8e915bf9cae2a710f1fb8a60842b8471428f4e2e3f9
  • Pointer size: 130 Bytes
  • Size of remote file: 69.1 kB

Git LFS Details

  • SHA256: 763eee69db9f0833ab4da8c81cdcff280ed753beeb07b42d29a213fb5c5910c3
  • Pointer size: 130 Bytes
  • Size of remote file: 70.3 kB
samples/sample_real_2.jpg CHANGED

Git LFS Details

  • SHA256: d230f08be5595fba27c422560a77f161b07d4f642790f72ad122bb2b58b9a99f
  • Pointer size: 130 Bytes
  • Size of remote file: 83.6 kB

Git LFS Details

  • SHA256: 4a8018e8fa6a94f1e58c338390b23c132da30566cbc0af8710bfd5a960456085
  • Pointer size: 130 Bytes
  • Size of remote file: 67.8 kB
train_vae_fdl.py CHANGED
@@ -29,11 +29,11 @@ from collections import deque
29
 
30
  # --------------------------- Параметры ---------------------------
31
  ds_path = "/workspace/d23"
32
- project = "vae"
33
  batch_size = 2
34
  base_learning_rate = 5e-5
35
  min_learning_rate = 1e-5
36
- num_epochs = 50
37
  sample_interval_share = 5
38
  use_wandb = True
39
  save_model = True
@@ -41,8 +41,8 @@ use_decay = True
41
  optimizer_type = "adam8bit"
42
  dtype = torch.float32
43
 
44
- model_resolution = 256
45
- high_resolution = 512
46
  limit = 0
47
  save_barrier = 1.3
48
  warmup_percent = 0.005
@@ -51,9 +51,9 @@ beta2 = 0.997
51
  eps = 1e-8
52
  clip_grad_norm = 1.0
53
  mixed_precision = "no"
54
- gradient_accumulation_steps = 2
55
  generated_folder = "samples"
56
- save_as = "vae4"
57
  num_workers = 0
58
  device = None
59
 
@@ -67,10 +67,10 @@ kl_ratio = 0.00
67
  # Доли лоссов
68
  loss_ratios = {
69
  "lpips": 0.70,#0.50,
70
- "fdl" : 0.05,#0.25,
71
  "edge": 0.05,
72
  "mse": 0.10,
73
- "mae": 0.10,
74
  "kl": 0.00, # активируем при full_training=True
75
  }
76
  median_coeff_steps = 1000
@@ -422,8 +422,8 @@ def generate_and_save_samples(step=None):
422
  rec = temp_vae.decode(latents_mean).sample
423
 
424
  # Подгон размеров, если надо
425
- if rec.shape[-2:] != orig_high.shape[-2:]:
426
- rec = F.interpolate(rec, size=orig_high.shape[-2:], mode="bilinear", align_corners=False)
427
 
428
  # Сохраняем все real/decoded
429
  for i in range(rec.shape[0]):
@@ -437,8 +437,8 @@ def generate_and_save_samples(step=None):
437
  for i in range(rec.shape[0]):
438
  orig_full = orig_high[i:i+1].to(torch.float32)
439
  rec_full = rec[i:i+1].to(torch.float32)
440
- if rec_full.shape[-2:] != orig_full.shape[-2:]:
441
- rec_full = F.interpolate(rec_full, size=orig_full.shape[-2:], mode="bilinear", align_corners=False)
442
  lpips_val = lpips_net(orig_full, rec_full).item()
443
  lpips_scores.append(lpips_val)
444
  avg_lpips = float(np.mean(lpips_scores))
@@ -497,8 +497,8 @@ for epoch in range(num_epochs):
497
  latents = enc.latent_dist.mean if train_decoder_only else enc.latent_dist.sample()
498
  rec = vae.decode(latents).sample
499
 
500
- if rec.shape[-2:] != imgs.shape[-2:]:
501
- rec = F.interpolate(rec, size=imgs.shape[-2:], mode="bilinear", align_corners=False)
502
 
503
  rec_f32 = rec.to(torch.float32)
504
  imgs_f32 = imgs.to(torch.float32)
 
29
 
30
  # --------------------------- Параметры ---------------------------
31
  ds_path = "/workspace/d23"
32
+ project = "vae4"
33
  batch_size = 2
34
  base_learning_rate = 5e-5
35
  min_learning_rate = 1e-5
36
+ num_epochs = 25
37
  sample_interval_share = 5
38
  use_wandb = True
39
  save_model = True
 
41
  optimizer_type = "adam8bit"
42
  dtype = torch.float32
43
 
44
+ model_resolution = 288
45
+ high_resolution = 576
46
  limit = 0
47
  save_barrier = 1.3
48
  warmup_percent = 0.005
 
51
  eps = 1e-8
52
  clip_grad_norm = 1.0
53
  mixed_precision = "no"
54
+ gradient_accumulation_steps = 4
55
  generated_folder = "samples"
56
+ save_as = "vae5"
57
  num_workers = 0
58
  device = None
59
 
 
67
  # Доли лоссов
68
  loss_ratios = {
69
  "lpips": 0.70,#0.50,
70
+ "fdl" : 0.10,#0.25,
71
  "edge": 0.05,
72
  "mse": 0.10,
73
+ "mae": 0.05,
74
  "kl": 0.00, # активируем при full_training=True
75
  }
76
  median_coeff_steps = 1000
 
422
  rec = temp_vae.decode(latents_mean).sample
423
 
424
  # Подгон размеров, если надо
425
+ #if rec.shape[-2:] != orig_high.shape[-2:]:
426
+ # rec = F.interpolate(rec, size=orig_high.shape[-2:], mode="bilinear", align_corners=False)
427
 
428
  # Сохраняем все real/decoded
429
  for i in range(rec.shape[0]):
 
437
  for i in range(rec.shape[0]):
438
  orig_full = orig_high[i:i+1].to(torch.float32)
439
  rec_full = rec[i:i+1].to(torch.float32)
440
+ #if rec_full.shape[-2:] != orig_full.shape[-2:]:
441
+ # rec_full = F.interpolate(rec_full, size=orig_full.shape[-2:], mode="bilinear", align_corners=False)
442
  lpips_val = lpips_net(orig_full, rec_full).item()
443
  lpips_scores.append(lpips_val)
444
  avg_lpips = float(np.mean(lpips_scores))
 
497
  latents = enc.latent_dist.mean if train_decoder_only else enc.latent_dist.sample()
498
  rec = vae.decode(latents).sample
499
 
500
+ #if rec.shape[-2:] != imgs.shape[-2:]:
501
+ # rec = F.interpolate(rec, size=imgs.shape[-2:], mode="bilinear", align_corners=False)
502
 
503
  rec_f32 = rec.to(torch.float32)
504
  imgs_f32 = imgs.to(torch.float32)
vae5/config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AsymmetricAutoencoderKL",
3
+ "_diffusers_version": "0.35.2",
4
+ "_name_or_path": "vae4",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 128,
9
+ 256,
10
+ 512,
11
+ 512
12
+ ],
13
+ "down_block_out_channels": [
14
+ 128,
15
+ 256,
16
+ 512,
17
+ 512
18
+ ],
19
+ "down_block_types": [
20
+ "DownEncoderBlock2D",
21
+ "DownEncoderBlock2D",
22
+ "DownEncoderBlock2D",
23
+ "DownEncoderBlock2D"
24
+ ],
25
+ "force_upcast": false,
26
+ "in_channels": 3,
27
+ "latent_channels": 16,
28
+ "layers_per_down_block": 2,
29
+ "layers_per_up_block": 2,
30
+ "norm_num_groups": 32,
31
+ "out_channels": 3,
32
+ "sample_size": 1024,
33
+ "scaling_factor": 1.0,
34
+ "up_block_out_channels": [
35
+ 128,
36
+ 128,
37
+ 256,
38
+ 512,
39
+ 512
40
+ ],
41
+ "up_block_types": [
42
+ "UpDecoderBlock2D",
43
+ "UpDecoderBlock2D",
44
+ "UpDecoderBlock2D",
45
+ "UpDecoderBlock2D",
46
+ "UpDecoderBlock2D"
47
+ ]
48
+ }
vae5/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f63f8b5ed49e521d3f9967b5e4ac91d2dbec34e0841a8b322f7deb56fa07463e
3
+ size 382598708