recoilme committed on
Commit
16ace30
·
1 Parent(s): 9614bab
media/result_grid.jpg CHANGED

Git LFS Details

  • SHA256: 1fb1de7bd02a63e30a731b5d66cd081d3937e91a64e29bbf8af6dfa6b2159b00
  • Pointer size: 132 Bytes
  • Size of remote file: 1.31 MB

Git LFS Details

  • SHA256: 6d9c2c2c031775c0bf23411f34795d17f49990f5f52f48f6048b7bef2bab2d57
  • Pointer size: 132 Bytes
  • Size of remote file: 4.85 MB
pipeline_sdxs.py CHANGED
@@ -78,8 +78,22 @@ class SdxsPipeline(DiffusionPipeline):
78
  sequence_lengths = attention_mask.sum(dim=1) - 1
79
  batch_size = hidden.shape[0]
80
  pooled = hidden[torch.arange(batch_size, device=hidden.device), sequence_lengths]
81
-
82
- return hidden, attention_mask, pooled
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  # Кодируем позитивные и негативные промпты
85
  # ИСПРАВЛЕНИЕ: Теперь возвращаем (None, None, None), чтобы избежать UnboundLocalError
@@ -190,7 +204,7 @@ class SdxsPipeline(DiffusionPipeline):
190
  t,
191
  encoder_hidden_states=text_embeddings,
192
  encoder_attention_mask=unet_attention_mask,
193
- added_cond_kwargs={'text_embeds': unet_pooled_embeddings}
194
  ).sample
195
 
196
  if guidance_scale > 1:
 
78
  sequence_lengths = attention_mask.sum(dim=1) - 1
79
  batch_size = hidden.shape[0]
80
  pooled = hidden[torch.arange(batch_size, device=hidden.device), sequence_lengths]
81
+
82
+ # --- НОВАЯ ЛОГИКА: ОБЪЕДИНЕНИЕ ДЛЯ КРОСС-ВНИМАНИЯ ---
83
+ # 1. Расширяем пулинг-вектор до последовательности [B, 1, 1024]
84
+ pooled_expanded = pooled.unsqueeze(1)
85
+
86
+ # 2. Объединяем последовательность токенов и пулинг-вектор
87
+ # !!! ИЗМЕНЕНИЕ ЗДЕСЬ !!!: Пулинг идет ПЕРВЫМ
88
+ # Теперь: [B, 1 + L, 1024]. Пулинг стал токеном в НАЧАЛЕ.
89
+ new_encoder_hidden_states = torch.cat([pooled_expanded, hidden], dim=1)
90
+
91
+ # 3. Обновляем маску внимания для нового токена
92
+ # Маска внимания: [B, 1 + L]. Добавляем 1 в НАЧАЛО.
93
+ # torch.ones((batch_size, 1), device=device) создает маску [B, 1] со значениями 1.
94
+ new_attention_mask = torch.cat([torch.ones((batch_size, 1), device=device), attention_mask], dim=1)
95
+
96
+ return new_encoder_hidden_states, new_attention_mask, pooled
97
 
98
  # Кодируем позитивные и негативные промпты
99
  # ИСПРАВЛЕНИЕ: Теперь возвращаем (None, None, None), чтобы избежать UnboundLocalError
 
204
  t,
205
  encoder_hidden_states=text_embeddings,
206
  encoder_attention_mask=unet_attention_mask,
207
+ #added_cond_kwargs={'text_embeds': unet_pooled_embeddings}
208
  ).sample
209
 
210
  if guidance_scale > 1:
samples/unet_320x640_0.jpg CHANGED

Git LFS Details

  • SHA256: 1d8c59425b87c7f5aaf17d68bb1d79d62010487505cfac67f170d0bdacd337c9
  • Pointer size: 130 Bytes
  • Size of remote file: 57.5 kB

Git LFS Details

  • SHA256: c9929b85183b46a9b56fc82cfa9a3b3d09a3cad9144ffeb7a757eb3008faf26f
  • Pointer size: 130 Bytes
  • Size of remote file: 74 kB
samples/unet_384x640_0.jpg CHANGED

Git LFS Details

  • SHA256: eeac8402ead8792a0156bebd09dd6437144d9f1cbc275098a4412df9d15d4372
  • Pointer size: 131 Bytes
  • Size of remote file: 155 kB

Git LFS Details

  • SHA256: bbe4d6bb7767a9677a4f597010a94f426b711b5f52644fa8ac9c188bd99f67c6
  • Pointer size: 131 Bytes
  • Size of remote file: 154 kB
samples/unet_448x640_0.jpg CHANGED

Git LFS Details

  • SHA256: e21e15c7fe2b29858d232b1d2ce7901bec3f3f8acbd519d0c76a7cbe081166ec
  • Pointer size: 130 Bytes
  • Size of remote file: 71.8 kB

Git LFS Details

  • SHA256: 8740815bd5d32d21e8cef4ac2fb9680ab1f52db09e6546d1ffa78254303478f9
  • Pointer size: 130 Bytes
  • Size of remote file: 89.3 kB
samples/unet_512x640_0.jpg CHANGED

Git LFS Details

  • SHA256: a6ebca3230873f7691d34d615c0e3c0affe4c4e860b4a3fd827661185a3e6d95
  • Pointer size: 131 Bytes
  • Size of remote file: 106 kB

Git LFS Details

  • SHA256: 700aa94e24c559da490a1158f0f1917534f427e3d7f3474079fcbfc61e3214a9
  • Pointer size: 131 Bytes
  • Size of remote file: 155 kB
samples/unet_576x640_0.jpg CHANGED

Git LFS Details

  • SHA256: eae34c04ee05477b8ee57a6528d7669dfcab592c3216fdc9a5dec17d1dd5baa8
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB

Git LFS Details

  • SHA256: d5476a03010634c4fb8231a8e3b4af146cd8a56d04af72a811ce7d029e0ec392
  • Pointer size: 130 Bytes
  • Size of remote file: 36.3 kB
samples/unet_640x320_0.jpg CHANGED

Git LFS Details

  • SHA256: 7a950b931518c8ed58021d559016fc488111931ef7793590390d837fdf25f504
  • Pointer size: 131 Bytes
  • Size of remote file: 103 kB

Git LFS Details

  • SHA256: 107fde42a18cc3d168e1fe0f0f399c12d647cc1e7dac188c0af0edfd2b052696
  • Pointer size: 130 Bytes
  • Size of remote file: 14.7 kB
samples/unet_640x384_0.jpg CHANGED

Git LFS Details

  • SHA256: eade85e26c87b47c1030195b86ae565181300e30dc9f36e92dd72df5f2ebcaeb
  • Pointer size: 131 Bytes
  • Size of remote file: 172 kB

Git LFS Details

  • SHA256: e9d60664fc9533b2043cebcd23c05cea36e91b2abd58ffc0886f8388207d67dd
  • Pointer size: 130 Bytes
  • Size of remote file: 88.1 kB
samples/unet_640x448_0.jpg CHANGED

Git LFS Details

  • SHA256: 7fc94e04d184b961e460a8b70b222dce4e8d4945fc69a47541f4beff155e2d55
  • Pointer size: 130 Bytes
  • Size of remote file: 78.5 kB

Git LFS Details

  • SHA256: b9bb378e30180c8a6d43bf0affbf4182ad9ca99bdff7d3fdc2859d77d1fc2cb8
  • Pointer size: 130 Bytes
  • Size of remote file: 25 kB
samples/unet_640x512_0.jpg CHANGED

Git LFS Details

  • SHA256: 4d354271bbb4ccee5be98837572d24dd44e6adfd23e9527d7e12703bdd3e3987
  • Pointer size: 131 Bytes
  • Size of remote file: 175 kB

Git LFS Details

  • SHA256: ac922f30b0f8294308374a2d4eb981d565b2f9339ecbb38c2ba7b4a29a672c0c
  • Pointer size: 131 Bytes
  • Size of remote file: 135 kB
samples/unet_640x576_0.jpg CHANGED

Git LFS Details

  • SHA256: 5670c9a0a13e664d280309dc966e26c8ef1163fe95f9d750b930eff7cc196b7e
  • Pointer size: 131 Bytes
  • Size of remote file: 100 kB

Git LFS Details

  • SHA256: 2ed4552767ee98f0cf7ff74040bb17be2f48239cbbf7a4590d6128574fb3c045
  • Pointer size: 131 Bytes
  • Size of remote file: 119 kB
samples/unet_640x640_0.jpg CHANGED

Git LFS Details

  • SHA256: 12157ea62dc45877a8744b5835ba149cfb6eb29a1330f05fcdf98a8bd0bed3e7
  • Pointer size: 131 Bytes
  • Size of remote file: 280 kB

Git LFS Details

  • SHA256: cd8641e8153b9d2eb064e459593c2dcf4c3b404cfafb8aa93ea5c90eb82286be
  • Pointer size: 131 Bytes
  • Size of remote file: 148 kB
src/sdxs_create.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d542ea503b79500cf1ee2bec8f9a82807d0520579664ab363b4611c1971620c9
3
- size 8018
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e204cfa450a5fed8f3651be4a44f5ba8c86108bf4e51c9c61f6bee8d6a4be98f
3
+ size 8034
test.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70b3ec42fbbada7f67f0477e673da4b15da141fa481ff04f709680ca0eaf773f
3
- size 4393987
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2b00b404e66b2ee215280298d3e670b5c6d7ff7d70075052011f4f7719973f5
3
+ size 4598308
train.py CHANGED
@@ -32,7 +32,7 @@ batch_size = 256
32
  base_learning_rate = 3e-5
33
  min_learning_rate = 2.5e-5
34
  num_epochs = 10
35
- sample_interval_share = 20
36
  max_length = 192
37
  use_wandb = True
38
  use_comet_ml = False
@@ -170,7 +170,22 @@ def encode_texts(texts, max_length=max_length):
170
  batch_size = hidden.shape[0]
171
  pooled = hidden[torch.arange(batch_size, device=hidden.device), sequence_lengths]
172
 
173
- return hidden, attention_mask, pooled
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  shift_factor = getattr(vae.config, "shift_factor", 0.0)
176
  if shift_factor is None: shift_factor = 0.0
@@ -482,7 +497,7 @@ def generate_and_save_samples(fixed_samples_cpu, uncond_data, step):
482
  t,
483
  encoder_hidden_states=text_embeddings_batch,
484
  encoder_attention_mask=attention_mask_batch,
485
- added_cond_kwargs={"text_embeds": pooled_batch} # <--- ПУЛИНГ ЗДЕСЬ
486
  )
487
  flow = getattr(model_out, "sample", model_out)
488
 
@@ -606,7 +621,7 @@ for epoch in range(start_epoch, start_epoch + num_epochs):
606
  timesteps,
607
  encoder_hidden_states=embeddings,
608
  encoder_attention_mask=attention_mask,
609
- added_cond_kwargs={"text_embeds": pooled_embeddings} # <--- Передача пулинга
610
  ).sample
611
 
612
  target = noise - latents
 
32
  base_learning_rate = 3e-5
33
  min_learning_rate = 2.5e-5
34
  num_epochs = 10
35
+ sample_interval_share = 40
36
  max_length = 192
37
  use_wandb = True
38
  use_comet_ml = False
 
170
  batch_size = hidden.shape[0]
171
  pooled = hidden[torch.arange(batch_size, device=hidden.device), sequence_lengths]
172
 
173
+ #return hidden, attention_mask, pooled
174
+ # --- НОВАЯ ЛОГИКА: ОБЪЕДИНЕНИЕ ДЛЯ КРОСС-ВНИМАНИЯ ---
175
+ # 1. Расширяем пулинг-вектор до последовательности [B, 1, 1024]
176
+ pooled_expanded = pooled.unsqueeze(1)
177
+
178
+ # 2. Объединяем последовательность токенов и пулинг-вектор
179
+ # !!! ИЗМЕНЕНИЕ ЗДЕСЬ !!!: Пулинг идет ПЕРВЫМ
180
+ # Теперь: [B, 1 + L, 1024]. Пулинг стал токеном в НАЧАЛЕ.
181
+ new_encoder_hidden_states = torch.cat([pooled_expanded, hidden], dim=1)
182
+
183
+ # 3. Обновляем маску внимания для нового токена
184
+ # Маска внимания: [B, 1 + L]. Добавляем 1 в НАЧАЛО.
185
+ # torch.ones((batch_size, 1), device=device) создает маску [B, 1] со значениями 1.
186
+ new_attention_mask = torch.cat([torch.ones((batch_size, 1), device=device), attention_mask], dim=1)
187
+
188
+ return new_encoder_hidden_states, new_attention_mask, pooled
189
 
190
  shift_factor = getattr(vae.config, "shift_factor", 0.0)
191
  if shift_factor is None: shift_factor = 0.0
 
497
  t,
498
  encoder_hidden_states=text_embeddings_batch,
499
  encoder_attention_mask=attention_mask_batch,
500
+ #added_cond_kwargs={"text_embeds": pooled_batch} # <--- ПУЛИНГ ЗДЕСЬ
501
  )
502
  flow = getattr(model_out, "sample", model_out)
503
 
 
621
  timesteps,
622
  encoder_hidden_states=embeddings,
623
  encoder_attention_mask=attention_mask,
624
+ #added_cond_kwargs={"text_embeds": pooled_embeddings} # <--- Передача пулинга
625
  ).sample
626
 
627
  target = noise - latents
unet/config.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae4272f7e52480762c228a2fbe1db5f361d7a5971c3855b483999fb3df2d722b
3
- size 1885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab7222cdd538ff5178adc870a764d22ab24a185f0a7b63852ea728b3b09fcff
3
+ size 1876
unet/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccfa16a08b6e507835636c048b8c721186ef6832fec3ca889a7139dcd53676cf
3
- size 6625750656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:402b5747329ecd5573887b46e35e687b8f3d6c79ccba522c06e58082d0eace87
3
+ size 6604736640
{unet_old → unet_very_old}/config.json RENAMED
File without changes
{unet_old → unet_very_old}/diffusion_pytorch_model.safetensors RENAMED
File without changes