0706
Browse files
- samples/unet_320x576_0.jpg +2 -2
- samples/unet_384x576_0.jpg +2 -2
- samples/unet_448x576_0.jpg +2 -2
- samples/unet_512x576_0.jpg +2 -2
- samples/unet_576x320_0.jpg +2 -2
- samples/unet_576x384_0.jpg +2 -2
- samples/unet_576x448_0.jpg +2 -2
- samples/unet_576x512_0.jpg +2 -2
- samples/unet_576x576_0.jpg +2 -2
- src/cherrypick.ipynb +2 -2
- train.py +5 -6
- unet/diffusion_pytorch_model.safetensors +1 -1
samples/unet_320x576_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_384x576_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_448x576_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_512x576_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_576x320_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_576x384_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_576x448_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_576x512_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_576x576_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
src/cherrypick.ipynb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a7a1a0affd76326768c9aa37d032b8c0156a10f06f3fe53b98ca695a9dac57f
|
| 3 |
+
size 44433
|
train.py
CHANGED
|
@@ -28,9 +28,9 @@ import torch.nn.functional as F
|
|
| 28 |
ds_path = "datasets/576"
|
| 29 |
project = "unet"
|
| 30 |
batch_size = 40
|
| 31 |
-
base_learning_rate =
|
| 32 |
-
min_learning_rate =
|
| 33 |
-
num_epochs =
|
| 34 |
# samples/save per epoch
|
| 35 |
sample_interval_share = 5
|
| 36 |
use_wandb = True
|
|
@@ -51,7 +51,7 @@ dtype = torch.float32
|
|
| 51 |
save_barrier = 1.03
|
| 52 |
dispersive_temperature=0.5
|
| 53 |
dispersive_weight=0.05
|
| 54 |
-
percentile_clipping =
|
| 55 |
steps_offset = 1 # Scheduler
|
| 56 |
limit = 0
|
| 57 |
checkpoints_folder = ""
|
|
@@ -880,7 +880,6 @@ for epoch in range(start_epoch, start_epoch + num_epochs):
|
|
| 880 |
dispersive_loss = dispersive_hook.weight * dispersive_hook.compute_dispersive_loss()
|
| 881 |
if torch.isnan(dispersive_loss) or torch.isinf(dispersive_loss):
|
| 882 |
print(f"Rank {accelerator.process_index}: Found nan/inf in dispersive_loss: {total_loss}")
|
| 883 |
-
#break
|
| 884 |
|
| 885 |
# Итоговый loss
|
| 886 |
# dispersive_loss должен падать и тотал падать - поэтому плюс
|
|
@@ -905,7 +904,7 @@ for epoch in range(start_epoch, start_epoch + num_epochs):
|
|
| 905 |
if not fbp:
|
| 906 |
if accelerator.sync_gradients:
|
| 907 |
with torch.amp.autocast('cuda', enabled=False):
|
| 908 |
-
grad = accelerator.clip_grad_norm_(unet.parameters(),
|
| 909 |
optimizer.step()
|
| 910 |
lr_scheduler.step()
|
| 911 |
optimizer.zero_grad(set_to_none=True)
|
|
|
|
| 28 |
ds_path = "datasets/576"
|
| 29 |
project = "unet"
|
| 30 |
batch_size = 40
|
| 31 |
+
base_learning_rate = 9e-6
|
| 32 |
+
min_learning_rate = 8e-6
|
| 33 |
+
num_epochs = 6
|
| 34 |
# samples/save per epoch
|
| 35 |
sample_interval_share = 5
|
| 36 |
use_wandb = True
|
|
|
|
| 51 |
save_barrier = 1.03
|
| 52 |
dispersive_temperature=0.5
|
| 53 |
dispersive_weight=0.05
|
| 54 |
+
percentile_clipping = 95 # 8bit optim
|
| 55 |
steps_offset = 1 # Scheduler
|
| 56 |
limit = 0
|
| 57 |
checkpoints_folder = ""
|
|
|
|
| 880 |
dispersive_loss = dispersive_hook.weight * dispersive_hook.compute_dispersive_loss()
|
| 881 |
if torch.isnan(dispersive_loss) or torch.isinf(dispersive_loss):
|
| 882 |
print(f"Rank {accelerator.process_index}: Found nan/inf in dispersive_loss: {total_loss}")
|
|
|
|
| 883 |
|
| 884 |
# Итоговый loss
|
| 885 |
# dispersive_loss должен падать и тотал падать - поэтому плюс
|
|
|
|
| 904 |
if not fbp:
|
| 905 |
if accelerator.sync_gradients:
|
| 906 |
with torch.amp.autocast('cuda', enabled=False):
|
| 907 |
+
grad = accelerator.clip_grad_norm_(unet.parameters(), 0.5)
|
| 908 |
optimizer.step()
|
| 909 |
lr_scheduler.step()
|
| 910 |
optimizer.zero_grad(set_to_none=True)
|
unet/diffusion_pytorch_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7014306128
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3991dd208c9c94d95c5c2465e0dc11197da00f0b30aebb53d89b3e9bc9392ae4
|
| 3 |
size 7014306128
|