Upload folder using huggingface_hub
Browse files
stage2/lightningdit-xl-pe-vit-b-bf16/checkpoints/0025000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d540005ebc084282f849ffd7312f49840b358f21ca3813943180559d22701f71
|
| 3 |
+
size 19230431602
|
stage2/lightningdit-xl-pe-vit-b-bf16/checkpoints/0050000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:335a051bc7bcf0b16bebf3b1fb9d8f5f8daba6c800bb4b970a416315021c2306
|
| 3 |
+
size 19230431602
|
stage2/lightningdit-xl-pe-vit-b-bf16/checkpoints/0075000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a641f46d4a6757186572e2433cf4548a8e9b6faf8820745adbb88597c1ead8e
|
| 3 |
+
size 19230431602
|
stage2/lightningdit-xl-pe-vit-b-bf16/log.txt
ADDED
|
@@ -0,0 +1,1116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[[34m2025-10-27 23:30:09[0m] Experiment directory created at results/stage2/hfdata/lightningdit-xl-pe-vit-b-bf16
|
| 2 |
+
[[34m2025-10-27 23:30:15[0m] Missing keys for loading vision encoder: []
|
| 3 |
+
[[34m2025-10-27 23:30:15[0m] Unexpected keys for loading vision encoder: []
|
| 4 |
+
[[34m2025-10-27 23:30:30[0m] Model Parameters: 1202.04M
|
| 5 |
+
[[34m2025-10-27 23:30:33[0m] Dataset contains 1,281,167 images (/scratch/xingjian.leng/data/train)
|
| 6 |
+
[[34m2025-10-27 23:30:33[0m] Gradient accumulation: steps=1, micro batch=128, per-GPU batch=128, global batch=1024
|
| 7 |
+
[[34m2025-10-27 23:30:33[0m] Precision mode: bf16
|
| 8 |
+
[[34m2025-10-27 23:30:33[0m] Training configured for 80 epochs, 1251 steps per epoch.
|
| 9 |
+
[[34m2025-10-27 23:30:33[0m] Optimizer: ADAMW with lr=0.0002, betas=(0.9, 0.95), weight_decay=0.0, eps=1e-08
|
| 10 |
+
Scheduler: linear with warmup_steps=0, decay_end_steps=0, final_lr=0.0002
|
| 11 |
+
[[34m2025-10-27 23:30:33[0m] Training for 80 epochs...
|
| 12 |
+
[[34m2025-10-27 23:30:33[0m] Beginning epoch 0...
|
| 13 |
+
[[34m2025-10-27 23:30:39[0m] Generating EMA samples...
|
| 14 |
+
[[34m2025-10-27 23:31:08[0m] Generating EMA samples done.
|
| 15 |
+
[[34m2025-10-27 23:32:30[0m] (step=0000100) Train Loss: 1.6477, Train Steps/Sec: 0.86
|
| 16 |
+
[[34m2025-10-27 23:33:54[0m] (step=0000200) Train Loss: 1.1862, Train Steps/Sec: 1.20
|
| 17 |
+
[[34m2025-10-27 23:35:17[0m] (step=0000300) Train Loss: 1.0226, Train Steps/Sec: 1.20
|
| 18 |
+
[[34m2025-10-27 23:36:41[0m] (step=0000400) Train Loss: 0.9462, Train Steps/Sec: 1.20
|
| 19 |
+
[[34m2025-10-27 23:38:04[0m] (step=0000500) Train Loss: 0.9013, Train Steps/Sec: 1.20
|
| 20 |
+
[[34m2025-10-27 23:39:28[0m] (step=0000600) Train Loss: 0.8715, Train Steps/Sec: 1.20
|
| 21 |
+
[[34m2025-10-27 23:40:51[0m] (step=0000700) Train Loss: 0.8490, Train Steps/Sec: 1.20
|
| 22 |
+
[[34m2025-10-27 23:42:15[0m] (step=0000800) Train Loss: 0.8317, Train Steps/Sec: 1.19
|
| 23 |
+
[[34m2025-10-27 23:43:39[0m] (step=0000900) Train Loss: 0.8191, Train Steps/Sec: 1.20
|
| 24 |
+
[[34m2025-10-27 23:45:02[0m] (step=0001000) Train Loss: 0.8082, Train Steps/Sec: 1.20
|
| 25 |
+
[[34m2025-10-27 23:46:26[0m] (step=0001100) Train Loss: 0.7983, Train Steps/Sec: 1.20
|
| 26 |
+
[[34m2025-10-27 23:47:49[0m] (step=0001200) Train Loss: 0.7907, Train Steps/Sec: 1.20
|
| 27 |
+
[[34m2025-10-27 23:48:33[0m] Beginning epoch 1...
|
| 28 |
+
[[34m2025-10-27 23:49:16[0m] (step=0001300) Train Loss: 0.7838, Train Steps/Sec: 1.16
|
| 29 |
+
[[34m2025-10-27 23:50:39[0m] (step=0001400) Train Loss: 0.7766, Train Steps/Sec: 1.20
|
| 30 |
+
[[34m2025-10-27 23:52:03[0m] (step=0001500) Train Loss: 0.7712, Train Steps/Sec: 1.20
|
| 31 |
+
[[34m2025-10-27 23:53:26[0m] (step=0001600) Train Loss: 0.7650, Train Steps/Sec: 1.20
|
| 32 |
+
[[34m2025-10-27 23:54:50[0m] (step=0001700) Train Loss: 0.7623, Train Steps/Sec: 1.20
|
| 33 |
+
[[34m2025-10-27 23:56:14[0m] (step=0001800) Train Loss: 0.7565, Train Steps/Sec: 1.20
|
| 34 |
+
[[34m2025-10-28 00:02:06[0m] Experiment directory created at results/stage2/hfdata/lightningdit-xl-pe-vit-b-bf16
|
| 35 |
+
[[34m2025-10-28 00:02:10[0m] Missing keys for loading vision encoder: []
|
| 36 |
+
[[34m2025-10-28 00:02:10[0m] Unexpected keys for loading vision encoder: []
|
| 37 |
+
[[34m2025-10-28 00:02:25[0m] Model Parameters: 1202.04M
|
| 38 |
+
[[34m2025-10-28 00:02:30[0m] Dataset contains 1,281,167 images (/scratch/xingjian.leng/data/train)
|
| 39 |
+
[[34m2025-10-28 00:02:30[0m] Gradient accumulation: steps=1, micro batch=128, per-GPU batch=128, global batch=1024
|
| 40 |
+
[[34m2025-10-28 00:02:30[0m] Precision mode: bf16
|
| 41 |
+
[[34m2025-10-28 00:02:30[0m] Training configured for 80 epochs, 1251 steps per epoch.
|
| 42 |
+
[[34m2025-10-28 00:02:30[0m] Optimizer: ADAMW with lr=0.0002, betas=(0.9, 0.95), weight_decay=0.0, eps=1e-08
|
| 43 |
+
Scheduler: linear with warmup_steps=0, decay_end_steps=0, final_lr=0.0002
|
| 44 |
+
[[34m2025-10-28 00:02:30[0m] Training for 80 epochs...
|
| 45 |
+
[[34m2025-10-28 00:02:30[0m] Beginning epoch 0...
|
| 46 |
+
[[34m2025-10-28 00:02:36[0m] Generating EMA samples...
|
| 47 |
+
[[34m2025-10-28 00:03:04[0m] Generating EMA samples done.
|
| 48 |
+
[[34m2025-10-28 00:04:25[0m] (step=0000100) Train Loss: 1.6477, Train Steps/Sec: 0.87
|
| 49 |
+
[[34m2025-10-28 00:05:47[0m] (step=0000200) Train Loss: 1.1862, Train Steps/Sec: 1.21
|
| 50 |
+
[[34m2025-10-28 00:07:10[0m] (step=0000300) Train Loss: 1.0226, Train Steps/Sec: 1.21
|
| 51 |
+
[[34m2025-10-28 00:08:32[0m] (step=0000400) Train Loss: 0.9462, Train Steps/Sec: 1.21
|
| 52 |
+
[[34m2025-10-28 00:09:55[0m] (step=0000500) Train Loss: 0.9013, Train Steps/Sec: 1.21
|
| 53 |
+
[[34m2025-10-28 00:11:17[0m] (step=0000600) Train Loss: 0.8715, Train Steps/Sec: 1.21
|
| 54 |
+
[[34m2025-10-28 00:12:40[0m] (step=0000700) Train Loss: 0.8490, Train Steps/Sec: 1.21
|
| 55 |
+
[[34m2025-10-28 00:14:02[0m] (step=0000800) Train Loss: 0.8317, Train Steps/Sec: 1.21
|
| 56 |
+
[[34m2025-10-28 00:15:25[0m] (step=0000900) Train Loss: 0.8191, Train Steps/Sec: 1.21
|
| 57 |
+
[[34m2025-10-28 00:16:47[0m] (step=0001000) Train Loss: 0.8082, Train Steps/Sec: 1.21
|
| 58 |
+
[[34m2025-10-28 00:18:10[0m] (step=0001100) Train Loss: 0.7983, Train Steps/Sec: 1.21
|
| 59 |
+
[[34m2025-10-28 00:19:32[0m] (step=0001200) Train Loss: 0.7907, Train Steps/Sec: 1.21
|
| 60 |
+
[[34m2025-10-28 00:20:15[0m] Beginning epoch 1...
|
| 61 |
+
[[34m2025-10-28 00:20:57[0m] (step=0001300) Train Loss: 0.7838, Train Steps/Sec: 1.18
|
| 62 |
+
[[34m2025-10-28 00:22:19[0m] (step=0001400) Train Loss: 0.7766, Train Steps/Sec: 1.22
|
| 63 |
+
[[34m2025-10-28 00:23:41[0m] (step=0001500) Train Loss: 0.7712, Train Steps/Sec: 1.21
|
| 64 |
+
[[34m2025-10-28 00:25:04[0m] (step=0001600) Train Loss: 0.7650, Train Steps/Sec: 1.21
|
| 65 |
+
[[34m2025-10-28 00:26:26[0m] (step=0001700) Train Loss: 0.7623, Train Steps/Sec: 1.21
|
| 66 |
+
[[34m2025-10-28 00:27:49[0m] (step=0001800) Train Loss: 0.7565, Train Steps/Sec: 1.21
|
| 67 |
+
[[34m2025-10-28 00:29:11[0m] (step=0001900) Train Loss: 0.7531, Train Steps/Sec: 1.21
|
| 68 |
+
[[34m2025-10-28 00:30:33[0m] (step=0002000) Train Loss: 0.7491, Train Steps/Sec: 1.21
|
| 69 |
+
[[34m2025-10-28 00:31:56[0m] (step=0002100) Train Loss: 0.7464, Train Steps/Sec: 1.21
|
| 70 |
+
[[34m2025-10-28 00:33:18[0m] (step=0002200) Train Loss: 0.7427, Train Steps/Sec: 1.21
|
| 71 |
+
[[34m2025-10-28 00:34:41[0m] (step=0002300) Train Loss: 0.7393, Train Steps/Sec: 1.21
|
| 72 |
+
[[34m2025-10-28 00:36:04[0m] (step=0002400) Train Loss: 0.7360, Train Steps/Sec: 1.21
|
| 73 |
+
[[34m2025-10-28 00:37:26[0m] (step=0002500) Train Loss: 0.7317, Train Steps/Sec: 1.21
|
| 74 |
+
[[34m2025-10-28 00:37:28[0m] Beginning epoch 2...
|
| 75 |
+
[[34m2025-10-28 00:38:51[0m] (step=0002600) Train Loss: 0.7300, Train Steps/Sec: 1.18
|
| 76 |
+
[[34m2025-10-28 00:40:14[0m] (step=0002700) Train Loss: 0.7280, Train Steps/Sec: 1.21
|
| 77 |
+
[[34m2025-10-28 00:41:36[0m] (step=0002800) Train Loss: 0.7255, Train Steps/Sec: 1.21
|
| 78 |
+
[[34m2025-10-28 00:42:58[0m] (step=0002900) Train Loss: 0.7245, Train Steps/Sec: 1.21
|
| 79 |
+
[[34m2025-10-28 00:44:21[0m] (step=0003000) Train Loss: 0.7225, Train Steps/Sec: 1.21
|
| 80 |
+
[[34m2025-10-28 00:45:43[0m] (step=0003100) Train Loss: 0.7195, Train Steps/Sec: 1.21
|
| 81 |
+
[[34m2025-10-28 00:47:06[0m] (step=0003200) Train Loss: 0.7184, Train Steps/Sec: 1.21
|
| 82 |
+
[[34m2025-10-28 00:48:28[0m] (step=0003300) Train Loss: 0.7160, Train Steps/Sec: 1.21
|
| 83 |
+
[[34m2025-10-28 00:49:51[0m] (step=0003400) Train Loss: 0.7134, Train Steps/Sec: 1.21
|
| 84 |
+
[[34m2025-10-28 00:51:13[0m] (step=0003500) Train Loss: 0.7117, Train Steps/Sec: 1.21
|
| 85 |
+
[[34m2025-10-28 00:52:36[0m] (step=0003600) Train Loss: 0.7094, Train Steps/Sec: 1.21
|
| 86 |
+
[[34m2025-10-28 00:53:58[0m] (step=0003700) Train Loss: 0.7091, Train Steps/Sec: 1.21
|
| 87 |
+
[[34m2025-10-28 00:54:42[0m] Beginning epoch 3...
|
| 88 |
+
[[34m2025-10-28 00:55:23[0m] (step=0003800) Train Loss: 0.7077, Train Steps/Sec: 1.18
|
| 89 |
+
[[34m2025-10-28 00:56:45[0m] (step=0003900) Train Loss: 0.7064, Train Steps/Sec: 1.21
|
| 90 |
+
[[34m2025-10-28 00:58:08[0m] (step=0004000) Train Loss: 0.7052, Train Steps/Sec: 1.21
|
| 91 |
+
[[34m2025-10-28 00:59:31[0m] (step=0004100) Train Loss: 0.7036, Train Steps/Sec: 1.21
|
| 92 |
+
[[34m2025-10-28 01:00:53[0m] (step=0004200) Train Loss: 0.7007, Train Steps/Sec: 1.21
|
| 93 |
+
[[34m2025-10-28 01:02:15[0m] (step=0004300) Train Loss: 0.7017, Train Steps/Sec: 1.21
|
| 94 |
+
[[34m2025-10-28 01:03:38[0m] (step=0004400) Train Loss: 0.6988, Train Steps/Sec: 1.21
|
| 95 |
+
[[34m2025-10-28 01:05:00[0m] (step=0004500) Train Loss: 0.6971, Train Steps/Sec: 1.21
|
| 96 |
+
[[34m2025-10-28 01:06:23[0m] (step=0004600) Train Loss: 0.6953, Train Steps/Sec: 1.21
|
| 97 |
+
[[34m2025-10-28 01:07:45[0m] (step=0004700) Train Loss: 0.6965, Train Steps/Sec: 1.21
|
| 98 |
+
[[34m2025-10-28 01:09:07[0m] (step=0004800) Train Loss: 0.6939, Train Steps/Sec: 1.21
|
| 99 |
+
[[34m2025-10-28 01:10:30[0m] (step=0004900) Train Loss: 0.6927, Train Steps/Sec: 1.21
|
| 100 |
+
[[34m2025-10-28 01:11:52[0m] (step=0005000) Train Loss: 0.6926, Train Steps/Sec: 1.21
|
| 101 |
+
[[34m2025-10-28 01:11:56[0m] Beginning epoch 4...
|
| 102 |
+
[[34m2025-10-28 01:13:17[0m] (step=0005100) Train Loss: 0.6900, Train Steps/Sec: 1.18
|
| 103 |
+
[[34m2025-10-28 01:14:40[0m] (step=0005200) Train Loss: 0.6909, Train Steps/Sec: 1.21
|
| 104 |
+
[[34m2025-10-28 01:16:02[0m] (step=0005300) Train Loss: 0.6888, Train Steps/Sec: 1.21
|
| 105 |
+
[[34m2025-10-28 01:17:24[0m] (step=0005400) Train Loss: 0.6877, Train Steps/Sec: 1.21
|
| 106 |
+
[[34m2025-10-28 01:18:47[0m] (step=0005500) Train Loss: 0.6860, Train Steps/Sec: 1.21
|
| 107 |
+
[[34m2025-10-28 01:20:09[0m] (step=0005600) Train Loss: 0.6870, Train Steps/Sec: 1.21
|
| 108 |
+
[[34m2025-10-28 01:21:31[0m] (step=0005700) Train Loss: 0.6853, Train Steps/Sec: 1.21
|
| 109 |
+
[[34m2025-10-28 01:22:55[0m] (step=0005800) Train Loss: 0.6841, Train Steps/Sec: 1.20
|
| 110 |
+
[[34m2025-10-28 01:24:17[0m] (step=0005900) Train Loss: 0.6824, Train Steps/Sec: 1.21
|
| 111 |
+
[[34m2025-10-28 01:25:40[0m] (step=0006000) Train Loss: 0.6811, Train Steps/Sec: 1.21
|
| 112 |
+
[[34m2025-10-28 01:27:02[0m] (step=0006100) Train Loss: 0.6819, Train Steps/Sec: 1.21
|
| 113 |
+
[[34m2025-10-28 01:28:24[0m] (step=0006200) Train Loss: 0.6799, Train Steps/Sec: 1.21
|
| 114 |
+
[[34m2025-10-28 01:29:10[0m] Beginning epoch 5...
|
| 115 |
+
[[34m2025-10-28 01:29:49[0m] (step=0006300) Train Loss: 0.6801, Train Steps/Sec: 1.18
|
| 116 |
+
[[34m2025-10-28 01:31:12[0m] (step=0006400) Train Loss: 0.6806, Train Steps/Sec: 1.21
|
| 117 |
+
[[34m2025-10-28 01:32:34[0m] (step=0006500) Train Loss: 0.6772, Train Steps/Sec: 1.21
|
| 118 |
+
[[34m2025-10-28 01:33:56[0m] (step=0006600) Train Loss: 0.6774, Train Steps/Sec: 1.21
|
| 119 |
+
[[34m2025-10-28 01:35:19[0m] (step=0006700) Train Loss: 0.6777, Train Steps/Sec: 1.21
|
| 120 |
+
[[34m2025-10-28 01:36:41[0m] (step=0006800) Train Loss: 0.6771, Train Steps/Sec: 1.21
|
| 121 |
+
[[34m2025-10-28 01:38:03[0m] (step=0006900) Train Loss: 0.6762, Train Steps/Sec: 1.21
|
| 122 |
+
[[34m2025-10-28 01:39:26[0m] (step=0007000) Train Loss: 0.6764, Train Steps/Sec: 1.21
|
| 123 |
+
[[34m2025-10-28 01:40:48[0m] (step=0007100) Train Loss: 0.6753, Train Steps/Sec: 1.21
|
| 124 |
+
[[34m2025-10-28 01:42:11[0m] (step=0007200) Train Loss: 0.6746, Train Steps/Sec: 1.21
|
| 125 |
+
[[34m2025-10-28 01:43:33[0m] (step=0007300) Train Loss: 0.6733, Train Steps/Sec: 1.21
|
| 126 |
+
[[34m2025-10-28 01:44:56[0m] (step=0007400) Train Loss: 0.6743, Train Steps/Sec: 1.20
|
| 127 |
+
[[34m2025-10-28 01:46:19[0m] (step=0007500) Train Loss: 0.6714, Train Steps/Sec: 1.21
|
| 128 |
+
[[34m2025-10-28 01:46:24[0m] Beginning epoch 6...
|
| 129 |
+
[[34m2025-10-28 01:47:43[0m] (step=0007600) Train Loss: 0.6729, Train Steps/Sec: 1.18
|
| 130 |
+
[[34m2025-10-28 01:49:06[0m] (step=0007700) Train Loss: 0.6715, Train Steps/Sec: 1.21
|
| 131 |
+
[[34m2025-10-28 01:50:28[0m] (step=0007800) Train Loss: 0.6712, Train Steps/Sec: 1.21
|
| 132 |
+
[[34m2025-10-28 01:51:51[0m] (step=0007900) Train Loss: 0.6715, Train Steps/Sec: 1.21
|
| 133 |
+
[[34m2025-10-28 01:53:13[0m] (step=0008000) Train Loss: 0.6691, Train Steps/Sec: 1.21
|
| 134 |
+
[[34m2025-10-28 01:54:36[0m] (step=0008100) Train Loss: 0.6698, Train Steps/Sec: 1.21
|
| 135 |
+
[[34m2025-10-28 01:55:58[0m] (step=0008200) Train Loss: 0.6685, Train Steps/Sec: 1.21
|
| 136 |
+
[[34m2025-10-28 01:57:20[0m] (step=0008300) Train Loss: 0.6680, Train Steps/Sec: 1.21
|
| 137 |
+
[[34m2025-10-28 01:58:43[0m] (step=0008400) Train Loss: 0.6667, Train Steps/Sec: 1.21
|
| 138 |
+
[[34m2025-10-28 02:00:05[0m] (step=0008500) Train Loss: 0.6666, Train Steps/Sec: 1.21
|
| 139 |
+
[[34m2025-10-28 02:01:28[0m] (step=0008600) Train Loss: 0.6672, Train Steps/Sec: 1.21
|
| 140 |
+
[[34m2025-10-28 02:02:50[0m] (step=0008700) Train Loss: 0.6673, Train Steps/Sec: 1.21
|
| 141 |
+
[[34m2025-10-28 02:03:38[0m] Beginning epoch 7...
|
| 142 |
+
[[34m2025-10-28 02:04:15[0m] (step=0008800) Train Loss: 0.6661, Train Steps/Sec: 1.18
|
| 143 |
+
[[34m2025-10-28 02:05:37[0m] (step=0008900) Train Loss: 0.6647, Train Steps/Sec: 1.21
|
| 144 |
+
[[34m2025-10-28 02:07:00[0m] (step=0009000) Train Loss: 0.6649, Train Steps/Sec: 1.21
|
| 145 |
+
[[34m2025-10-28 02:08:23[0m] (step=0009100) Train Loss: 0.6648, Train Steps/Sec: 1.20
|
| 146 |
+
[[34m2025-10-28 02:09:45[0m] (step=0009200) Train Loss: 0.6644, Train Steps/Sec: 1.21
|
| 147 |
+
[[34m2025-10-28 02:11:08[0m] (step=0009300) Train Loss: 0.6636, Train Steps/Sec: 1.21
|
| 148 |
+
[[34m2025-10-28 02:12:30[0m] (step=0009400) Train Loss: 0.6628, Train Steps/Sec: 1.21
|
| 149 |
+
[[34m2025-10-28 02:13:52[0m] (step=0009500) Train Loss: 0.6628, Train Steps/Sec: 1.21
|
| 150 |
+
[[34m2025-10-28 02:15:15[0m] (step=0009600) Train Loss: 0.6627, Train Steps/Sec: 1.21
|
| 151 |
+
[[34m2025-10-28 02:16:37[0m] (step=0009700) Train Loss: 0.6624, Train Steps/Sec: 1.21
|
| 152 |
+
[[34m2025-10-28 02:18:00[0m] (step=0009800) Train Loss: 0.6624, Train Steps/Sec: 1.21
|
| 153 |
+
[[34m2025-10-28 02:19:22[0m] (step=0009900) Train Loss: 0.6598, Train Steps/Sec: 1.21
|
| 154 |
+
[[34m2025-10-28 02:20:45[0m] (step=0010000) Train Loss: 0.6615, Train Steps/Sec: 1.21
|
| 155 |
+
[[34m2025-10-28 02:20:52[0m] Beginning epoch 8...
|
| 156 |
+
[[34m2025-10-28 02:22:09[0m] (step=0010100) Train Loss: 0.6600, Train Steps/Sec: 1.18
|
| 157 |
+
[[34m2025-10-28 02:23:32[0m] (step=0010200) Train Loss: 0.6603, Train Steps/Sec: 1.21
|
| 158 |
+
[[34m2025-10-28 02:24:54[0m] (step=0010300) Train Loss: 0.6591, Train Steps/Sec: 1.21
|
| 159 |
+
[[34m2025-10-28 02:26:17[0m] (step=0010400) Train Loss: 0.6594, Train Steps/Sec: 1.21
|
| 160 |
+
[[34m2025-10-28 02:27:39[0m] (step=0010500) Train Loss: 0.6585, Train Steps/Sec: 1.21
|
| 161 |
+
[[34m2025-10-28 02:29:02[0m] (step=0010600) Train Loss: 0.6587, Train Steps/Sec: 1.21
|
| 162 |
+
[[34m2025-10-28 02:30:25[0m] (step=0010700) Train Loss: 0.6591, Train Steps/Sec: 1.20
|
| 163 |
+
[[34m2025-10-28 02:31:47[0m] (step=0010800) Train Loss: 0.6591, Train Steps/Sec: 1.21
|
| 164 |
+
[[34m2025-10-28 02:33:10[0m] (step=0010900) Train Loss: 0.6580, Train Steps/Sec: 1.21
|
| 165 |
+
[[34m2025-10-28 02:34:32[0m] (step=0011000) Train Loss: 0.6575, Train Steps/Sec: 1.21
|
| 166 |
+
[[34m2025-10-28 02:35:54[0m] (step=0011100) Train Loss: 0.6566, Train Steps/Sec: 1.22
|
| 167 |
+
[[34m2025-10-28 02:37:17[0m] (step=0011200) Train Loss: 0.6579, Train Steps/Sec: 1.21
|
| 168 |
+
[[34m2025-10-28 02:38:06[0m] Beginning epoch 9...
|
| 169 |
+
[[34m2025-10-28 02:38:42[0m] (step=0011300) Train Loss: 0.6569, Train Steps/Sec: 1.18
|
| 170 |
+
[[34m2025-10-28 02:40:04[0m] (step=0011400) Train Loss: 0.6560, Train Steps/Sec: 1.21
|
| 171 |
+
[[34m2025-10-28 02:41:26[0m] (step=0011500) Train Loss: 0.6552, Train Steps/Sec: 1.21
|
| 172 |
+
[[34m2025-10-28 02:42:49[0m] (step=0011600) Train Loss: 0.6547, Train Steps/Sec: 1.21
|
| 173 |
+
[[34m2025-10-28 02:44:11[0m] (step=0011700) Train Loss: 0.6552, Train Steps/Sec: 1.21
|
| 174 |
+
[[34m2025-10-28 02:45:34[0m] (step=0011800) Train Loss: 0.6534, Train Steps/Sec: 1.21
|
| 175 |
+
[[34m2025-10-28 02:46:56[0m] (step=0011900) Train Loss: 0.6541, Train Steps/Sec: 1.21
|
| 176 |
+
[[34m2025-10-28 02:48:18[0m] (step=0012000) Train Loss: 0.6547, Train Steps/Sec: 1.22
|
| 177 |
+
[[34m2025-10-28 02:49:41[0m] (step=0012100) Train Loss: 0.6544, Train Steps/Sec: 1.22
|
| 178 |
+
[[34m2025-10-28 02:51:03[0m] (step=0012200) Train Loss: 0.6546, Train Steps/Sec: 1.21
|
| 179 |
+
[[34m2025-10-28 02:52:26[0m] (step=0012300) Train Loss: 0.6538, Train Steps/Sec: 1.21
|
| 180 |
+
[[34m2025-10-28 02:53:49[0m] (step=0012400) Train Loss: 0.6529, Train Steps/Sec: 1.20
|
| 181 |
+
[[34m2025-10-28 02:55:11[0m] (step=0012500) Train Loss: 0.6525, Train Steps/Sec: 1.21
|
| 182 |
+
[[34m2025-10-28 02:55:20[0m] Beginning epoch 10...
|
| 183 |
+
[[34m2025-10-28 02:56:36[0m] (step=0012600) Train Loss: 0.6523, Train Steps/Sec: 1.18
|
| 184 |
+
[[34m2025-10-28 02:57:59[0m] (step=0012700) Train Loss: 0.6537, Train Steps/Sec: 1.21
|
| 185 |
+
[[34m2025-10-28 02:59:21[0m] (step=0012800) Train Loss: 0.6525, Train Steps/Sec: 1.21
|
| 186 |
+
[[34m2025-10-28 03:00:43[0m] (step=0012900) Train Loss: 0.6511, Train Steps/Sec: 1.21
|
| 187 |
+
[[34m2025-10-28 03:02:06[0m] (step=0013000) Train Loss: 0.6519, Train Steps/Sec: 1.21
|
| 188 |
+
[[34m2025-10-28 03:03:28[0m] (step=0013100) Train Loss: 0.6505, Train Steps/Sec: 1.21
|
| 189 |
+
[[34m2025-10-28 03:04:51[0m] (step=0013200) Train Loss: 0.6521, Train Steps/Sec: 1.21
|
| 190 |
+
[[34m2025-10-28 03:06:13[0m] (step=0013300) Train Loss: 0.6519, Train Steps/Sec: 1.21
|
| 191 |
+
[[34m2025-10-28 03:07:36[0m] (step=0013400) Train Loss: 0.6498, Train Steps/Sec: 1.21
|
| 192 |
+
[[34m2025-10-28 03:08:58[0m] (step=0013500) Train Loss: 0.6515, Train Steps/Sec: 1.21
|
| 193 |
+
[[34m2025-10-28 03:10:20[0m] (step=0013600) Train Loss: 0.6507, Train Steps/Sec: 1.21
|
| 194 |
+
[[34m2025-10-28 03:11:43[0m] (step=0013700) Train Loss: 0.6495, Train Steps/Sec: 1.21
|
| 195 |
+
[[34m2025-10-28 03:12:34[0m] Beginning epoch 11...
|
| 196 |
+
[[34m2025-10-28 03:13:08[0m] (step=0013800) Train Loss: 0.6501, Train Steps/Sec: 1.18
|
| 197 |
+
[[34m2025-10-28 03:14:30[0m] (step=0013900) Train Loss: 0.6481, Train Steps/Sec: 1.21
|
| 198 |
+
[[34m2025-10-28 03:15:52[0m] (step=0014000) Train Loss: 0.6499, Train Steps/Sec: 1.22
|
| 199 |
+
[[34m2025-10-28 03:17:16[0m] (step=0014100) Train Loss: 0.6481, Train Steps/Sec: 1.20
|
| 200 |
+
[[34m2025-10-28 03:18:38[0m] (step=0014200) Train Loss: 0.6505, Train Steps/Sec: 1.21
|
| 201 |
+
[[34m2025-10-28 03:20:00[0m] (step=0014300) Train Loss: 0.6497, Train Steps/Sec: 1.21
|
| 202 |
+
[[34m2025-10-28 03:21:23[0m] (step=0014400) Train Loss: 0.6493, Train Steps/Sec: 1.21
|
| 203 |
+
[[34m2025-10-28 03:22:45[0m] (step=0014500) Train Loss: 0.6484, Train Steps/Sec: 1.21
|
| 204 |
+
[[34m2025-10-28 03:24:08[0m] (step=0014600) Train Loss: 0.6484, Train Steps/Sec: 1.21
|
| 205 |
+
[[34m2025-10-28 03:25:30[0m] (step=0014700) Train Loss: 0.6493, Train Steps/Sec: 1.21
|
| 206 |
+
[[34m2025-10-28 03:26:53[0m] (step=0014800) Train Loss: 0.6481, Train Steps/Sec: 1.21
|
| 207 |
+
[[34m2025-10-28 03:28:15[0m] (step=0014900) Train Loss: 0.6477, Train Steps/Sec: 1.21
|
| 208 |
+
[[34m2025-10-28 03:29:37[0m] (step=0015000) Train Loss: 0.6479, Train Steps/Sec: 1.21
|
| 209 |
+
[[34m2025-10-28 03:29:48[0m] Beginning epoch 12...
|
| 210 |
+
[[34m2025-10-28 03:31:02[0m] (step=0015100) Train Loss: 0.6482, Train Steps/Sec: 1.18
|
| 211 |
+
[[34m2025-10-28 03:32:25[0m] (step=0015200) Train Loss: 0.6472, Train Steps/Sec: 1.21
|
| 212 |
+
[[34m2025-10-28 03:33:47[0m] (step=0015300) Train Loss: 0.6471, Train Steps/Sec: 1.21
|
| 213 |
+
[[34m2025-10-28 03:35:10[0m] (step=0015400) Train Loss: 0.6464, Train Steps/Sec: 1.21
|
| 214 |
+
[[34m2025-10-28 03:36:32[0m] (step=0015500) Train Loss: 0.6457, Train Steps/Sec: 1.21
|
| 215 |
+
[[34m2025-10-28 03:37:55[0m] (step=0015600) Train Loss: 0.6466, Train Steps/Sec: 1.21
|
| 216 |
+
[[34m2025-10-28 03:39:18[0m] (step=0015700) Train Loss: 0.6462, Train Steps/Sec: 1.21
|
| 217 |
+
[[34m2025-10-28 03:40:40[0m] (step=0015800) Train Loss: 0.6451, Train Steps/Sec: 1.21
|
| 218 |
+
[[34m2025-10-28 03:42:03[0m] (step=0015900) Train Loss: 0.6442, Train Steps/Sec: 1.21
|
| 219 |
+
[[34m2025-10-28 03:43:25[0m] (step=0016000) Train Loss: 0.6450, Train Steps/Sec: 1.21
|
| 220 |
+
[[34m2025-10-28 03:44:48[0m] (step=0016100) Train Loss: 0.6447, Train Steps/Sec: 1.21
|
| 221 |
+
[[34m2025-10-28 03:46:10[0m] (step=0016200) Train Loss: 0.6436, Train Steps/Sec: 1.22
|
| 222 |
+
[[34m2025-10-28 03:47:02[0m] Beginning epoch 13...
|
| 223 |
+
[[34m2025-10-28 03:47:34[0m] (step=0016300) Train Loss: 0.6449, Train Steps/Sec: 1.18
|
| 224 |
+
[[34m2025-10-28 03:48:57[0m] (step=0016400) Train Loss: 0.6450, Train Steps/Sec: 1.21
|
| 225 |
+
[[34m2025-10-28 03:50:19[0m] (step=0016500) Train Loss: 0.6434, Train Steps/Sec: 1.21
|
| 226 |
+
[[34m2025-10-28 03:51:42[0m] (step=0016600) Train Loss: 0.6440, Train Steps/Sec: 1.21
|
| 227 |
+
[[34m2025-10-28 03:53:04[0m] (step=0016700) Train Loss: 0.6442, Train Steps/Sec: 1.21
|
| 228 |
+
[[34m2025-10-28 03:54:27[0m] (step=0016800) Train Loss: 0.6429, Train Steps/Sec: 1.21
|
| 229 |
+
[[34m2025-10-28 03:55:49[0m] (step=0016900) Train Loss: 0.6435, Train Steps/Sec: 1.22
|
| 230 |
+
[[34m2025-10-28 03:57:11[0m] (step=0017000) Train Loss: 0.6432, Train Steps/Sec: 1.21
|
| 231 |
+
[[34m2025-10-28 03:58:34[0m] (step=0017100) Train Loss: 0.6433, Train Steps/Sec: 1.21
|
| 232 |
+
[[34m2025-10-28 03:59:56[0m] (step=0017200) Train Loss: 0.6413, Train Steps/Sec: 1.22
|
| 233 |
+
[[34m2025-10-28 04:01:18[0m] (step=0017300) Train Loss: 0.6424, Train Steps/Sec: 1.21
|
| 234 |
+
[[34m2025-10-28 04:02:42[0m] (step=0017400) Train Loss: 0.6428, Train Steps/Sec: 1.20
|
| 235 |
+
[[34m2025-10-28 04:04:04[0m] (step=0017500) Train Loss: 0.6432, Train Steps/Sec: 1.21
|
| 236 |
+
[[34m2025-10-28 04:04:16[0m] Beginning epoch 14...
|
| 237 |
+
[[34m2025-10-28 04:05:29[0m] (step=0017600) Train Loss: 0.6411, Train Steps/Sec: 1.18
|
| 238 |
+
[[34m2025-10-28 04:06:51[0m] (step=0017700) Train Loss: 0.6429, Train Steps/Sec: 1.21
|
| 239 |
+
[[34m2025-10-28 04:08:14[0m] (step=0017800) Train Loss: 0.6421, Train Steps/Sec: 1.21
|
| 240 |
+
[[34m2025-10-28 04:09:36[0m] (step=0017900) Train Loss: 0.6427, Train Steps/Sec: 1.21
|
| 241 |
+
[[34m2025-10-28 04:10:59[0m] (step=0018000) Train Loss: 0.6417, Train Steps/Sec: 1.21
|
| 242 |
+
[[34m2025-10-28 04:12:21[0m] (step=0018100) Train Loss: 0.6415, Train Steps/Sec: 1.21
|
| 243 |
+
[[34m2025-10-28 04:13:43[0m] (step=0018200) Train Loss: 0.6418, Train Steps/Sec: 1.21
|
| 244 |
+
[[34m2025-10-28 04:15:06[0m] (step=0018300) Train Loss: 0.6434, Train Steps/Sec: 1.21
|
| 245 |
+
[[34m2025-10-28 04:16:28[0m] (step=0018400) Train Loss: 0.6419, Train Steps/Sec: 1.21
|
| 246 |
+
[[34m2025-10-28 04:17:51[0m] (step=0018500) Train Loss: 0.6404, Train Steps/Sec: 1.21
|
| 247 |
+
[[34m2025-10-28 04:19:13[0m] (step=0018600) Train Loss: 0.6427, Train Steps/Sec: 1.21
|
| 248 |
+
[[34m2025-10-28 04:20:35[0m] (step=0018700) Train Loss: 0.6413, Train Steps/Sec: 1.21
|
| 249 |
+
[[34m2025-10-28 04:21:29[0m] Beginning epoch 15...
|
| 250 |
+
[[34m2025-10-28 04:22:00[0m] (step=0018800) Train Loss: 0.6405, Train Steps/Sec: 1.18
|
| 251 |
+
[[34m2025-10-28 04:23:23[0m] (step=0018900) Train Loss: 0.6394, Train Steps/Sec: 1.21
|
| 252 |
+
[[34m2025-10-28 04:24:45[0m] (step=0019000) Train Loss: 0.6402, Train Steps/Sec: 1.21
|
| 253 |
+
[[34m2025-10-28 04:26:08[0m] (step=0019100) Train Loss: 0.6398, Train Steps/Sec: 1.20
|
| 254 |
+
[[34m2025-10-28 04:27:31[0m] (step=0019200) Train Loss: 0.6405, Train Steps/Sec: 1.21
|
| 255 |
+
[[34m2025-10-28 04:28:53[0m] (step=0019300) Train Loss: 0.6399, Train Steps/Sec: 1.21
|
| 256 |
+
[[34m2025-10-28 04:30:15[0m] (step=0019400) Train Loss: 0.6395, Train Steps/Sec: 1.21
|
| 257 |
+
[[34m2025-10-28 04:31:38[0m] (step=0019500) Train Loss: 0.6410, Train Steps/Sec: 1.21
|
| 258 |
+
[[34m2025-10-28 04:33:00[0m] (step=0019600) Train Loss: 0.6385, Train Steps/Sec: 1.21
|
| 259 |
+
[[34m2025-10-28 04:34:23[0m] (step=0019700) Train Loss: 0.6382, Train Steps/Sec: 1.21
|
| 260 |
+
[[34m2025-10-28 04:35:45[0m] (step=0019800) Train Loss: 0.6388, Train Steps/Sec: 1.21
|
| 261 |
+
[[34m2025-10-28 04:37:07[0m] (step=0019900) Train Loss: 0.6393, Train Steps/Sec: 1.21
|
| 262 |
+
[[34m2025-10-28 04:38:30[0m] (step=0020000) Train Loss: 0.6384, Train Steps/Sec: 1.21
|
| 263 |
+
[[34m2025-10-28 04:38:43[0m] Beginning epoch 16...
|
| 264 |
+
[[34m2025-10-28 04:39:55[0m] (step=0020100) Train Loss: 0.6384, Train Steps/Sec: 1.18
|
| 265 |
+
[[34m2025-10-28 04:41:17[0m] (step=0020200) Train Loss: 0.6388, Train Steps/Sec: 1.21
|
| 266 |
+
[[34m2025-10-28 04:42:40[0m] (step=0020300) Train Loss: 0.6393, Train Steps/Sec: 1.21
|
| 267 |
+
[[34m2025-10-28 04:44:02[0m] (step=0020400) Train Loss: 0.6385, Train Steps/Sec: 1.21
|
| 268 |
+
[[34m2025-10-28 04:45:24[0m] (step=0020500) Train Loss: 0.6374, Train Steps/Sec: 1.21
|
| 269 |
+
[[34m2025-10-28 04:46:47[0m] (step=0020600) Train Loss: 0.6392, Train Steps/Sec: 1.21
|
| 270 |
+
[[34m2025-10-28 04:48:10[0m] (step=0020700) Train Loss: 0.6385, Train Steps/Sec: 1.20
|
| 271 |
+
[[34m2025-10-28 04:49:32[0m] (step=0020800) Train Loss: 0.6385, Train Steps/Sec: 1.21
|
| 272 |
+
[[34m2025-10-28 04:50:55[0m] (step=0020900) Train Loss: 0.6379, Train Steps/Sec: 1.21
|
| 273 |
+
[[34m2025-10-28 04:52:17[0m] (step=0021000) Train Loss: 0.6372, Train Steps/Sec: 1.21
|
| 274 |
+
[[34m2025-10-28 04:53:40[0m] (step=0021100) Train Loss: 0.6362, Train Steps/Sec: 1.21
|
| 275 |
+
[[34m2025-10-28 04:55:02[0m] (step=0021200) Train Loss: 0.6377, Train Steps/Sec: 1.21
|
| 276 |
+
[[34m2025-10-28 04:55:58[0m] Beginning epoch 17...
|
| 277 |
+
[[34m2025-10-28 04:56:27[0m] (step=0021300) Train Loss: 0.6371, Train Steps/Sec: 1.18
|
| 278 |
+
[[34m2025-10-28 04:57:49[0m] (step=0021400) Train Loss: 0.6371, Train Steps/Sec: 1.22
|
| 279 |
+
[[34m2025-10-28 04:59:12[0m] (step=0021500) Train Loss: 0.6372, Train Steps/Sec: 1.21
|
| 280 |
+
[[34m2025-10-28 05:00:34[0m] (step=0021600) Train Loss: 0.6369, Train Steps/Sec: 1.21
|
| 281 |
+
[[34m2025-10-28 05:01:57[0m] (step=0021700) Train Loss: 0.6373, Train Steps/Sec: 1.21
|
| 282 |
+
[[34m2025-10-28 05:03:19[0m] (step=0021800) Train Loss: 0.6369, Train Steps/Sec: 1.21
|
| 283 |
+
[[34m2025-10-28 05:04:41[0m] (step=0021900) Train Loss: 0.6354, Train Steps/Sec: 1.21
|
| 284 |
+
[[34m2025-10-28 05:06:04[0m] (step=0022000) Train Loss: 0.6353, Train Steps/Sec: 1.21
|
| 285 |
+
[[34m2025-10-28 05:07:26[0m] (step=0022100) Train Loss: 0.6356, Train Steps/Sec: 1.21
|
| 286 |
+
[[34m2025-10-28 05:08:49[0m] (step=0022200) Train Loss: 0.6360, Train Steps/Sec: 1.21
|
| 287 |
+
[[34m2025-10-28 05:10:11[0m] (step=0022300) Train Loss: 0.6360, Train Steps/Sec: 1.21
|
| 288 |
+
[[34m2025-10-28 05:11:34[0m] (step=0022400) Train Loss: 0.6350, Train Steps/Sec: 1.20
|
| 289 |
+
[[34m2025-10-28 05:12:57[0m] (step=0022500) Train Loss: 0.6355, Train Steps/Sec: 1.21
|
| 290 |
+
[[34m2025-10-28 05:13:12[0m] Beginning epoch 18...
|
| 291 |
+
[[34m2025-10-28 05:14:22[0m] (step=0022600) Train Loss: 0.6352, Train Steps/Sec: 1.18
|
| 292 |
+
[[34m2025-10-28 05:15:44[0m] (step=0022700) Train Loss: 0.6345, Train Steps/Sec: 1.21
|
| 293 |
+
[[34m2025-10-28 05:17:06[0m] (step=0022800) Train Loss: 0.6364, Train Steps/Sec: 1.21
|
| 294 |
+
[[34m2025-10-28 05:18:29[0m] (step=0022900) Train Loss: 0.6339, Train Steps/Sec: 1.21
|
| 295 |
+
[[34m2025-10-28 05:19:51[0m] (step=0023000) Train Loss: 0.6338, Train Steps/Sec: 1.21
|
| 296 |
+
[[34m2025-10-28 05:21:14[0m] (step=0023100) Train Loss: 0.6343, Train Steps/Sec: 1.21
|
| 297 |
+
[[34m2025-10-28 05:22:36[0m] (step=0023200) Train Loss: 0.6341, Train Steps/Sec: 1.21
|
| 298 |
+
[[34m2025-10-28 05:23:58[0m] (step=0023300) Train Loss: 0.6340, Train Steps/Sec: 1.21
|
| 299 |
+
[[34m2025-10-28 05:25:21[0m] (step=0023400) Train Loss: 0.6348, Train Steps/Sec: 1.21
|
| 300 |
+
[[34m2025-10-28 05:26:43[0m] (step=0023500) Train Loss: 0.6334, Train Steps/Sec: 1.21
|
| 301 |
+
[[34m2025-10-28 05:28:06[0m] (step=0023600) Train Loss: 0.6349, Train Steps/Sec: 1.21
|
| 302 |
+
[[34m2025-10-28 05:29:28[0m] (step=0023700) Train Loss: 0.6343, Train Steps/Sec: 1.21
|
| 303 |
+
[[34m2025-10-28 05:30:25[0m] Beginning epoch 19...
|
| 304 |
+
[[34m2025-10-28 05:30:53[0m] (step=0023800) Train Loss: 0.6340, Train Steps/Sec: 1.18
|
| 305 |
+
[[34m2025-10-28 05:32:15[0m] (step=0023900) Train Loss: 0.6349, Train Steps/Sec: 1.21
|
| 306 |
+
[[34m2025-10-28 05:33:38[0m] (step=0024000) Train Loss: 0.6342, Train Steps/Sec: 1.21
|
| 307 |
+
[[34m2025-10-28 05:35:01[0m] (step=0024100) Train Loss: 0.6345, Train Steps/Sec: 1.21
|
| 308 |
+
[[34m2025-10-28 05:36:23[0m] (step=0024200) Train Loss: 0.6348, Train Steps/Sec: 1.21
|
| 309 |
+
[[34m2025-10-28 05:37:46[0m] (step=0024300) Train Loss: 0.6329, Train Steps/Sec: 1.21
|
| 310 |
+
[[34m2025-10-28 05:39:08[0m] (step=0024400) Train Loss: 0.6338, Train Steps/Sec: 1.21
|
| 311 |
+
[[34m2025-10-28 05:40:30[0m] (step=0024500) Train Loss: 0.6349, Train Steps/Sec: 1.21
|
| 312 |
+
[[34m2025-10-28 05:41:53[0m] (step=0024600) Train Loss: 0.6345, Train Steps/Sec: 1.21
|
| 313 |
+
[[34m2025-10-28 05:43:15[0m] (step=0024700) Train Loss: 0.6331, Train Steps/Sec: 1.21
|
| 314 |
+
[[34m2025-10-28 05:44:38[0m] (step=0024800) Train Loss: 0.6338, Train Steps/Sec: 1.21
|
| 315 |
+
[[34m2025-10-28 05:46:00[0m] (step=0024900) Train Loss: 0.6337, Train Steps/Sec: 1.21
|
| 316 |
+
[[34m2025-10-28 05:47:22[0m] (step=0025000) Train Loss: 0.6328, Train Steps/Sec: 1.21
|
| 317 |
+
[[34m2025-10-28 05:48:19[0m] Saved checkpoint to results/stage2/hfdata/lightningdit-xl-pe-vit-b-bf16/checkpoints/0025000.pt
|
| 318 |
+
[[34m2025-10-28 05:48:19[0m] Generating EMA samples...
|
| 319 |
+
[[34m2025-10-28 05:48:47[0m] Generating EMA samples done.
|
| 320 |
+
[[34m2025-10-28 05:49:04[0m] Beginning epoch 20...
|
| 321 |
+
[[34m2025-10-28 05:50:12[0m] (step=0025100) Train Loss: 0.6327, Train Steps/Sec: 0.59
|
| 322 |
+
[[34m2025-10-28 05:51:34[0m] (step=0025200) Train Loss: 0.6322, Train Steps/Sec: 1.21
|
| 323 |
+
[[34m2025-10-28 05:52:57[0m] (step=0025300) Train Loss: 0.6323, Train Steps/Sec: 1.21
|
| 324 |
+
[[34m2025-10-28 05:54:19[0m] (step=0025400) Train Loss: 0.6328, Train Steps/Sec: 1.21
|
| 325 |
+
[[34m2025-10-28 05:55:41[0m] (step=0025500) Train Loss: 0.6334, Train Steps/Sec: 1.21
|
| 326 |
+
[[34m2025-10-28 05:57:04[0m] (step=0025600) Train Loss: 0.6334, Train Steps/Sec: 1.21
|
| 327 |
+
[[34m2025-10-28 05:58:27[0m] (step=0025700) Train Loss: 0.6322, Train Steps/Sec: 1.20
|
| 328 |
+
[[34m2025-10-28 05:59:50[0m] (step=0025800) Train Loss: 0.6326, Train Steps/Sec: 1.21
|
| 329 |
+
[[34m2025-10-28 06:01:12[0m] (step=0025900) Train Loss: 0.6308, Train Steps/Sec: 1.21
|
| 330 |
+
[[34m2025-10-28 06:02:35[0m] (step=0026000) Train Loss: 0.6322, Train Steps/Sec: 1.21
|
| 331 |
+
[[34m2025-10-28 06:03:57[0m] (step=0026100) Train Loss: 0.6315, Train Steps/Sec: 1.21
|
| 332 |
+
[[34m2025-10-28 06:05:19[0m] (step=0026200) Train Loss: 0.6327, Train Steps/Sec: 1.21
|
| 333 |
+
[[34m2025-10-28 06:06:18[0m] Beginning epoch 21...
|
| 334 |
+
[[34m2025-10-28 06:06:44[0m] (step=0026300) Train Loss: 0.6314, Train Steps/Sec: 1.18
|
| 335 |
+
[[34m2025-10-28 06:08:07[0m] (step=0026400) Train Loss: 0.6328, Train Steps/Sec: 1.21
|
| 336 |
+
[[34m2025-10-28 06:09:29[0m] (step=0026500) Train Loss: 0.6299, Train Steps/Sec: 1.21
|
| 337 |
+
[[34m2025-10-28 06:10:51[0m] (step=0026600) Train Loss: 0.6303, Train Steps/Sec: 1.21
|
| 338 |
+
[[34m2025-10-28 06:12:14[0m] (step=0026700) Train Loss: 0.6315, Train Steps/Sec: 1.21
|
| 339 |
+
[[34m2025-10-28 06:13:36[0m] (step=0026800) Train Loss: 0.6316, Train Steps/Sec: 1.21
|
| 340 |
+
[[34m2025-10-28 06:14:59[0m] (step=0026900) Train Loss: 0.6318, Train Steps/Sec: 1.21
|
| 341 |
+
[[34m2025-10-28 06:16:21[0m] (step=0027000) Train Loss: 0.6314, Train Steps/Sec: 1.21
|
| 342 |
+
[[34m2025-10-28 06:17:44[0m] (step=0027100) Train Loss: 0.6306, Train Steps/Sec: 1.21
|
| 343 |
+
[[34m2025-10-28 06:19:06[0m] (step=0027200) Train Loss: 0.6313, Train Steps/Sec: 1.21
|
| 344 |
+
[[34m2025-10-28 06:20:29[0m] (step=0027300) Train Loss: 0.6308, Train Steps/Sec: 1.21
|
| 345 |
+
[[34m2025-10-28 06:21:52[0m] (step=0027400) Train Loss: 0.6302, Train Steps/Sec: 1.20
|
| 346 |
+
[[34m2025-10-28 06:23:14[0m] (step=0027500) Train Loss: 0.6295, Train Steps/Sec: 1.21
|
| 347 |
+
[[34m2025-10-28 06:23:33[0m] Beginning epoch 22...
|
| 348 |
+
[[34m2025-10-28 06:24:39[0m] (step=0027600) Train Loss: 0.6300, Train Steps/Sec: 1.18
|
| 349 |
+
[[34m2025-10-28 06:26:01[0m] (step=0027700) Train Loss: 0.6295, Train Steps/Sec: 1.21
|
| 350 |
+
[[34m2025-10-28 06:27:24[0m] (step=0027800) Train Loss: 0.6311, Train Steps/Sec: 1.21
|
| 351 |
+
[[34m2025-10-28 06:28:46[0m] (step=0027900) Train Loss: 0.6313, Train Steps/Sec: 1.21
|
| 352 |
+
[[34m2025-10-28 06:30:08[0m] (step=0028000) Train Loss: 0.6301, Train Steps/Sec: 1.21
|
| 353 |
+
[[34m2025-10-28 06:31:31[0m] (step=0028100) Train Loss: 0.6304, Train Steps/Sec: 1.21
|
| 354 |
+
[[34m2025-10-28 06:32:53[0m] (step=0028200) Train Loss: 0.6300, Train Steps/Sec: 1.21
|
| 355 |
+
[[34m2025-10-28 06:34:16[0m] (step=0028300) Train Loss: 0.6310, Train Steps/Sec: 1.21
|
| 356 |
+
[[34m2025-10-28 06:35:38[0m] (step=0028400) Train Loss: 0.6282, Train Steps/Sec: 1.21
|
| 357 |
+
[[34m2025-10-28 06:37:00[0m] (step=0028500) Train Loss: 0.6297, Train Steps/Sec: 1.21
|
| 358 |
+
[[34m2025-10-28 06:38:23[0m] (step=0028600) Train Loss: 0.6297, Train Steps/Sec: 1.21
|
| 359 |
+
[[34m2025-10-28 06:39:45[0m] (step=0028700) Train Loss: 0.6284, Train Steps/Sec: 1.21
|
| 360 |
+
[[34m2025-10-28 06:40:46[0m] Beginning epoch 23...
|
| 361 |
+
[[34m2025-10-28 06:41:10[0m] (step=0028800) Train Loss: 0.6292, Train Steps/Sec: 1.18
|
| 362 |
+
[[34m2025-10-28 06:42:33[0m] (step=0028900) Train Loss: 0.6311, Train Steps/Sec: 1.21
|
| 363 |
+
[[34m2025-10-28 06:43:56[0m] (step=0029000) Train Loss: 0.6288, Train Steps/Sec: 1.21
|
| 364 |
+
[[34m2025-10-28 06:45:18[0m] (step=0029100) Train Loss: 0.6291, Train Steps/Sec: 1.21
|
| 365 |
+
[[34m2025-10-28 06:46:41[0m] (step=0029200) Train Loss: 0.6283, Train Steps/Sec: 1.21
|
| 366 |
+
[[34m2025-10-28 06:48:03[0m] (step=0029300) Train Loss: 0.6283, Train Steps/Sec: 1.21
|
| 367 |
+
[[34m2025-10-28 06:49:26[0m] (step=0029400) Train Loss: 0.6271, Train Steps/Sec: 1.21
|
| 368 |
+
[[34m2025-10-28 06:50:48[0m] (step=0029500) Train Loss: 0.6297, Train Steps/Sec: 1.21
|
| 369 |
+
[[34m2025-10-28 06:52:10[0m] (step=0029600) Train Loss: 0.6289, Train Steps/Sec: 1.21
|
| 370 |
+
[[34m2025-10-28 06:53:33[0m] (step=0029700) Train Loss: 0.6300, Train Steps/Sec: 1.21
|
| 371 |
+
[[34m2025-10-28 06:54:55[0m] (step=0029800) Train Loss: 0.6293, Train Steps/Sec: 1.21
|
| 372 |
+
[[34m2025-10-28 06:56:18[0m] (step=0029900) Train Loss: 0.6299, Train Steps/Sec: 1.21
|
| 373 |
+
[[34m2025-10-28 06:57:40[0m] (step=0030000) Train Loss: 0.6288, Train Steps/Sec: 1.21
|
| 374 |
+
[[34m2025-10-28 06:58:00[0m] Beginning epoch 24...
|
| 375 |
+
[[34m2025-10-28 06:59:05[0m] (step=0030100) Train Loss: 0.6292, Train Steps/Sec: 1.17
|
| 376 |
+
[[34m2025-10-28 07:00:28[0m] (step=0030200) Train Loss: 0.6285, Train Steps/Sec: 1.21
|
| 377 |
+
[[34m2025-10-28 07:01:50[0m] (step=0030300) Train Loss: 0.6278, Train Steps/Sec: 1.21
|
| 378 |
+
[[34m2025-10-28 07:03:12[0m] (step=0030400) Train Loss: 0.6280, Train Steps/Sec: 1.21
|
| 379 |
+
[[34m2025-10-28 07:04:35[0m] (step=0030500) Train Loss: 0.6285, Train Steps/Sec: 1.21
|
| 380 |
+
[[34m2025-10-28 07:05:57[0m] (step=0030600) Train Loss: 0.6271, Train Steps/Sec: 1.21
|
| 381 |
+
[[34m2025-10-28 07:07:20[0m] (step=0030700) Train Loss: 0.6280, Train Steps/Sec: 1.20
|
| 382 |
+
[[34m2025-10-28 07:08:43[0m] (step=0030800) Train Loss: 0.6284, Train Steps/Sec: 1.21
|
| 383 |
+
[[34m2025-10-28 07:10:05[0m] (step=0030900) Train Loss: 0.6280, Train Steps/Sec: 1.22
|
| 384 |
+
[[34m2025-10-28 07:11:27[0m] (step=0031000) Train Loss: 0.6273, Train Steps/Sec: 1.22
|
| 385 |
+
[[34m2025-10-28 07:12:50[0m] (step=0031100) Train Loss: 0.6289, Train Steps/Sec: 1.21
|
| 386 |
+
[[34m2025-10-28 07:14:12[0m] (step=0031200) Train Loss: 0.6271, Train Steps/Sec: 1.21
|
| 387 |
+
[[34m2025-10-28 07:15:14[0m] Beginning epoch 25...
|
| 388 |
+
[[34m2025-10-28 07:15:37[0m] (step=0031300) Train Loss: 0.6275, Train Steps/Sec: 1.18
|
| 389 |
+
[[34m2025-10-28 07:17:00[0m] (step=0031400) Train Loss: 0.6274, Train Steps/Sec: 1.21
|
| 390 |
+
[[34m2025-10-28 07:18:22[0m] (step=0031500) Train Loss: 0.6270, Train Steps/Sec: 1.21
|
| 391 |
+
[[34m2025-10-28 07:19:44[0m] (step=0031600) Train Loss: 0.6268, Train Steps/Sec: 1.21
|
| 392 |
+
[[34m2025-10-28 07:21:07[0m] (step=0031700) Train Loss: 0.6258, Train Steps/Sec: 1.21
|
| 393 |
+
[[34m2025-10-28 07:22:29[0m] (step=0031800) Train Loss: 0.6275, Train Steps/Sec: 1.21
|
| 394 |
+
[[34m2025-10-28 07:23:51[0m] (step=0031900) Train Loss: 0.6259, Train Steps/Sec: 1.21
|
| 395 |
+
[[34m2025-10-28 07:25:14[0m] (step=0032000) Train Loss: 0.6269, Train Steps/Sec: 1.21
|
| 396 |
+
[[34m2025-10-28 07:26:36[0m] (step=0032100) Train Loss: 0.6261, Train Steps/Sec: 1.21
|
| 397 |
+
[[34m2025-10-28 07:27:59[0m] (step=0032200) Train Loss: 0.6265, Train Steps/Sec: 1.21
|
| 398 |
+
[[34m2025-10-28 07:29:21[0m] (step=0032300) Train Loss: 0.6273, Train Steps/Sec: 1.21
|
| 399 |
+
[[34m2025-10-28 07:30:44[0m] (step=0032400) Train Loss: 0.6262, Train Steps/Sec: 1.20
|
| 400 |
+
[[34m2025-10-28 07:32:07[0m] (step=0032500) Train Loss: 0.6270, Train Steps/Sec: 1.22
|
| 401 |
+
[[34m2025-10-28 07:32:29[0m] Beginning epoch 26...
|
| 402 |
+
[[34m2025-10-28 07:33:31[0m] (step=0032600) Train Loss: 0.6261, Train Steps/Sec: 1.18
|
| 403 |
+
[[34m2025-10-28 07:34:54[0m] (step=0032700) Train Loss: 0.6258, Train Steps/Sec: 1.21
|
| 404 |
+
[[34m2025-10-28 07:36:16[0m] (step=0032800) Train Loss: 0.6261, Train Steps/Sec: 1.21
|
| 405 |
+
[[34m2025-10-28 07:37:39[0m] (step=0032900) Train Loss: 0.6261, Train Steps/Sec: 1.21
|
| 406 |
+
[[34m2025-10-28 07:39:01[0m] (step=0033000) Train Loss: 0.6277, Train Steps/Sec: 1.21
|
| 407 |
+
[[34m2025-10-28 07:40:24[0m] (step=0033100) Train Loss: 0.6260, Train Steps/Sec: 1.21
|
| 408 |
+
[[34m2025-10-28 07:41:46[0m] (step=0033200) Train Loss: 0.6250, Train Steps/Sec: 1.21
|
| 409 |
+
[[34m2025-10-28 07:43:08[0m] (step=0033300) Train Loss: 0.6261, Train Steps/Sec: 1.21
|
| 410 |
+
[[34m2025-10-28 07:44:31[0m] (step=0033400) Train Loss: 0.6272, Train Steps/Sec: 1.21
|
| 411 |
+
[[34m2025-10-28 07:45:53[0m] (step=0033500) Train Loss: 0.6256, Train Steps/Sec: 1.21
|
| 412 |
+
[[34m2025-10-28 07:47:15[0m] (step=0033600) Train Loss: 0.6264, Train Steps/Sec: 1.21
|
| 413 |
+
[[34m2025-10-28 07:48:38[0m] (step=0033700) Train Loss: 0.6268, Train Steps/Sec: 1.21
|
| 414 |
+
[[34m2025-10-28 07:49:42[0m] Beginning epoch 27...
|
| 415 |
+
[[34m2025-10-28 07:50:03[0m] (step=0033800) Train Loss: 0.6250, Train Steps/Sec: 1.18
|
| 416 |
+
[[34m2025-10-28 07:51:25[0m] (step=0033900) Train Loss: 0.6241, Train Steps/Sec: 1.21
|
| 417 |
+
[[34m2025-10-28 07:52:48[0m] (step=0034000) Train Loss: 0.6246, Train Steps/Sec: 1.20
|
| 418 |
+
[[34m2025-10-28 07:54:11[0m] (step=0034100) Train Loss: 0.6249, Train Steps/Sec: 1.21
|
| 419 |
+
[[34m2025-10-28 07:55:34[0m] (step=0034200) Train Loss: 0.6244, Train Steps/Sec: 1.21
|
| 420 |
+
[[34m2025-10-28 07:56:56[0m] (step=0034300) Train Loss: 0.6250, Train Steps/Sec: 1.21
|
| 421 |
+
[[34m2025-10-28 07:58:18[0m] (step=0034400) Train Loss: 0.6258, Train Steps/Sec: 1.21
|
| 422 |
+
[[34m2025-10-28 07:59:41[0m] (step=0034500) Train Loss: 0.6240, Train Steps/Sec: 1.21
|
| 423 |
+
[[34m2025-10-28 08:01:03[0m] (step=0034600) Train Loss: 0.6254, Train Steps/Sec: 1.22
|
| 424 |
+
[[34m2025-10-28 08:02:25[0m] (step=0034700) Train Loss: 0.6247, Train Steps/Sec: 1.21
|
| 425 |
+
[[34m2025-10-28 08:03:48[0m] (step=0034800) Train Loss: 0.6245, Train Steps/Sec: 1.21
|
| 426 |
+
[[34m2025-10-28 08:05:10[0m] (step=0034900) Train Loss: 0.6244, Train Steps/Sec: 1.21
|
| 427 |
+
[[34m2025-10-28 08:06:32[0m] (step=0035000) Train Loss: 0.6252, Train Steps/Sec: 1.21
|
| 428 |
+
[[34m2025-10-28 08:06:56[0m] Beginning epoch 28...
|
| 429 |
+
[[34m2025-10-28 08:07:57[0m] (step=0035100) Train Loss: 0.6232, Train Steps/Sec: 1.18
|
| 430 |
+
[[34m2025-10-28 08:09:19[0m] (step=0035200) Train Loss: 0.6235, Train Steps/Sec: 1.21
|
| 431 |
+
[[34m2025-10-28 08:10:42[0m] (step=0035300) Train Loss: 0.6239, Train Steps/Sec: 1.21
|
| 432 |
+
[[34m2025-10-28 08:12:04[0m] (step=0035400) Train Loss: 0.6249, Train Steps/Sec: 1.21
|
| 433 |
+
[[34m2025-10-28 08:13:27[0m] (step=0035500) Train Loss: 0.6226, Train Steps/Sec: 1.21
|
| 434 |
+
[[34m2025-10-28 08:14:49[0m] (step=0035600) Train Loss: 0.6226, Train Steps/Sec: 1.21
|
| 435 |
+
[[34m2025-10-28 08:16:12[0m] (step=0035700) Train Loss: 0.6252, Train Steps/Sec: 1.21
|
| 436 |
+
[[34m2025-10-28 08:17:35[0m] (step=0035800) Train Loss: 0.6251, Train Steps/Sec: 1.21
|
| 437 |
+
[[34m2025-10-28 08:18:57[0m] (step=0035900) Train Loss: 0.6238, Train Steps/Sec: 1.21
|
| 438 |
+
[[34m2025-10-28 08:20:19[0m] (step=0036000) Train Loss: 0.6237, Train Steps/Sec: 1.21
|
| 439 |
+
[[34m2025-10-28 08:21:42[0m] (step=0036100) Train Loss: 0.6239, Train Steps/Sec: 1.21
|
| 440 |
+
[[34m2025-10-28 08:23:04[0m] (step=0036200) Train Loss: 0.6239, Train Steps/Sec: 1.21
|
| 441 |
+
[[34m2025-10-28 08:24:10[0m] Beginning epoch 29...
|
| 442 |
+
[[34m2025-10-28 08:24:29[0m] (step=0036300) Train Loss: 0.6241, Train Steps/Sec: 1.18
|
| 443 |
+
[[34m2025-10-28 08:25:51[0m] (step=0036400) Train Loss: 0.6250, Train Steps/Sec: 1.21
|
| 444 |
+
[[34m2025-10-28 08:27:14[0m] (step=0036500) Train Loss: 0.6238, Train Steps/Sec: 1.21
|
| 445 |
+
[[34m2025-10-28 08:28:36[0m] (step=0036600) Train Loss: 0.6237, Train Steps/Sec: 1.21
|
| 446 |
+
[[34m2025-10-28 08:29:58[0m] (step=0036700) Train Loss: 0.6243, Train Steps/Sec: 1.21
|
| 447 |
+
[[34m2025-10-28 08:31:21[0m] (step=0036800) Train Loss: 0.6229, Train Steps/Sec: 1.21
|
| 448 |
+
[[34m2025-10-28 08:32:43[0m] (step=0036900) Train Loss: 0.6244, Train Steps/Sec: 1.21
|
| 449 |
+
[[34m2025-10-28 08:34:05[0m] (step=0037000) Train Loss: 0.6225, Train Steps/Sec: 1.21
|
| 450 |
+
[[34m2025-10-28 08:35:28[0m] (step=0037100) Train Loss: 0.6230, Train Steps/Sec: 1.21
|
| 451 |
+
[[34m2025-10-28 08:36:50[0m] (step=0037200) Train Loss: 0.6239, Train Steps/Sec: 1.22
|
| 452 |
+
[[34m2025-10-28 08:38:13[0m] (step=0037300) Train Loss: 0.6235, Train Steps/Sec: 1.21
|
| 453 |
+
[[34m2025-10-28 08:39:36[0m] (step=0037400) Train Loss: 0.6227, Train Steps/Sec: 1.21
|
| 454 |
+
[[34m2025-10-28 08:40:58[0m] (step=0037500) Train Loss: 0.6235, Train Steps/Sec: 1.22
|
| 455 |
+
[[34m2025-10-28 08:41:23[0m] Beginning epoch 30...
|
| 456 |
+
[[34m2025-10-28 08:42:23[0m] (step=0037600) Train Loss: 0.6230, Train Steps/Sec: 1.18
|
| 457 |
+
[[34m2025-10-28 08:43:45[0m] (step=0037700) Train Loss: 0.6229, Train Steps/Sec: 1.21
|
| 458 |
+
[[34m2025-10-28 08:45:08[0m] (step=0037800) Train Loss: 0.6235, Train Steps/Sec: 1.21
|
| 459 |
+
[[34m2025-10-28 08:46:30[0m] (step=0037900) Train Loss: 0.6227, Train Steps/Sec: 1.21
|
| 460 |
+
[[34m2025-10-28 08:47:52[0m] (step=0038000) Train Loss: 0.6214, Train Steps/Sec: 1.21
|
| 461 |
+
[[34m2025-10-28 08:49:15[0m] (step=0038100) Train Loss: 0.6211, Train Steps/Sec: 1.21
|
| 462 |
+
[[34m2025-10-28 08:50:37[0m] (step=0038200) Train Loss: 0.6245, Train Steps/Sec: 1.21
|
| 463 |
+
[[34m2025-10-28 08:51:59[0m] (step=0038300) Train Loss: 0.6220, Train Steps/Sec: 1.21
|
| 464 |
+
[[34m2025-10-28 08:53:22[0m] (step=0038400) Train Loss: 0.6226, Train Steps/Sec: 1.21
|
| 465 |
+
[[34m2025-10-28 08:54:44[0m] (step=0038500) Train Loss: 0.6218, Train Steps/Sec: 1.21
|
| 466 |
+
[[34m2025-10-28 08:56:07[0m] (step=0038600) Train Loss: 0.6220, Train Steps/Sec: 1.21
|
| 467 |
+
[[34m2025-10-28 08:57:29[0m] (step=0038700) Train Loss: 0.6230, Train Steps/Sec: 1.21
|
| 468 |
+
[[34m2025-10-28 08:58:36[0m] Beginning epoch 31...
|
| 469 |
+
[[34m2025-10-28 08:58:54[0m] (step=0038800) Train Loss: 0.6215, Train Steps/Sec: 1.18
|
| 470 |
+
[[34m2025-10-28 09:00:16[0m] (step=0038900) Train Loss: 0.6223, Train Steps/Sec: 1.21
|
| 471 |
+
[[34m2025-10-28 09:01:39[0m] (step=0039000) Train Loss: 0.6210, Train Steps/Sec: 1.20
|
| 472 |
+
[[34m2025-10-28 09:03:02[0m] (step=0039100) Train Loss: 0.6221, Train Steps/Sec: 1.21
|
| 473 |
+
[[34m2025-10-28 09:04:24[0m] (step=0039200) Train Loss: 0.6213, Train Steps/Sec: 1.21
|
| 474 |
+
[[34m2025-10-28 09:05:47[0m] (step=0039300) Train Loss: 0.6229, Train Steps/Sec: 1.21
|
| 475 |
+
[[34m2025-10-28 09:07:09[0m] (step=0039400) Train Loss: 0.6219, Train Steps/Sec: 1.21
|
| 476 |
+
[[34m2025-10-28 09:08:31[0m] (step=0039500) Train Loss: 0.6222, Train Steps/Sec: 1.21
|
| 477 |
+
[[34m2025-10-28 09:09:54[0m] (step=0039600) Train Loss: 0.6225, Train Steps/Sec: 1.21
|
| 478 |
+
[[34m2025-10-28 09:11:16[0m] (step=0039700) Train Loss: 0.6218, Train Steps/Sec: 1.21
|
| 479 |
+
[[34m2025-10-28 09:12:39[0m] (step=0039800) Train Loss: 0.6211, Train Steps/Sec: 1.21
|
| 480 |
+
[[34m2025-10-28 09:14:01[0m] (step=0039900) Train Loss: 0.6219, Train Steps/Sec: 1.21
|
| 481 |
+
[[34m2025-10-28 09:15:23[0m] (step=0040000) Train Loss: 0.6215, Train Steps/Sec: 1.21
|
| 482 |
+
[[34m2025-10-28 09:15:50[0m] Beginning epoch 32...
|
| 483 |
+
[[34m2025-10-28 09:16:48[0m] (step=0040100) Train Loss: 0.6213, Train Steps/Sec: 1.18
|
| 484 |
+
[[34m2025-10-28 09:18:11[0m] (step=0040200) Train Loss: 0.6216, Train Steps/Sec: 1.21
|
| 485 |
+
[[34m2025-10-28 09:19:33[0m] (step=0040300) Train Loss: 0.6215, Train Steps/Sec: 1.22
|
| 486 |
+
[[34m2025-10-28 09:20:55[0m] (step=0040400) Train Loss: 0.6207, Train Steps/Sec: 1.22
|
| 487 |
+
[[34m2025-10-28 09:22:17[0m] (step=0040500) Train Loss: 0.6213, Train Steps/Sec: 1.21
|
| 488 |
+
[[34m2025-10-28 09:23:40[0m] (step=0040600) Train Loss: 0.6214, Train Steps/Sec: 1.21
|
| 489 |
+
[[34m2025-10-28 09:25:04[0m] (step=0040700) Train Loss: 0.6220, Train Steps/Sec: 1.20
|
| 490 |
+
[[34m2025-10-28 09:26:26[0m] (step=0040800) Train Loss: 0.6210, Train Steps/Sec: 1.21
|
| 491 |
+
[[34m2025-10-28 09:27:48[0m] (step=0040900) Train Loss: 0.6210, Train Steps/Sec: 1.22
|
| 492 |
+
[[34m2025-10-28 09:29:10[0m] (step=0041000) Train Loss: 0.6201, Train Steps/Sec: 1.21
|
| 493 |
+
[[34m2025-10-28 09:30:33[0m] (step=0041100) Train Loss: 0.6214, Train Steps/Sec: 1.21
|
| 494 |
+
[[34m2025-10-28 09:31:55[0m] (step=0041200) Train Loss: 0.6216, Train Steps/Sec: 1.21
|
| 495 |
+
[[34m2025-10-28 09:33:04[0m] Beginning epoch 33...
|
| 496 |
+
[[34m2025-10-28 09:33:20[0m] (step=0041300) Train Loss: 0.6222, Train Steps/Sec: 1.18
|
| 497 |
+
[[34m2025-10-28 09:34:43[0m] (step=0041400) Train Loss: 0.6208, Train Steps/Sec: 1.21
|
| 498 |
+
[[34m2025-10-28 09:36:05[0m] (step=0041500) Train Loss: 0.6200, Train Steps/Sec: 1.21
|
| 499 |
+
[[34m2025-10-28 09:37:27[0m] (step=0041600) Train Loss: 0.6208, Train Steps/Sec: 1.21
|
| 500 |
+
[[34m2025-10-28 09:38:50[0m] (step=0041700) Train Loss: 0.6198, Train Steps/Sec: 1.21
|
| 501 |
+
[[34m2025-10-28 09:40:12[0m] (step=0041800) Train Loss: 0.6202, Train Steps/Sec: 1.21
|
| 502 |
+
[[34m2025-10-28 09:41:35[0m] (step=0041900) Train Loss: 0.6212, Train Steps/Sec: 1.21
|
| 503 |
+
[[34m2025-10-28 09:42:57[0m] (step=0042000) Train Loss: 0.6211, Train Steps/Sec: 1.21
|
| 504 |
+
[[34m2025-10-28 09:44:19[0m] (step=0042100) Train Loss: 0.6202, Train Steps/Sec: 1.21
|
| 505 |
+
[[34m2025-10-28 09:45:42[0m] (step=0042200) Train Loss: 0.6217, Train Steps/Sec: 1.21
|
| 506 |
+
[[34m2025-10-28 09:47:05[0m] (step=0042300) Train Loss: 0.6190, Train Steps/Sec: 1.21
|
| 507 |
+
[[34m2025-10-28 09:48:28[0m] (step=0042400) Train Loss: 0.6194, Train Steps/Sec: 1.21
|
| 508 |
+
[[34m2025-10-28 09:49:50[0m] (step=0042500) Train Loss: 0.6210, Train Steps/Sec: 1.21
|
| 509 |
+
[[34m2025-10-28 09:50:19[0m] Beginning epoch 34...
|
| 510 |
+
[[34m2025-10-28 09:51:15[0m] (step=0042600) Train Loss: 0.6194, Train Steps/Sec: 1.17
|
| 511 |
+
[[34m2025-10-28 09:52:38[0m] (step=0042700) Train Loss: 0.6212, Train Steps/Sec: 1.21
|
| 512 |
+
[[34m2025-10-28 09:54:00[0m] (step=0042800) Train Loss: 0.6197, Train Steps/Sec: 1.21
|
| 513 |
+
[[34m2025-10-28 09:55:22[0m] (step=0042900) Train Loss: 0.6204, Train Steps/Sec: 1.21
|
| 514 |
+
[[34m2025-10-28 09:56:45[0m] (step=0043000) Train Loss: 0.6206, Train Steps/Sec: 1.21
|
| 515 |
+
[[34m2025-10-28 09:58:07[0m] (step=0043100) Train Loss: 0.6196, Train Steps/Sec: 1.21
|
| 516 |
+
[[34m2025-10-28 09:59:30[0m] (step=0043200) Train Loss: 0.6199, Train Steps/Sec: 1.21
|
| 517 |
+
[[34m2025-10-28 10:00:52[0m] (step=0043300) Train Loss: 0.6195, Train Steps/Sec: 1.21
|
| 518 |
+
[[34m2025-10-28 10:02:14[0m] (step=0043400) Train Loss: 0.6182, Train Steps/Sec: 1.21
|
| 519 |
+
[[34m2025-10-28 10:03:37[0m] (step=0043500) Train Loss: 0.6184, Train Steps/Sec: 1.21
|
| 520 |
+
[[34m2025-10-28 10:04:59[0m] (step=0043600) Train Loss: 0.6191, Train Steps/Sec: 1.21
|
| 521 |
+
[[34m2025-10-28 10:06:22[0m] (step=0043700) Train Loss: 0.6198, Train Steps/Sec: 1.21
|
| 522 |
+
[[34m2025-10-28 10:07:32[0m] Beginning epoch 35...
|
| 523 |
+
[[34m2025-10-28 10:07:47[0m] (step=0043800) Train Loss: 0.6197, Train Steps/Sec: 1.18
|
| 524 |
+
[[34m2025-10-28 10:09:09[0m] (step=0043900) Train Loss: 0.6188, Train Steps/Sec: 1.21
|
| 525 |
+
[[34m2025-10-28 10:10:32[0m] (step=0044000) Train Loss: 0.6202, Train Steps/Sec: 1.20
|
| 526 |
+
[[34m2025-10-28 10:11:55[0m] (step=0044100) Train Loss: 0.6196, Train Steps/Sec: 1.21
|
| 527 |
+
[[34m2025-10-28 10:13:17[0m] (step=0044200) Train Loss: 0.6192, Train Steps/Sec: 1.21
|
| 528 |
+
[[34m2025-10-28 10:14:40[0m] (step=0044300) Train Loss: 0.6194, Train Steps/Sec: 1.21
|
| 529 |
+
[[34m2025-10-28 10:16:02[0m] (step=0044400) Train Loss: 0.6183, Train Steps/Sec: 1.21
|
| 530 |
+
[[34m2025-10-28 10:17:24[0m] (step=0044500) Train Loss: 0.6198, Train Steps/Sec: 1.21
|
| 531 |
+
[[34m2025-10-28 10:18:47[0m] (step=0044600) Train Loss: 0.6188, Train Steps/Sec: 1.21
|
| 532 |
+
[[34m2025-10-28 10:20:09[0m] (step=0044700) Train Loss: 0.6202, Train Steps/Sec: 1.21
|
| 533 |
+
[[34m2025-10-28 10:21:31[0m] (step=0044800) Train Loss: 0.6190, Train Steps/Sec: 1.21
|
| 534 |
+
[[34m2025-10-28 10:22:54[0m] (step=0044900) Train Loss: 0.6183, Train Steps/Sec: 1.21
|
| 535 |
+
[[34m2025-10-28 10:24:16[0m] (step=0045000) Train Loss: 0.6187, Train Steps/Sec: 1.22
|
| 536 |
+
[[34m2025-10-28 10:24:46[0m] Beginning epoch 36...
|
| 537 |
+
[[34m2025-10-28 10:25:41[0m] (step=0045100) Train Loss: 0.6187, Train Steps/Sec: 1.17
|
| 538 |
+
[[34m2025-10-28 10:27:04[0m] (step=0045200) Train Loss: 0.6185, Train Steps/Sec: 1.21
|
| 539 |
+
[[34m2025-10-28 10:28:26[0m] (step=0045300) Train Loss: 0.6189, Train Steps/Sec: 1.21
|
| 540 |
+
[[34m2025-10-28 10:29:48[0m] (step=0045400) Train Loss: 0.6186, Train Steps/Sec: 1.21
|
| 541 |
+
[[34m2025-10-28 10:31:11[0m] (step=0045500) Train Loss: 0.6180, Train Steps/Sec: 1.21
|
| 542 |
+
[[34m2025-10-28 10:32:34[0m] (step=0045600) Train Loss: 0.6185, Train Steps/Sec: 1.21
|
| 543 |
+
[[34m2025-10-28 10:33:57[0m] (step=0045700) Train Loss: 0.6188, Train Steps/Sec: 1.21
|
| 544 |
+
[[34m2025-10-28 10:35:19[0m] (step=0045800) Train Loss: 0.6177, Train Steps/Sec: 1.21
|
| 545 |
+
[[34m2025-10-28 10:36:41[0m] (step=0045900) Train Loss: 0.6185, Train Steps/Sec: 1.21
|
| 546 |
+
[[34m2025-10-28 10:38:04[0m] (step=0046000) Train Loss: 0.6175, Train Steps/Sec: 1.21
|
| 547 |
+
[[34m2025-10-28 10:39:26[0m] (step=0046100) Train Loss: 0.6183, Train Steps/Sec: 1.21
|
| 548 |
+
[[34m2025-10-28 10:40:48[0m] (step=0046200) Train Loss: 0.6182, Train Steps/Sec: 1.21
|
| 549 |
+
[[34m2025-10-28 10:42:00[0m] Beginning epoch 37...
|
| 550 |
+
[[34m2025-10-28 10:42:13[0m] (step=0046300) Train Loss: 0.6180, Train Steps/Sec: 1.18
|
| 551 |
+
[[34m2025-10-28 10:43:36[0m] (step=0046400) Train Loss: 0.6183, Train Steps/Sec: 1.21
|
| 552 |
+
[[34m2025-10-28 10:44:58[0m] (step=0046500) Train Loss: 0.6183, Train Steps/Sec: 1.21
|
| 553 |
+
[[34m2025-10-28 10:46:20[0m] (step=0046600) Train Loss: 0.6181, Train Steps/Sec: 1.21
|
| 554 |
+
[[34m2025-10-28 10:47:43[0m] (step=0046700) Train Loss: 0.6180, Train Steps/Sec: 1.21
|
| 555 |
+
[[34m2025-10-28 10:49:05[0m] (step=0046800) Train Loss: 0.6179, Train Steps/Sec: 1.21
|
| 556 |
+
[[34m2025-10-28 10:50:28[0m] (step=0046900) Train Loss: 0.6175, Train Steps/Sec: 1.21
|
| 557 |
+
[[34m2025-10-28 10:51:50[0m] (step=0047000) Train Loss: 0.6184, Train Steps/Sec: 1.21
|
| 558 |
+
[[34m2025-10-28 10:53:12[0m] (step=0047100) Train Loss: 0.6180, Train Steps/Sec: 1.22
|
| 559 |
+
[[34m2025-10-28 10:54:35[0m] (step=0047200) Train Loss: 0.6178, Train Steps/Sec: 1.21
|
| 560 |
+
[[34m2025-10-28 10:55:58[0m] (step=0047300) Train Loss: 0.6179, Train Steps/Sec: 1.20
|
| 561 |
+
[[34m2025-10-28 10:57:20[0m] (step=0047400) Train Loss: 0.6184, Train Steps/Sec: 1.21
|
| 562 |
+
[[34m2025-10-28 10:58:43[0m] (step=0047500) Train Loss: 0.6162, Train Steps/Sec: 1.21
|
| 563 |
+
[[34m2025-10-28 10:59:15[0m] Beginning epoch 38...
|
| 564 |
+
[[34m2025-10-28 11:00:08[0m] (step=0047600) Train Loss: 0.6168, Train Steps/Sec: 1.18
|
| 565 |
+
[[34m2025-10-28 11:01:30[0m] (step=0047700) Train Loss: 0.6176, Train Steps/Sec: 1.21
|
| 566 |
+
[[34m2025-10-28 11:02:53[0m] (step=0047800) Train Loss: 0.6167, Train Steps/Sec: 1.21
|
| 567 |
+
[[34m2025-10-28 11:04:15[0m] (step=0047900) Train Loss: 0.6172, Train Steps/Sec: 1.21
|
| 568 |
+
[[34m2025-10-28 11:05:37[0m] (step=0048000) Train Loss: 0.6173, Train Steps/Sec: 1.21
|
| 569 |
+
[[34m2025-10-28 11:07:00[0m] (step=0048100) Train Loss: 0.6162, Train Steps/Sec: 1.21
|
| 570 |
+
[[34m2025-10-28 11:08:22[0m] (step=0048200) Train Loss: 0.6179, Train Steps/Sec: 1.21
|
| 571 |
+
[[34m2025-10-28 11:09:45[0m] (step=0048300) Train Loss: 0.6167, Train Steps/Sec: 1.21
|
| 572 |
+
[[34m2025-10-28 11:11:07[0m] (step=0048400) Train Loss: 0.6172, Train Steps/Sec: 1.21
|
| 573 |
+
[[34m2025-10-28 11:12:30[0m] (step=0048500) Train Loss: 0.6176, Train Steps/Sec: 1.21
|
| 574 |
+
[[34m2025-10-28 11:13:52[0m] (step=0048600) Train Loss: 0.6162, Train Steps/Sec: 1.21
|
| 575 |
+
[[34m2025-10-28 11:15:15[0m] (step=0048700) Train Loss: 0.6158, Train Steps/Sec: 1.21
|
| 576 |
+
[[34m2025-10-28 11:16:29[0m] Beginning epoch 39...
|
| 577 |
+
[[34m2025-10-28 11:16:40[0m] (step=0048800) Train Loss: 0.6158, Train Steps/Sec: 1.18
|
| 578 |
+
[[34m2025-10-28 11:18:02[0m] (step=0048900) Train Loss: 0.6174, Train Steps/Sec: 1.21
|
| 579 |
+
[[34m2025-10-28 11:19:25[0m] (step=0049000) Train Loss: 0.6169, Train Steps/Sec: 1.20
|
| 580 |
+
[[34m2025-10-28 11:20:48[0m] (step=0049100) Train Loss: 0.6174, Train Steps/Sec: 1.21
|
| 581 |
+
[[34m2025-10-28 11:22:10[0m] (step=0049200) Train Loss: 0.6161, Train Steps/Sec: 1.21
|
| 582 |
+
[[34m2025-10-28 11:23:33[0m] (step=0049300) Train Loss: 0.6156, Train Steps/Sec: 1.21
|
| 583 |
+
[[34m2025-10-28 11:24:55[0m] (step=0049400) Train Loss: 0.6178, Train Steps/Sec: 1.21
|
| 584 |
+
[[34m2025-10-28 11:26:18[0m] (step=0049500) Train Loss: 0.6164, Train Steps/Sec: 1.21
|
| 585 |
+
[[34m2025-10-28 11:27:40[0m] (step=0049600) Train Loss: 0.6165, Train Steps/Sec: 1.21
|
| 586 |
+
[[34m2025-10-28 11:29:03[0m] (step=0049700) Train Loss: 0.6176, Train Steps/Sec: 1.21
|
| 587 |
+
[[34m2025-10-28 11:30:25[0m] (step=0049800) Train Loss: 0.6164, Train Steps/Sec: 1.21
|
| 588 |
+
[[34m2025-10-28 11:31:47[0m] (step=0049900) Train Loss: 0.6153, Train Steps/Sec: 1.21
|
| 589 |
+
[[34m2025-10-28 11:33:10[0m] (step=0050000) Train Loss: 0.6175, Train Steps/Sec: 1.21
|
| 590 |
+
[[34m2025-10-28 11:34:00[0m] Saved checkpoint to results/stage2/hfdata/lightningdit-xl-pe-vit-b-bf16/checkpoints/0050000.pt
|
| 591 |
+
[[34m2025-10-28 11:34:00[0m] Generating EMA samples...
|
| 592 |
+
[[34m2025-10-28 11:34:28[0m] Generating EMA samples done.
|
| 593 |
+
[[34m2025-10-28 11:35:01[0m] Beginning epoch 40...
|
| 594 |
+
[[34m2025-10-28 11:35:53[0m] (step=0050100) Train Loss: 0.6142, Train Steps/Sec: 0.61
|
| 595 |
+
[[34m2025-10-28 11:37:15[0m] (step=0050200) Train Loss: 0.6155, Train Steps/Sec: 1.21
|
| 596 |
+
[[34m2025-10-28 11:38:38[0m] (step=0050300) Train Loss: 0.6145, Train Steps/Sec: 1.21
|
| 597 |
+
[[34m2025-10-28 11:40:00[0m] (step=0050400) Train Loss: 0.6168, Train Steps/Sec: 1.21
|
| 598 |
+
[[34m2025-10-28 11:41:22[0m] (step=0050500) Train Loss: 0.6172, Train Steps/Sec: 1.21
|
| 599 |
+
[[34m2025-10-28 11:42:45[0m] (step=0050600) Train Loss: 0.6145, Train Steps/Sec: 1.21
|
| 600 |
+
[[34m2025-10-28 11:44:08[0m] (step=0050700) Train Loss: 0.6163, Train Steps/Sec: 1.20
|
| 601 |
+
[[34m2025-10-28 11:45:31[0m] (step=0050800) Train Loss: 0.6164, Train Steps/Sec: 1.22
|
| 602 |
+
[[34m2025-10-28 11:46:53[0m] (step=0050900) Train Loss: 0.6158, Train Steps/Sec: 1.21
|
| 603 |
+
[[34m2025-10-28 11:48:15[0m] (step=0051000) Train Loss: 0.6165, Train Steps/Sec: 1.21
|
| 604 |
+
[[34m2025-10-28 11:49:38[0m] (step=0051100) Train Loss: 0.6161, Train Steps/Sec: 1.21
|
| 605 |
+
[[34m2025-10-28 11:51:00[0m] (step=0051200) Train Loss: 0.6159, Train Steps/Sec: 1.22
|
| 606 |
+
[[34m2025-10-28 11:52:15[0m] Beginning epoch 41...
|
| 607 |
+
[[34m2025-10-28 11:52:25[0m] (step=0051300) Train Loss: 0.6159, Train Steps/Sec: 1.18
|
| 608 |
+
[[34m2025-10-28 11:53:47[0m] (step=0051400) Train Loss: 0.6165, Train Steps/Sec: 1.21
|
| 609 |
+
[[34m2025-10-28 11:55:10[0m] (step=0051500) Train Loss: 0.6150, Train Steps/Sec: 1.21
|
| 610 |
+
[[34m2025-10-28 11:56:32[0m] (step=0051600) Train Loss: 0.6150, Train Steps/Sec: 1.21
|
| 611 |
+
[[34m2025-10-28 11:57:54[0m] (step=0051700) Train Loss: 0.6151, Train Steps/Sec: 1.21
|
| 612 |
+
[[34m2025-10-28 11:59:17[0m] (step=0051800) Train Loss: 0.6146, Train Steps/Sec: 1.21
|
| 613 |
+
[[34m2025-10-28 12:00:39[0m] (step=0051900) Train Loss: 0.6158, Train Steps/Sec: 1.21
|
| 614 |
+
[[34m2025-10-28 12:02:02[0m] (step=0052000) Train Loss: 0.6156, Train Steps/Sec: 1.21
|
| 615 |
+
[[34m2025-10-28 12:03:24[0m] (step=0052100) Train Loss: 0.6145, Train Steps/Sec: 1.21
|
| 616 |
+
[[34m2025-10-28 12:04:46[0m] (step=0052200) Train Loss: 0.6156, Train Steps/Sec: 1.21
|
| 617 |
+
[[34m2025-10-28 12:06:09[0m] (step=0052300) Train Loss: 0.6164, Train Steps/Sec: 1.20
|
| 618 |
+
[[34m2025-10-28 12:07:32[0m] (step=0052400) Train Loss: 0.6163, Train Steps/Sec: 1.21
|
| 619 |
+
[[34m2025-10-28 12:08:55[0m] (step=0052500) Train Loss: 0.6150, Train Steps/Sec: 1.21
|
| 620 |
+
[[34m2025-10-28 12:09:30[0m] Beginning epoch 42...
|
| 621 |
+
[[34m2025-10-28 12:10:20[0m] (step=0052600) Train Loss: 0.6150, Train Steps/Sec: 1.18
|
| 622 |
+
[[34m2025-10-28 12:11:42[0m] (step=0052700) Train Loss: 0.6144, Train Steps/Sec: 1.21
|
| 623 |
+
[[34m2025-10-28 12:13:04[0m] (step=0052800) Train Loss: 0.6147, Train Steps/Sec: 1.21
|
| 624 |
+
[[34m2025-10-28 12:14:27[0m] (step=0052900) Train Loss: 0.6158, Train Steps/Sec: 1.21
|
| 625 |
+
[[34m2025-10-28 12:15:49[0m] (step=0053000) Train Loss: 0.6162, Train Steps/Sec: 1.21
|
| 626 |
+
[[34m2025-10-28 12:17:11[0m] (step=0053100) Train Loss: 0.6145, Train Steps/Sec: 1.21
|
| 627 |
+
[[34m2025-10-28 12:18:34[0m] (step=0053200) Train Loss: 0.6146, Train Steps/Sec: 1.21
|
| 628 |
+
[[34m2025-10-28 12:19:56[0m] (step=0053300) Train Loss: 0.6143, Train Steps/Sec: 1.21
|
| 629 |
+
[[34m2025-10-28 12:21:19[0m] (step=0053400) Train Loss: 0.6135, Train Steps/Sec: 1.21
|
| 630 |
+
[[34m2025-10-28 12:22:41[0m] (step=0053500) Train Loss: 0.6158, Train Steps/Sec: 1.21
|
| 631 |
+
[[34m2025-10-28 12:24:03[0m] (step=0053600) Train Loss: 0.6169, Train Steps/Sec: 1.21
|
| 632 |
+
[[34m2025-10-28 12:25:26[0m] (step=0053700) Train Loss: 0.6154, Train Steps/Sec: 1.21
|
| 633 |
+
[[34m2025-10-28 12:26:43[0m] Beginning epoch 43...
|
| 634 |
+
[[34m2025-10-28 12:26:51[0m] (step=0053800) Train Loss: 0.6158, Train Steps/Sec: 1.18
|
| 635 |
+
[[34m2025-10-28 12:28:13[0m] (step=0053900) Train Loss: 0.6136, Train Steps/Sec: 1.21
|
| 636 |
+
[[34m2025-10-28 12:29:36[0m] (step=0054000) Train Loss: 0.6153, Train Steps/Sec: 1.20
|
| 637 |
+
[[34m2025-10-28 12:30:59[0m] (step=0054100) Train Loss: 0.6149, Train Steps/Sec: 1.21
|
| 638 |
+
[[34m2025-10-28 12:32:21[0m] (step=0054200) Train Loss: 0.6145, Train Steps/Sec: 1.21
|
| 639 |
+
[[34m2025-10-28 12:33:44[0m] (step=0054300) Train Loss: 0.6164, Train Steps/Sec: 1.22
|
| 640 |
+
[[34m2025-10-28 12:35:06[0m] (step=0054400) Train Loss: 0.6165, Train Steps/Sec: 1.22
|
| 641 |
+
[[34m2025-10-28 12:36:28[0m] (step=0054500) Train Loss: 0.6120, Train Steps/Sec: 1.22
|
| 642 |
+
[[34m2025-10-28 12:37:50[0m] (step=0054600) Train Loss: 0.6159, Train Steps/Sec: 1.21
|
| 643 |
+
[[34m2025-10-28 12:39:13[0m] (step=0054700) Train Loss: 0.6144, Train Steps/Sec: 1.22
|
| 644 |
+
[[34m2025-10-28 12:40:35[0m] (step=0054800) Train Loss: 0.6151, Train Steps/Sec: 1.22
|
| 645 |
+
[[34m2025-10-28 12:41:57[0m] (step=0054900) Train Loss: 0.6150, Train Steps/Sec: 1.21
|
| 646 |
+
[[34m2025-10-28 12:43:20[0m] (step=0055000) Train Loss: 0.6145, Train Steps/Sec: 1.21
|
| 647 |
+
[[34m2025-10-28 12:43:56[0m] Beginning epoch 44...
|
| 648 |
+
[[34m2025-10-28 12:44:45[0m] (step=0055100) Train Loss: 0.6144, Train Steps/Sec: 1.18
|
| 649 |
+
[[34m2025-10-28 12:46:07[0m] (step=0055200) Train Loss: 0.6134, Train Steps/Sec: 1.21
|
| 650 |
+
[[34m2025-10-28 12:47:29[0m] (step=0055300) Train Loss: 0.6136, Train Steps/Sec: 1.21
|
| 651 |
+
[[34m2025-10-28 12:48:52[0m] (step=0055400) Train Loss: 0.6150, Train Steps/Sec: 1.21
|
| 652 |
+
[[34m2025-10-28 12:50:14[0m] (step=0055500) Train Loss: 0.6146, Train Steps/Sec: 1.22
|
| 653 |
+
[[34m2025-10-28 12:51:37[0m] (step=0055600) Train Loss: 0.6129, Train Steps/Sec: 1.21
|
| 654 |
+
[[34m2025-10-28 12:53:00[0m] (step=0055700) Train Loss: 0.6138, Train Steps/Sec: 1.21
|
| 655 |
+
[[34m2025-10-28 12:54:22[0m] (step=0055800) Train Loss: 0.6136, Train Steps/Sec: 1.21
|
| 656 |
+
[[34m2025-10-28 12:55:45[0m] (step=0055900) Train Loss: 0.6139, Train Steps/Sec: 1.21
|
| 657 |
+
[[34m2025-10-28 12:57:07[0m] (step=0056000) Train Loss: 0.6112, Train Steps/Sec: 1.21
|
| 658 |
+
[[34m2025-10-28 12:58:29[0m] (step=0056100) Train Loss: 0.6153, Train Steps/Sec: 1.21
|
| 659 |
+
[[34m2025-10-28 12:59:52[0m] (step=0056200) Train Loss: 0.6140, Train Steps/Sec: 1.22
|
| 660 |
+
[[34m2025-10-28 13:01:10[0m] Beginning epoch 45...
|
| 661 |
+
[[34m2025-10-28 13:01:17[0m] (step=0056300) Train Loss: 0.6139, Train Steps/Sec: 1.18
|
| 662 |
+
[[34m2025-10-28 13:02:39[0m] (step=0056400) Train Loss: 0.6130, Train Steps/Sec: 1.21
|
| 663 |
+
[[34m2025-10-28 13:04:01[0m] (step=0056500) Train Loss: 0.6143, Train Steps/Sec: 1.21
|
| 664 |
+
[[34m2025-10-28 13:05:24[0m] (step=0056600) Train Loss: 0.6151, Train Steps/Sec: 1.21
|
| 665 |
+
[[34m2025-10-28 13:06:46[0m] (step=0056700) Train Loss: 0.6136, Train Steps/Sec: 1.21
|
| 666 |
+
[[34m2025-10-28 13:08:09[0m] (step=0056800) Train Loss: 0.6132, Train Steps/Sec: 1.21
|
| 667 |
+
[[34m2025-10-28 13:09:31[0m] (step=0056900) Train Loss: 0.6118, Train Steps/Sec: 1.21
|
| 668 |
+
[[34m2025-10-28 13:10:54[0m] (step=0057000) Train Loss: 0.6147, Train Steps/Sec: 1.21
|
| 669 |
+
[[34m2025-10-28 13:12:16[0m] (step=0057100) Train Loss: 0.6131, Train Steps/Sec: 1.21
|
| 670 |
+
[[34m2025-10-28 13:13:39[0m] (step=0057200) Train Loss: 0.6141, Train Steps/Sec: 1.21
|
| 671 |
+
[[34m2025-10-28 13:15:02[0m] (step=0057300) Train Loss: 0.6149, Train Steps/Sec: 1.21
|
| 672 |
+
[[34m2025-10-28 13:16:24[0m] (step=0057400) Train Loss: 0.6145, Train Steps/Sec: 1.21
|
| 673 |
+
[[34m2025-10-28 13:17:47[0m] (step=0057500) Train Loss: 0.6139, Train Steps/Sec: 1.21
|
| 674 |
+
[[34m2025-10-28 13:18:25[0m] Beginning epoch 46...
|
| 675 |
+
[[34m2025-10-28 13:19:11[0m] (step=0057600) Train Loss: 0.6126, Train Steps/Sec: 1.18
|
| 676 |
+
[[34m2025-10-28 13:20:34[0m] (step=0057700) Train Loss: 0.6140, Train Steps/Sec: 1.21
|
| 677 |
+
[[34m2025-10-28 13:21:56[0m] (step=0057800) Train Loss: 0.6127, Train Steps/Sec: 1.21
|
| 678 |
+
[[34m2025-10-28 13:23:19[0m] (step=0057900) Train Loss: 0.6132, Train Steps/Sec: 1.21
|
| 679 |
+
[[34m2025-10-28 13:24:41[0m] (step=0058000) Train Loss: 0.6151, Train Steps/Sec: 1.21
|
| 680 |
+
[[34m2025-10-28 13:26:04[0m] (step=0058100) Train Loss: 0.6142, Train Steps/Sec: 1.21
|
| 681 |
+
[[34m2025-10-28 13:27:26[0m] (step=0058200) Train Loss: 0.6136, Train Steps/Sec: 1.21
|
| 682 |
+
[[34m2025-10-28 13:28:48[0m] (step=0058300) Train Loss: 0.6143, Train Steps/Sec: 1.21
|
| 683 |
+
[[34m2025-10-28 13:30:11[0m] (step=0058400) Train Loss: 0.6141, Train Steps/Sec: 1.21
|
| 684 |
+
[[34m2025-10-28 13:31:33[0m] (step=0058500) Train Loss: 0.6136, Train Steps/Sec: 1.22
|
| 685 |
+
[[34m2025-10-28 13:32:55[0m] (step=0058600) Train Loss: 0.6136, Train Steps/Sec: 1.21
|
| 686 |
+
[[34m2025-10-28 13:34:18[0m] (step=0058700) Train Loss: 0.6140, Train Steps/Sec: 1.21
|
| 687 |
+
[[34m2025-10-28 13:35:38[0m] Beginning epoch 47...
|
| 688 |
+
[[34m2025-10-28 13:35:43[0m] (step=0058800) Train Loss: 0.6139, Train Steps/Sec: 1.18
|
| 689 |
+
[[34m2025-10-28 13:37:05[0m] (step=0058900) Train Loss: 0.6128, Train Steps/Sec: 1.21
|
| 690 |
+
[[34m2025-10-28 13:38:28[0m] (step=0059000) Train Loss: 0.6140, Train Steps/Sec: 1.21
|
| 691 |
+
[[34m2025-10-28 13:39:51[0m] (step=0059100) Train Loss: 0.6125, Train Steps/Sec: 1.21
|
| 692 |
+
[[34m2025-10-28 13:41:13[0m] (step=0059200) Train Loss: 0.6143, Train Steps/Sec: 1.21
|
| 693 |
+
[[34m2025-10-28 13:42:35[0m] (step=0059300) Train Loss: 0.6113, Train Steps/Sec: 1.21
|
| 694 |
+
[[34m2025-10-28 13:43:58[0m] (step=0059400) Train Loss: 0.6131, Train Steps/Sec: 1.21
|
| 695 |
+
[[34m2025-10-28 13:45:20[0m] (step=0059500) Train Loss: 0.6138, Train Steps/Sec: 1.21
|
| 696 |
+
[[34m2025-10-28 13:46:43[0m] (step=0059600) Train Loss: 0.6129, Train Steps/Sec: 1.21
|
| 697 |
+
[[34m2025-10-28 13:48:05[0m] (step=0059700) Train Loss: 0.6129, Train Steps/Sec: 1.21
|
| 698 |
+
[[34m2025-10-28 13:49:28[0m] (step=0059800) Train Loss: 0.6120, Train Steps/Sec: 1.21
|
| 699 |
+
[[34m2025-10-28 13:50:50[0m] (step=0059900) Train Loss: 0.6121, Train Steps/Sec: 1.21
|
| 700 |
+
[[34m2025-10-28 13:52:12[0m] (step=0060000) Train Loss: 0.6131, Train Steps/Sec: 1.21
|
| 701 |
+
[[34m2025-10-28 13:52:53[0m] Beginning epoch 48...
|
| 702 |
+
[[34m2025-10-28 13:53:37[0m] (step=0060100) Train Loss: 0.6122, Train Steps/Sec: 1.18
|
| 703 |
+
[[34m2025-10-28 13:55:00[0m] (step=0060200) Train Loss: 0.6128, Train Steps/Sec: 1.21
|
| 704 |
+
[[34m2025-10-28 13:56:22[0m] (step=0060300) Train Loss: 0.6120, Train Steps/Sec: 1.21
|
| 705 |
+
[[34m2025-10-28 13:57:45[0m] (step=0060400) Train Loss: 0.6112, Train Steps/Sec: 1.21
|
| 706 |
+
[[34m2025-10-28 13:59:07[0m] (step=0060500) Train Loss: 0.6133, Train Steps/Sec: 1.21
|
| 707 |
+
[[34m2025-10-28 14:00:30[0m] (step=0060600) Train Loss: 0.6120, Train Steps/Sec: 1.20
|
| 708 |
+
[[34m2025-10-28 14:01:53[0m] (step=0060700) Train Loss: 0.6118, Train Steps/Sec: 1.21
|
| 709 |
+
[[34m2025-10-28 14:03:15[0m] (step=0060800) Train Loss: 0.6121, Train Steps/Sec: 1.21
|
| 710 |
+
[[34m2025-10-28 14:04:38[0m] (step=0060900) Train Loss: 0.6113, Train Steps/Sec: 1.21
|
| 711 |
+
[[34m2025-10-28 14:06:00[0m] (step=0061000) Train Loss: 0.6121, Train Steps/Sec: 1.21
|
| 712 |
+
[[34m2025-10-28 14:07:23[0m] (step=0061100) Train Loss: 0.6118, Train Steps/Sec: 1.21
|
| 713 |
+
[[34m2025-10-28 14:08:45[0m] (step=0061200) Train Loss: 0.6120, Train Steps/Sec: 1.21
|
| 714 |
+
[[34m2025-10-28 14:10:07[0m] Beginning epoch 49...
|
| 715 |
+
[[34m2025-10-28 14:10:10[0m] (step=0061300) Train Loss: 0.6123, Train Steps/Sec: 1.18
|
| 716 |
+
[[34m2025-10-28 14:11:32[0m] (step=0061400) Train Loss: 0.6107, Train Steps/Sec: 1.21
|
| 717 |
+
[[34m2025-10-28 14:12:54[0m] (step=0061500) Train Loss: 0.6125, Train Steps/Sec: 1.21
|
| 718 |
+
[[34m2025-10-28 14:14:17[0m] (step=0061600) Train Loss: 0.6126, Train Steps/Sec: 1.21
|
| 719 |
+
[[34m2025-10-28 14:15:39[0m] (step=0061700) Train Loss: 0.6120, Train Steps/Sec: 1.21
|
| 720 |
+
[[34m2025-10-28 14:17:01[0m] (step=0061800) Train Loss: 0.6122, Train Steps/Sec: 1.21
|
| 721 |
+
[[34m2025-10-28 14:18:23[0m] (step=0061900) Train Loss: 0.6126, Train Steps/Sec: 1.22
|
| 722 |
+
[[34m2025-10-28 14:19:46[0m] (step=0062000) Train Loss: 0.6120, Train Steps/Sec: 1.21
|
| 723 |
+
[[34m2025-10-28 14:21:08[0m] (step=0062100) Train Loss: 0.6119, Train Steps/Sec: 1.21
|
| 724 |
+
[[34m2025-10-28 14:22:31[0m] (step=0062200) Train Loss: 0.6123, Train Steps/Sec: 1.21
|
| 725 |
+
[[34m2025-10-28 14:23:54[0m] (step=0062300) Train Loss: 0.6116, Train Steps/Sec: 1.21
|
| 726 |
+
[[34m2025-10-28 14:25:16[0m] (step=0062400) Train Loss: 0.6124, Train Steps/Sec: 1.21
|
| 727 |
+
[[34m2025-10-28 14:26:38[0m] (step=0062500) Train Loss: 0.6108, Train Steps/Sec: 1.22
|
| 728 |
+
[[34m2025-10-28 14:27:20[0m] Beginning epoch 50...
|
| 729 |
+
[[34m2025-10-28 14:28:03[0m] (step=0062600) Train Loss: 0.6111, Train Steps/Sec: 1.18
|
| 730 |
+
[[34m2025-10-28 14:29:26[0m] (step=0062700) Train Loss: 0.6107, Train Steps/Sec: 1.21
|
| 731 |
+
[[34m2025-10-28 14:30:48[0m] (step=0062800) Train Loss: 0.6113, Train Steps/Sec: 1.21
|
| 732 |
+
[[34m2025-10-28 14:32:11[0m] (step=0062900) Train Loss: 0.6115, Train Steps/Sec: 1.21
|
| 733 |
+
[[34m2025-10-28 14:33:33[0m] (step=0063000) Train Loss: 0.6120, Train Steps/Sec: 1.21
|
| 734 |
+
[[34m2025-10-28 14:34:55[0m] (step=0063100) Train Loss: 0.6098, Train Steps/Sec: 1.21
|
| 735 |
+
[[34m2025-10-28 14:36:18[0m] (step=0063200) Train Loss: 0.6102, Train Steps/Sec: 1.21
|
| 736 |
+
[[34m2025-10-28 14:37:40[0m] (step=0063300) Train Loss: 0.6123, Train Steps/Sec: 1.21
|
| 737 |
+
[[34m2025-10-28 14:39:03[0m] (step=0063400) Train Loss: 0.6102, Train Steps/Sec: 1.21
|
| 738 |
+
[[34m2025-10-28 14:40:25[0m] (step=0063500) Train Loss: 0.6101, Train Steps/Sec: 1.21
|
| 739 |
+
[[34m2025-10-28 14:41:48[0m] (step=0063600) Train Loss: 0.6126, Train Steps/Sec: 1.21
|
| 740 |
+
[[34m2025-10-28 14:43:10[0m] (step=0063700) Train Loss: 0.6117, Train Steps/Sec: 1.21
|
| 741 |
+
[[34m2025-10-28 14:44:33[0m] (step=0063800) Train Loss: 0.6122, Train Steps/Sec: 1.21
|
| 742 |
+
[[34m2025-10-28 14:44:34[0m] Beginning epoch 51...
|
| 743 |
+
[[34m2025-10-28 14:45:58[0m] (step=0063900) Train Loss: 0.6100, Train Steps/Sec: 1.17
|
| 744 |
+
[[34m2025-10-28 14:47:21[0m] (step=0064000) Train Loss: 0.6112, Train Steps/Sec: 1.20
|
| 745 |
+
[[34m2025-10-28 14:48:44[0m] (step=0064100) Train Loss: 0.6109, Train Steps/Sec: 1.21
|
| 746 |
+
[[34m2025-10-28 14:50:06[0m] (step=0064200) Train Loss: 0.6116, Train Steps/Sec: 1.21
|
| 747 |
+
[[34m2025-10-28 14:51:28[0m] (step=0064300) Train Loss: 0.6112, Train Steps/Sec: 1.21
|
| 748 |
+
[[34m2025-10-28 14:52:51[0m] (step=0064400) Train Loss: 0.6116, Train Steps/Sec: 1.21
|
| 749 |
+
[[34m2025-10-28 14:54:13[0m] (step=0064500) Train Loss: 0.6116, Train Steps/Sec: 1.22
|
| 750 |
+
[[34m2025-10-28 14:55:35[0m] (step=0064600) Train Loss: 0.6117, Train Steps/Sec: 1.22
|
| 751 |
+
[[34m2025-10-28 14:56:58[0m] (step=0064700) Train Loss: 0.6103, Train Steps/Sec: 1.21
|
| 752 |
+
[[34m2025-10-28 14:58:20[0m] (step=0064800) Train Loss: 0.6107, Train Steps/Sec: 1.21
|
| 753 |
+
[[34m2025-10-28 14:59:42[0m] (step=0064900) Train Loss: 0.6104, Train Steps/Sec: 1.21
|
| 754 |
+
[[34m2025-10-28 15:01:05[0m] (step=0065000) Train Loss: 0.6120, Train Steps/Sec: 1.22
|
| 755 |
+
[[34m2025-10-28 15:01:48[0m] Beginning epoch 52...
|
| 756 |
+
[[34m2025-10-28 15:02:30[0m] (step=0065100) Train Loss: 0.6100, Train Steps/Sec: 1.18
|
| 757 |
+
[[34m2025-10-28 15:03:52[0m] (step=0065200) Train Loss: 0.6115, Train Steps/Sec: 1.21
|
| 758 |
+
[[34m2025-10-28 15:05:14[0m] (step=0065300) Train Loss: 0.6115, Train Steps/Sec: 1.21
|
| 759 |
+
[[34m2025-10-28 15:06:37[0m] (step=0065400) Train Loss: 0.6111, Train Steps/Sec: 1.21
|
| 760 |
+
[[34m2025-10-28 15:07:59[0m] (step=0065500) Train Loss: 0.6100, Train Steps/Sec: 1.21
|
| 761 |
+
[[34m2025-10-28 15:09:22[0m] (step=0065600) Train Loss: 0.6106, Train Steps/Sec: 1.21
|
| 762 |
+
[[34m2025-10-28 15:10:45[0m] (step=0065700) Train Loss: 0.6097, Train Steps/Sec: 1.21
|
| 763 |
+
[[34m2025-10-28 15:12:07[0m] (step=0065800) Train Loss: 0.6094, Train Steps/Sec: 1.21
|
| 764 |
+
[[34m2025-10-28 15:13:30[0m] (step=0065900) Train Loss: 0.6110, Train Steps/Sec: 1.21
|
| 765 |
+
[[34m2025-10-28 15:14:52[0m] (step=0066000) Train Loss: 0.6098, Train Steps/Sec: 1.21
|
| 766 |
+
[[34m2025-10-28 15:16:14[0m] (step=0066100) Train Loss: 0.6111, Train Steps/Sec: 1.22
|
| 767 |
+
[[34m2025-10-28 15:17:37[0m] (step=0066200) Train Loss: 0.6103, Train Steps/Sec: 1.21
|
| 768 |
+
[[34m2025-10-28 15:18:59[0m] (step=0066300) Train Loss: 0.6095, Train Steps/Sec: 1.22
|
| 769 |
+
[[34m2025-10-28 15:19:02[0m] Beginning epoch 53...
|
| 770 |
+
[[34m2025-10-28 15:20:24[0m] (step=0066400) Train Loss: 0.6095, Train Steps/Sec: 1.18
|
| 771 |
+
[[34m2025-10-28 15:21:46[0m] (step=0066500) Train Loss: 0.6096, Train Steps/Sec: 1.21
|
| 772 |
+
[[34m2025-10-28 15:23:09[0m] (step=0066600) Train Loss: 0.6098, Train Steps/Sec: 1.22
|
| 773 |
+
[[34m2025-10-28 15:24:31[0m] (step=0066700) Train Loss: 0.6100, Train Steps/Sec: 1.22
|
| 774 |
+
[[34m2025-10-28 15:25:53[0m] (step=0066800) Train Loss: 0.6100, Train Steps/Sec: 1.22
|
| 775 |
+
[[34m2025-10-28 15:27:15[0m] (step=0066900) Train Loss: 0.6098, Train Steps/Sec: 1.22
|
| 776 |
+
[[34m2025-10-28 15:28:38[0m] (step=0067000) Train Loss: 0.6104, Train Steps/Sec: 1.21
|
| 777 |
+
[[34m2025-10-28 15:30:00[0m] (step=0067100) Train Loss: 0.6104, Train Steps/Sec: 1.21
|
| 778 |
+
[[34m2025-10-28 15:31:23[0m] (step=0067200) Train Loss: 0.6098, Train Steps/Sec: 1.21
|
| 779 |
+
[[34m2025-10-28 15:32:46[0m] (step=0067300) Train Loss: 0.6107, Train Steps/Sec: 1.21
|
| 780 |
+
[[34m2025-10-28 15:34:08[0m] (step=0067400) Train Loss: 0.6098, Train Steps/Sec: 1.21
|
| 781 |
+
[[34m2025-10-28 15:35:31[0m] (step=0067500) Train Loss: 0.6107, Train Steps/Sec: 1.21
|
| 782 |
+
[[34m2025-10-28 15:36:16[0m] Beginning epoch 54...
|
| 783 |
+
[[34m2025-10-28 15:36:56[0m] (step=0067600) Train Loss: 0.6102, Train Steps/Sec: 1.18
|
| 784 |
+
[[34m2025-10-28 15:38:18[0m] (step=0067700) Train Loss: 0.6082, Train Steps/Sec: 1.21
|
| 785 |
+
[[34m2025-10-28 15:39:40[0m] (step=0067800) Train Loss: 0.6096, Train Steps/Sec: 1.21
|
| 786 |
+
[[34m2025-10-28 15:41:03[0m] (step=0067900) Train Loss: 0.6108, Train Steps/Sec: 1.21
|
| 787 |
+
[[34m2025-10-28 15:42:25[0m] (step=0068000) Train Loss: 0.6101, Train Steps/Sec: 1.21
|
| 788 |
+
[[34m2025-10-28 15:43:48[0m] (step=0068100) Train Loss: 0.6109, Train Steps/Sec: 1.21
|
| 789 |
+
[[34m2025-10-28 15:45:10[0m] (step=0068200) Train Loss: 0.6103, Train Steps/Sec: 1.21
|
| 790 |
+
[[34m2025-10-28 15:46:32[0m] (step=0068300) Train Loss: 0.6095, Train Steps/Sec: 1.21
|
| 791 |
+
[[34m2025-10-28 15:47:55[0m] (step=0068400) Train Loss: 0.6100, Train Steps/Sec: 1.21
|
| 792 |
+
[[34m2025-10-28 15:49:17[0m] (step=0068500) Train Loss: 0.6095, Train Steps/Sec: 1.21
|
| 793 |
+
[[34m2025-10-28 15:50:40[0m] (step=0068600) Train Loss: 0.6106, Train Steps/Sec: 1.21
|
| 794 |
+
[[34m2025-10-28 15:52:02[0m] (step=0068700) Train Loss: 0.6093, Train Steps/Sec: 1.21
|
| 795 |
+
[[34m2025-10-28 15:53:25[0m] (step=0068800) Train Loss: 0.6099, Train Steps/Sec: 1.21
|
| 796 |
+
[[34m2025-10-28 15:53:29[0m] Beginning epoch 55...
|
| 797 |
+
[[34m2025-10-28 15:54:50[0m] (step=0068900) Train Loss: 0.6083, Train Steps/Sec: 1.17
|
| 798 |
+
[[34m2025-10-28 15:56:13[0m] (step=0069000) Train Loss: 0.6090, Train Steps/Sec: 1.21
|
| 799 |
+
[[34m2025-10-28 15:57:35[0m] (step=0069100) Train Loss: 0.6105, Train Steps/Sec: 1.22
|
| 800 |
+
[[34m2025-10-28 15:58:57[0m] (step=0069200) Train Loss: 0.6084, Train Steps/Sec: 1.21
|
| 801 |
+
[[34m2025-10-28 16:00:20[0m] (step=0069300) Train Loss: 0.6094, Train Steps/Sec: 1.21
|
| 802 |
+
[[34m2025-10-28 16:01:42[0m] (step=0069400) Train Loss: 0.6094, Train Steps/Sec: 1.21
|
| 803 |
+
[[34m2025-10-28 16:03:05[0m] (step=0069500) Train Loss: 0.6087, Train Steps/Sec: 1.21
|
| 804 |
+
[[34m2025-10-28 16:04:27[0m] (step=0069600) Train Loss: 0.6098, Train Steps/Sec: 1.21
|
| 805 |
+
[[34m2025-10-28 16:05:49[0m] (step=0069700) Train Loss: 0.6097, Train Steps/Sec: 1.21
|
| 806 |
+
[[34m2025-10-28 16:07:12[0m] (step=0069800) Train Loss: 0.6079, Train Steps/Sec: 1.21
|
| 807 |
+
[[34m2025-10-28 16:08:34[0m] (step=0069900) Train Loss: 0.6083, Train Steps/Sec: 1.21
|
| 808 |
+
[[34m2025-10-28 16:09:57[0m] (step=0070000) Train Loss: 0.6086, Train Steps/Sec: 1.21
|
| 809 |
+
[[34m2025-10-28 16:10:43[0m] Beginning epoch 56...
|
| 810 |
+
[[34m2025-10-28 16:11:21[0m] (step=0070100) Train Loss: 0.6092, Train Steps/Sec: 1.18
|
| 811 |
+
[[34m2025-10-28 16:12:44[0m] (step=0070200) Train Loss: 0.6095, Train Steps/Sec: 1.21
|
| 812 |
+
[[34m2025-10-28 16:14:06[0m] (step=0070300) Train Loss: 0.6083, Train Steps/Sec: 1.21
|
| 813 |
+
[[34m2025-10-28 16:15:29[0m] (step=0070400) Train Loss: 0.6085, Train Steps/Sec: 1.21
|
| 814 |
+
[[34m2025-10-28 16:16:51[0m] (step=0070500) Train Loss: 0.6085, Train Steps/Sec: 1.21
|
| 815 |
+
[[34m2025-10-28 16:18:14[0m] (step=0070600) Train Loss: 0.6084, Train Steps/Sec: 1.21
|
| 816 |
+
[[34m2025-10-28 16:19:37[0m] (step=0070700) Train Loss: 0.6095, Train Steps/Sec: 1.21
|
| 817 |
+
[[34m2025-10-28 16:20:59[0m] (step=0070800) Train Loss: 0.6085, Train Steps/Sec: 1.21
|
| 818 |
+
[[34m2025-10-28 16:22:22[0m] (step=0070900) Train Loss: 0.6107, Train Steps/Sec: 1.21
|
| 819 |
+
[[34m2025-10-28 16:23:44[0m] (step=0071000) Train Loss: 0.6099, Train Steps/Sec: 1.21
|
| 820 |
+
[[34m2025-10-28 16:25:06[0m] (step=0071100) Train Loss: 0.6091, Train Steps/Sec: 1.21
|
| 821 |
+
[[34m2025-10-28 16:26:29[0m] (step=0071200) Train Loss: 0.6084, Train Steps/Sec: 1.21
|
| 822 |
+
[[34m2025-10-28 16:27:51[0m] (step=0071300) Train Loss: 0.6090, Train Steps/Sec: 1.21
|
| 823 |
+
[[34m2025-10-28 16:27:58[0m] Beginning epoch 57...
|
| 824 |
+
[[34m2025-10-28 16:29:16[0m] (step=0071400) Train Loss: 0.6091, Train Steps/Sec: 1.18
|
| 825 |
+
[[34m2025-10-28 16:30:39[0m] (step=0071500) Train Loss: 0.6085, Train Steps/Sec: 1.21
|
| 826 |
+
[[34m2025-10-28 16:32:01[0m] (step=0071600) Train Loss: 0.6100, Train Steps/Sec: 1.21
|
| 827 |
+
[[34m2025-10-28 16:33:24[0m] (step=0071700) Train Loss: 0.6095, Train Steps/Sec: 1.21
|
| 828 |
+
[[34m2025-10-28 16:34:46[0m] (step=0071800) Train Loss: 0.6093, Train Steps/Sec: 1.21
|
| 829 |
+
[[34m2025-10-28 16:36:08[0m] (step=0071900) Train Loss: 0.6084, Train Steps/Sec: 1.21
|
| 830 |
+
[[34m2025-10-28 16:37:31[0m] (step=0072000) Train Loss: 0.6098, Train Steps/Sec: 1.21
|
| 831 |
+
[[34m2025-10-28 16:38:53[0m] (step=0072100) Train Loss: 0.6086, Train Steps/Sec: 1.21
|
| 832 |
+
[[34m2025-10-28 16:40:16[0m] (step=0072200) Train Loss: 0.6085, Train Steps/Sec: 1.21
|
| 833 |
+
[[34m2025-10-28 16:41:39[0m] (step=0072300) Train Loss: 0.6081, Train Steps/Sec: 1.21
|
| 834 |
+
[[34m2025-10-28 16:43:01[0m] (step=0072400) Train Loss: 0.6100, Train Steps/Sec: 1.21
|
| 835 |
+
[[34m2025-10-28 16:44:24[0m] (step=0072500) Train Loss: 0.6094, Train Steps/Sec: 1.21
|
| 836 |
+
[[34m2025-10-28 16:45:12[0m] Beginning epoch 58...
|
| 837 |
+
[[34m2025-10-28 16:45:49[0m] (step=0072600) Train Loss: 0.6090, Train Steps/Sec: 1.18
|
| 838 |
+
[[34m2025-10-28 16:47:11[0m] (step=0072700) Train Loss: 0.6089, Train Steps/Sec: 1.21
|
| 839 |
+
[[34m2025-10-28 16:48:33[0m] (step=0072800) Train Loss: 0.6082, Train Steps/Sec: 1.21
|
| 840 |
+
[[34m2025-10-28 16:49:56[0m] (step=0072900) Train Loss: 0.6080, Train Steps/Sec: 1.21
|
| 841 |
+
[[34m2025-10-28 16:51:18[0m] (step=0073000) Train Loss: 0.6088, Train Steps/Sec: 1.21
|
| 842 |
+
[[34m2025-10-28 16:52:41[0m] (step=0073100) Train Loss: 0.6083, Train Steps/Sec: 1.21
|
| 843 |
+
[[34m2025-10-28 16:54:03[0m] (step=0073200) Train Loss: 0.6084, Train Steps/Sec: 1.21
|
| 844 |
+
[[34m2025-10-28 16:55:26[0m] (step=0073300) Train Loss: 0.6061, Train Steps/Sec: 1.21
|
| 845 |
+
[[34m2025-10-28 16:56:48[0m] (step=0073400) Train Loss: 0.6090, Train Steps/Sec: 1.21
|
| 846 |
+
[[34m2025-10-28 16:58:10[0m] (step=0073500) Train Loss: 0.6081, Train Steps/Sec: 1.21
|
| 847 |
+
[[34m2025-10-28 16:59:33[0m] (step=0073600) Train Loss: 0.6096, Train Steps/Sec: 1.21
|
| 848 |
+
[[34m2025-10-28 17:00:55[0m] (step=0073700) Train Loss: 0.6094, Train Steps/Sec: 1.21
|
| 849 |
+
[[34m2025-10-28 17:02:18[0m] (step=0073800) Train Loss: 0.6077, Train Steps/Sec: 1.21
|
| 850 |
+
[[34m2025-10-28 17:02:26[0m] Beginning epoch 59...
|
| 851 |
+
[[34m2025-10-28 17:03:43[0m] (step=0073900) Train Loss: 0.6075, Train Steps/Sec: 1.17
|
| 852 |
+
[[34m2025-10-28 17:05:06[0m] (step=0074000) Train Loss: 0.6078, Train Steps/Sec: 1.21
|
| 853 |
+
[[34m2025-10-28 17:06:28[0m] (step=0074100) Train Loss: 0.6098, Train Steps/Sec: 1.21
|
| 854 |
+
[[34m2025-10-28 17:07:51[0m] (step=0074200) Train Loss: 0.6081, Train Steps/Sec: 1.21
|
| 855 |
+
[[34m2025-10-28 17:09:13[0m] (step=0074300) Train Loss: 0.6091, Train Steps/Sec: 1.21
|
| 856 |
+
[[34m2025-10-28 17:10:35[0m] (step=0074400) Train Loss: 0.6068, Train Steps/Sec: 1.21
|
| 857 |
+
[[34m2025-10-28 17:11:58[0m] (step=0074500) Train Loss: 0.6083, Train Steps/Sec: 1.21
|
| 858 |
+
[[34m2025-10-28 17:13:20[0m] (step=0074600) Train Loss: 0.6077, Train Steps/Sec: 1.21
|
| 859 |
+
[[34m2025-10-28 17:14:43[0m] (step=0074700) Train Loss: 0.6083, Train Steps/Sec: 1.21
|
| 860 |
+
[[34m2025-10-28 17:16:05[0m] (step=0074800) Train Loss: 0.6091, Train Steps/Sec: 1.21
|
| 861 |
+
[[34m2025-10-28 17:17:28[0m] (step=0074900) Train Loss: 0.6068, Train Steps/Sec: 1.21
|
| 862 |
+
[[34m2025-10-28 17:18:50[0m] (step=0075000) Train Loss: 0.6084, Train Steps/Sec: 1.21
|
| 863 |
+
[[34m2025-10-28 17:19:46[0m] Saved checkpoint to results/stage2/hfdata/lightningdit-xl-pe-vit-b-bf16/checkpoints/0075000.pt
|
| 864 |
+
[[34m2025-10-28 17:19:46[0m] Generating EMA samples...
|
| 865 |
+
[[34m2025-10-28 17:20:15[0m] Generating EMA samples done.
|
| 866 |
+
[[34m2025-10-28 17:21:04[0m] Beginning epoch 60...
|
| 867 |
+
[[34m2025-10-28 17:21:39[0m] (step=0075100) Train Loss: 0.6078, Train Steps/Sec: 0.59
|
| 868 |
+
[[34m2025-10-28 17:23:02[0m] (step=0075200) Train Loss: 0.6084, Train Steps/Sec: 1.21
|
| 869 |
+
[[34m2025-10-28 17:24:24[0m] (step=0075300) Train Loss: 0.6084, Train Steps/Sec: 1.21
|
| 870 |
+
[[34m2025-10-28 17:25:47[0m] (step=0075400) Train Loss: 0.6077, Train Steps/Sec: 1.21
|
| 871 |
+
[[34m2025-10-28 17:27:09[0m] (step=0075500) Train Loss: 0.6087, Train Steps/Sec: 1.21
|
| 872 |
+
[[34m2025-10-28 17:28:32[0m] (step=0075600) Train Loss: 0.6071, Train Steps/Sec: 1.21
|
| 873 |
+
[[34m2025-10-28 17:29:55[0m] (step=0075700) Train Loss: 0.6082, Train Steps/Sec: 1.21
|
| 874 |
+
[[34m2025-10-28 17:31:17[0m] (step=0075800) Train Loss: 0.6081, Train Steps/Sec: 1.22
|
| 875 |
+
[[34m2025-10-28 17:32:39[0m] (step=0075900) Train Loss: 0.6084, Train Steps/Sec: 1.22
|
| 876 |
+
[[34m2025-10-28 17:34:02[0m] (step=0076000) Train Loss: 0.6077, Train Steps/Sec: 1.21
|
| 877 |
+
[[34m2025-10-28 17:35:24[0m] (step=0076100) Train Loss: 0.6083, Train Steps/Sec: 1.22
|
| 878 |
+
[[34m2025-10-28 17:36:46[0m] (step=0076200) Train Loss: 0.6075, Train Steps/Sec: 1.22
|
| 879 |
+
[[34m2025-10-28 17:38:09[0m] (step=0076300) Train Loss: 0.6083, Train Steps/Sec: 1.21
|
| 880 |
+
[[34m2025-10-28 17:38:18[0m] Beginning epoch 61...
|
| 881 |
+
[[34m2025-10-28 17:39:33[0m] (step=0076400) Train Loss: 0.6076, Train Steps/Sec: 1.18
|
| 882 |
+
[[34m2025-10-28 17:40:56[0m] (step=0076500) Train Loss: 0.6080, Train Steps/Sec: 1.21
|
| 883 |
+
[[34m2025-10-28 17:42:18[0m] (step=0076600) Train Loss: 0.6083, Train Steps/Sec: 1.21
|
| 884 |
+
[[34m2025-10-28 17:43:41[0m] (step=0076700) Train Loss: 0.6059, Train Steps/Sec: 1.21
|
| 885 |
+
[[34m2025-10-28 17:45:03[0m] (step=0076800) Train Loss: 0.6076, Train Steps/Sec: 1.21
|
| 886 |
+
[[34m2025-10-28 17:46:25[0m] (step=0076900) Train Loss: 0.6068, Train Steps/Sec: 1.21
|
| 887 |
+
[[34m2025-10-28 17:47:48[0m] (step=0077000) Train Loss: 0.6079, Train Steps/Sec: 1.21
|
| 888 |
+
[[34m2025-10-28 17:49:10[0m] (step=0077100) Train Loss: 0.6074, Train Steps/Sec: 1.21
|
| 889 |
+
[[34m2025-10-28 17:50:33[0m] (step=0077200) Train Loss: 0.6066, Train Steps/Sec: 1.21
|
| 890 |
+
[[34m2025-10-28 17:51:56[0m] (step=0077300) Train Loss: 0.6080, Train Steps/Sec: 1.20
|
| 891 |
+
[[34m2025-10-28 17:53:18[0m] (step=0077400) Train Loss: 0.6069, Train Steps/Sec: 1.21
|
| 892 |
+
[[34m2025-10-28 17:54:41[0m] (step=0077500) Train Loss: 0.6072, Train Steps/Sec: 1.22
|
| 893 |
+
[[34m2025-10-28 17:55:32[0m] Beginning epoch 62...
|
| 894 |
+
[[34m2025-10-28 17:56:06[0m] (step=0077600) Train Loss: 0.6077, Train Steps/Sec: 1.18
|
| 895 |
+
[[34m2025-10-28 17:57:28[0m] (step=0077700) Train Loss: 0.6072, Train Steps/Sec: 1.21
|
| 896 |
+
[[34m2025-10-28 17:58:50[0m] (step=0077800) Train Loss: 0.6079, Train Steps/Sec: 1.21
|
| 897 |
+
[[34m2025-10-28 18:00:13[0m] (step=0077900) Train Loss: 0.6074, Train Steps/Sec: 1.21
|
| 898 |
+
[[34m2025-10-28 18:01:35[0m] (step=0078000) Train Loss: 0.6079, Train Steps/Sec: 1.21
|
| 899 |
+
[[34m2025-10-28 18:02:58[0m] (step=0078100) Train Loss: 0.6066, Train Steps/Sec: 1.21
|
| 900 |
+
[[34m2025-10-28 18:04:20[0m] (step=0078200) Train Loss: 0.6086, Train Steps/Sec: 1.21
|
| 901 |
+
[[34m2025-10-28 18:05:42[0m] (step=0078300) Train Loss: 0.6064, Train Steps/Sec: 1.21
|
| 902 |
+
[[34m2025-10-28 18:07:05[0m] (step=0078400) Train Loss: 0.6064, Train Steps/Sec: 1.21
|
| 903 |
+
[[34m2025-10-28 18:08:27[0m] (step=0078500) Train Loss: 0.6069, Train Steps/Sec: 1.21
|
| 904 |
+
[[34m2025-10-28 18:09:50[0m] (step=0078600) Train Loss: 0.6057, Train Steps/Sec: 1.21
|
| 905 |
+
[[34m2025-10-28 18:11:12[0m] (step=0078700) Train Loss: 0.6056, Train Steps/Sec: 1.21
|
| 906 |
+
[[34m2025-10-28 18:12:34[0m] (step=0078800) Train Loss: 0.6067, Train Steps/Sec: 1.21
|
| 907 |
+
[[34m2025-10-28 18:12:46[0m] Beginning epoch 63...
|
| 908 |
+
[[34m2025-10-28 18:14:00[0m] (step=0078900) Train Loss: 0.6065, Train Steps/Sec: 1.17
|
| 909 |
+
[[34m2025-10-28 18:15:22[0m] (step=0079000) Train Loss: 0.6057, Train Steps/Sec: 1.21
|
| 910 |
+
[[34m2025-10-28 18:16:45[0m] (step=0079100) Train Loss: 0.6067, Train Steps/Sec: 1.21
|
| 911 |
+
[[34m2025-10-28 18:18:07[0m] (step=0079200) Train Loss: 0.6064, Train Steps/Sec: 1.21
|
| 912 |
+
[[34m2025-10-28 18:19:30[0m] (step=0079300) Train Loss: 0.6063, Train Steps/Sec: 1.21
|
| 913 |
+
[[34m2025-10-28 18:20:52[0m] (step=0079400) Train Loss: 0.6074, Train Steps/Sec: 1.21
|
| 914 |
+
[[34m2025-10-28 18:22:14[0m] (step=0079500) Train Loss: 0.6067, Train Steps/Sec: 1.21
|
| 915 |
+
[[34m2025-10-28 18:23:36[0m] (step=0079600) Train Loss: 0.6069, Train Steps/Sec: 1.21
|
| 916 |
+
[[34m2025-10-28 18:24:59[0m] (step=0079700) Train Loss: 0.6072, Train Steps/Sec: 1.22
|
| 917 |
+
[[34m2025-10-28 18:26:21[0m] (step=0079800) Train Loss: 0.6064, Train Steps/Sec: 1.22
|
| 918 |
+
[[34m2025-10-28 18:27:43[0m] (step=0079900) Train Loss: 0.6068, Train Steps/Sec: 1.21
|
| 919 |
+
[[34m2025-10-28 18:29:06[0m] (step=0080000) Train Loss: 0.6059, Train Steps/Sec: 1.22
|
| 920 |
+
[[34m2025-10-28 18:29:59[0m] Beginning epoch 64...
|
| 921 |
+
[[34m2025-10-28 18:30:31[0m] (step=0080100) Train Loss: 0.6076, Train Steps/Sec: 1.17
|
| 922 |
+
[[34m2025-10-28 18:31:53[0m] (step=0080200) Train Loss: 0.6070, Train Steps/Sec: 1.21
|
| 923 |
+
[[34m2025-10-28 18:33:16[0m] (step=0080300) Train Loss: 0.6058, Train Steps/Sec: 1.21
|
| 924 |
+
[[34m2025-10-28 18:34:38[0m] (step=0080400) Train Loss: 0.6066, Train Steps/Sec: 1.21
|
| 925 |
+
[[34m2025-10-28 18:36:01[0m] (step=0080500) Train Loss: 0.6077, Train Steps/Sec: 1.21
|
| 926 |
+
[[34m2025-10-28 18:37:24[0m] (step=0080600) Train Loss: 0.6058, Train Steps/Sec: 1.20
|
| 927 |
+
[[34m2025-10-28 18:38:46[0m] (step=0080700) Train Loss: 0.6069, Train Steps/Sec: 1.21
|
| 928 |
+
[[34m2025-10-28 18:40:09[0m] (step=0080800) Train Loss: 0.6061, Train Steps/Sec: 1.21
|
| 929 |
+
[[34m2025-10-28 18:41:31[0m] (step=0080900) Train Loss: 0.6061, Train Steps/Sec: 1.21
|
| 930 |
+
[[34m2025-10-28 18:42:54[0m] (step=0081000) Train Loss: 0.6060, Train Steps/Sec: 1.21
|
| 931 |
+
[[34m2025-10-28 18:44:16[0m] (step=0081100) Train Loss: 0.6064, Train Steps/Sec: 1.21
|
| 932 |
+
[[34m2025-10-28 18:45:38[0m] (step=0081200) Train Loss: 0.6058, Train Steps/Sec: 1.21
|
| 933 |
+
[[34m2025-10-28 18:47:01[0m] (step=0081300) Train Loss: 0.6062, Train Steps/Sec: 1.21
|
| 934 |
+
[[34m2025-10-28 18:47:14[0m] Beginning epoch 65...
|
| 935 |
+
[[34m2025-10-28 18:48:26[0m] (step=0081400) Train Loss: 0.6064, Train Steps/Sec: 1.18
|
| 936 |
+
[[34m2025-10-28 18:49:48[0m] (step=0081500) Train Loss: 0.6055, Train Steps/Sec: 1.21
|
| 937 |
+
[[34m2025-10-28 18:51:11[0m] (step=0081600) Train Loss: 0.6057, Train Steps/Sec: 1.21
|
| 938 |
+
[[34m2025-10-28 18:52:33[0m] (step=0081700) Train Loss: 0.6056, Train Steps/Sec: 1.21
|
| 939 |
+
[[34m2025-10-28 18:53:55[0m] (step=0081800) Train Loss: 0.6063, Train Steps/Sec: 1.21
|
| 940 |
+
[[34m2025-10-28 18:55:18[0m] (step=0081900) Train Loss: 0.6063, Train Steps/Sec: 1.21
|
| 941 |
+
[[34m2025-10-28 18:56:40[0m] (step=0082000) Train Loss: 0.6060, Train Steps/Sec: 1.21
|
| 942 |
+
[[34m2025-10-28 18:58:02[0m] (step=0082100) Train Loss: 0.6064, Train Steps/Sec: 1.21
|
| 943 |
+
[[34m2025-10-28 18:59:25[0m] (step=0082200) Train Loss: 0.6061, Train Steps/Sec: 1.21
|
| 944 |
+
[[34m2025-10-28 19:00:48[0m] (step=0082300) Train Loss: 0.6056, Train Steps/Sec: 1.20
|
| 945 |
+
[[34m2025-10-28 19:02:10[0m] (step=0082400) Train Loss: 0.6068, Train Steps/Sec: 1.21
|
| 946 |
+
[[34m2025-10-28 19:03:33[0m] (step=0082500) Train Loss: 0.6050, Train Steps/Sec: 1.21
|
| 947 |
+
[[34m2025-10-28 19:04:28[0m] Beginning epoch 66...
|
| 948 |
+
[[34m2025-10-28 19:04:58[0m] (step=0082600) Train Loss: 0.6061, Train Steps/Sec: 1.18
|
| 949 |
+
[[34m2025-10-28 19:06:20[0m] (step=0082700) Train Loss: 0.6057, Train Steps/Sec: 1.21
|
| 950 |
+
[[34m2025-10-28 19:07:43[0m] (step=0082800) Train Loss: 0.6071, Train Steps/Sec: 1.21
|
| 951 |
+
[[34m2025-10-28 19:09:05[0m] (step=0082900) Train Loss: 0.6059, Train Steps/Sec: 1.21
|
| 952 |
+
[[34m2025-10-28 19:10:27[0m] (step=0083000) Train Loss: 0.6051, Train Steps/Sec: 1.21
|
| 953 |
+
[[34m2025-10-28 19:11:50[0m] (step=0083100) Train Loss: 0.6057, Train Steps/Sec: 1.21
|
| 954 |
+
[[34m2025-10-28 19:13:12[0m] (step=0083200) Train Loss: 0.6050, Train Steps/Sec: 1.21
|
| 955 |
+
[[34m2025-10-28 19:14:35[0m] (step=0083300) Train Loss: 0.6062, Train Steps/Sec: 1.21
|
| 956 |
+
[[34m2025-10-28 19:15:57[0m] (step=0083400) Train Loss: 0.6073, Train Steps/Sec: 1.21
|
| 957 |
+
[[34m2025-10-28 19:17:20[0m] (step=0083500) Train Loss: 0.6045, Train Steps/Sec: 1.21
|
| 958 |
+
[[34m2025-10-28 19:18:42[0m] (step=0083600) Train Loss: 0.6055, Train Steps/Sec: 1.21
|
| 959 |
+
[[34m2025-10-28 19:20:04[0m] (step=0083700) Train Loss: 0.6058, Train Steps/Sec: 1.21
|
| 960 |
+
[[34m2025-10-28 19:21:27[0m] (step=0083800) Train Loss: 0.6056, Train Steps/Sec: 1.21
|
| 961 |
+
[[34m2025-10-28 19:21:41[0m] Beginning epoch 67...
|
| 962 |
+
[[34m2025-10-28 19:22:52[0m] (step=0083900) Train Loss: 0.6058, Train Steps/Sec: 1.17
|
| 963 |
+
[[34m2025-10-28 19:24:15[0m] (step=0084000) Train Loss: 0.6065, Train Steps/Sec: 1.21
|
| 964 |
+
[[34m2025-10-28 19:25:37[0m] (step=0084100) Train Loss: 0.6035, Train Steps/Sec: 1.21
|
| 965 |
+
[[34m2025-10-28 19:27:00[0m] (step=0084200) Train Loss: 0.6055, Train Steps/Sec: 1.21
|
| 966 |
+
[[34m2025-10-28 19:28:22[0m] (step=0084300) Train Loss: 0.6057, Train Steps/Sec: 1.21
|
| 967 |
+
[[34m2025-10-28 19:29:45[0m] (step=0084400) Train Loss: 0.6046, Train Steps/Sec: 1.21
|
| 968 |
+
[[34m2025-10-28 19:31:07[0m] (step=0084500) Train Loss: 0.6051, Train Steps/Sec: 1.21
|
| 969 |
+
[[34m2025-10-28 19:32:29[0m] (step=0084600) Train Loss: 0.6055, Train Steps/Sec: 1.21
|
| 970 |
+
[[34m2025-10-28 19:33:52[0m] (step=0084700) Train Loss: 0.6046, Train Steps/Sec: 1.21
|
| 971 |
+
[[34m2025-10-28 19:35:15[0m] (step=0084800) Train Loss: 0.6044, Train Steps/Sec: 1.21
|
| 972 |
+
[[34m2025-10-28 19:36:37[0m] (step=0084900) Train Loss: 0.6061, Train Steps/Sec: 1.21
|
| 973 |
+
[[34m2025-10-28 19:37:59[0m] (step=0085000) Train Loss: 0.6055, Train Steps/Sec: 1.21
|
| 974 |
+
[[34m2025-10-28 19:38:56[0m] Beginning epoch 68...
|
| 975 |
+
[[34m2025-10-28 19:39:24[0m] (step=0085100) Train Loss: 0.6062, Train Steps/Sec: 1.18
|
| 976 |
+
[[34m2025-10-28 19:40:47[0m] (step=0085200) Train Loss: 0.6058, Train Steps/Sec: 1.21
|
| 977 |
+
[[34m2025-10-28 19:42:09[0m] (step=0085300) Train Loss: 0.6043, Train Steps/Sec: 1.21
|
| 978 |
+
[[34m2025-10-28 19:43:32[0m] (step=0085400) Train Loss: 0.6036, Train Steps/Sec: 1.21
|
| 979 |
+
[[34m2025-10-28 19:44:54[0m] (step=0085500) Train Loss: 0.6059, Train Steps/Sec: 1.21
|
| 980 |
+
[[34m2025-10-28 19:46:17[0m] (step=0085600) Train Loss: 0.6038, Train Steps/Sec: 1.20
|
| 981 |
+
[[34m2025-10-28 19:47:40[0m] (step=0085700) Train Loss: 0.6058, Train Steps/Sec: 1.21
|
| 982 |
+
[[34m2025-10-28 19:49:02[0m] (step=0085800) Train Loss: 0.6043, Train Steps/Sec: 1.21
|
| 983 |
+
[[34m2025-10-28 19:50:24[0m] (step=0085900) Train Loss: 0.6040, Train Steps/Sec: 1.21
|
| 984 |
+
[[34m2025-10-28 19:51:47[0m] (step=0086000) Train Loss: 0.6060, Train Steps/Sec: 1.21
|
| 985 |
+
[[34m2025-10-28 19:53:09[0m] (step=0086100) Train Loss: 0.6057, Train Steps/Sec: 1.21
|
| 986 |
+
[[34m2025-10-28 19:54:31[0m] (step=0086200) Train Loss: 0.6067, Train Steps/Sec: 1.21
|
| 987 |
+
[[34m2025-10-28 19:55:54[0m] (step=0086300) Train Loss: 0.6045, Train Steps/Sec: 1.21
|
| 988 |
+
[[34m2025-10-28 19:56:10[0m] Beginning epoch 69...
|
| 989 |
+
[[34m2025-10-28 19:57:19[0m] (step=0086400) Train Loss: 0.6041, Train Steps/Sec: 1.18
|
| 990 |
+
[[34m2025-10-28 19:58:41[0m] (step=0086500) Train Loss: 0.6041, Train Steps/Sec: 1.21
|
| 991 |
+
[[34m2025-10-28 20:00:03[0m] (step=0086600) Train Loss: 0.6042, Train Steps/Sec: 1.21
|
| 992 |
+
[[34m2025-10-28 20:01:26[0m] (step=0086700) Train Loss: 0.6051, Train Steps/Sec: 1.21
|
| 993 |
+
[[34m2025-10-28 20:02:48[0m] (step=0086800) Train Loss: 0.6049, Train Steps/Sec: 1.21
|
| 994 |
+
[[34m2025-10-28 20:04:11[0m] (step=0086900) Train Loss: 0.6049, Train Steps/Sec: 1.21
|
| 995 |
+
[[34m2025-10-28 20:05:33[0m] (step=0087000) Train Loss: 0.6051, Train Steps/Sec: 1.21
|
| 996 |
+
[[34m2025-10-28 20:06:55[0m] (step=0087100) Train Loss: 0.6048, Train Steps/Sec: 1.21
|
| 997 |
+
[[34m2025-10-28 20:08:18[0m] (step=0087200) Train Loss: 0.6048, Train Steps/Sec: 1.21
|
| 998 |
+
[[34m2025-10-28 20:09:41[0m] (step=0087300) Train Loss: 0.6050, Train Steps/Sec: 1.21
|
| 999 |
+
[[34m2025-10-28 20:11:03[0m] (step=0087400) Train Loss: 0.6041, Train Steps/Sec: 1.22
|
| 1000 |
+
[[34m2025-10-28 20:12:26[0m] (step=0087500) Train Loss: 0.6052, Train Steps/Sec: 1.22
|
| 1001 |
+
[[34m2025-10-28 20:13:24[0m] Beginning epoch 70...
|
| 1002 |
+
[[34m2025-10-28 20:13:51[0m] (step=0087600) Train Loss: 0.6043, Train Steps/Sec: 1.18
|
| 1003 |
+
[[34m2025-10-28 20:15:13[0m] (step=0087700) Train Loss: 0.6040, Train Steps/Sec: 1.21
|
| 1004 |
+
[[34m2025-10-28 20:16:35[0m] (step=0087800) Train Loss: 0.6047, Train Steps/Sec: 1.21
|
| 1005 |
+
[[34m2025-10-28 20:17:58[0m] (step=0087900) Train Loss: 0.6050, Train Steps/Sec: 1.21
|
| 1006 |
+
[[34m2025-10-28 20:19:20[0m] (step=0088000) Train Loss: 0.6056, Train Steps/Sec: 1.21
|
| 1007 |
+
[[34m2025-10-28 20:20:43[0m] (step=0088100) Train Loss: 0.6041, Train Steps/Sec: 1.21
|
| 1008 |
+
[[34m2025-10-28 20:22:05[0m] (step=0088200) Train Loss: 0.6028, Train Steps/Sec: 1.21
|
| 1009 |
+
[[34m2025-10-28 20:23:27[0m] (step=0088300) Train Loss: 0.6055, Train Steps/Sec: 1.21
|
| 1010 |
+
[[34m2025-10-28 20:24:50[0m] (step=0088400) Train Loss: 0.6056, Train Steps/Sec: 1.21
|
| 1011 |
+
[[34m2025-10-28 20:26:12[0m] (step=0088500) Train Loss: 0.6052, Train Steps/Sec: 1.21
|
| 1012 |
+
[[34m2025-10-28 20:27:35[0m] (step=0088600) Train Loss: 0.6060, Train Steps/Sec: 1.21
|
| 1013 |
+
[[34m2025-10-28 20:28:57[0m] (step=0088700) Train Loss: 0.6047, Train Steps/Sec: 1.21
|
| 1014 |
+
[[34m2025-10-28 20:30:20[0m] (step=0088800) Train Loss: 0.6041, Train Steps/Sec: 1.21
|
| 1015 |
+
[[34m2025-10-28 20:30:37[0m] Beginning epoch 71...
|
| 1016 |
+
[[34m2025-10-28 20:31:45[0m] (step=0088900) Train Loss: 0.6052, Train Steps/Sec: 1.17
|
| 1017 |
+
[[34m2025-10-28 20:33:08[0m] (step=0089000) Train Loss: 0.6051, Train Steps/Sec: 1.21
|
| 1018 |
+
[[34m2025-10-28 20:34:30[0m] (step=0089100) Train Loss: 0.6050, Train Steps/Sec: 1.21
|
| 1019 |
+
[[34m2025-10-28 20:35:52[0m] (step=0089200) Train Loss: 0.6039, Train Steps/Sec: 1.21
|
| 1020 |
+
[[34m2025-10-28 20:37:15[0m] (step=0089300) Train Loss: 0.6048, Train Steps/Sec: 1.21
|
| 1021 |
+
[[34m2025-10-28 20:38:37[0m] (step=0089400) Train Loss: 0.6039, Train Steps/Sec: 1.21
|
| 1022 |
+
[[34m2025-10-28 20:39:59[0m] (step=0089500) Train Loss: 0.6039, Train Steps/Sec: 1.21
|
| 1023 |
+
[[34m2025-10-28 20:41:22[0m] (step=0089600) Train Loss: 0.6028, Train Steps/Sec: 1.21
|
| 1024 |
+
[[34m2025-10-28 20:42:44[0m] (step=0089700) Train Loss: 0.6043, Train Steps/Sec: 1.21
|
| 1025 |
+
[[34m2025-10-28 20:44:07[0m] (step=0089800) Train Loss: 0.6061, Train Steps/Sec: 1.21
|
| 1026 |
+
[[34m2025-10-28 20:45:29[0m] (step=0089900) Train Loss: 0.6019, Train Steps/Sec: 1.21
|
| 1027 |
+
[[34m2025-10-28 20:46:51[0m] (step=0090000) Train Loss: 0.6035, Train Steps/Sec: 1.21
|
| 1028 |
+
[[34m2025-10-28 20:47:51[0m] Beginning epoch 72...
|
| 1029 |
+
[[34m2025-10-28 20:48:16[0m] (step=0090100) Train Loss: 0.6032, Train Steps/Sec: 1.18
|
| 1030 |
+
[[34m2025-10-28 20:49:39[0m] (step=0090200) Train Loss: 0.6047, Train Steps/Sec: 1.21
|
| 1031 |
+
[[34m2025-10-28 20:51:01[0m] (step=0090300) Train Loss: 0.6027, Train Steps/Sec: 1.21
|
| 1032 |
+
[[34m2025-10-28 20:52:24[0m] (step=0090400) Train Loss: 0.6056, Train Steps/Sec: 1.21
|
| 1033 |
+
[[34m2025-10-28 20:53:46[0m] (step=0090500) Train Loss: 0.6045, Train Steps/Sec: 1.21
|
| 1034 |
+
[[34m2025-10-28 20:55:09[0m] (step=0090600) Train Loss: 0.6045, Train Steps/Sec: 1.20
|
| 1035 |
+
[[34m2025-10-28 20:56:32[0m] (step=0090700) Train Loss: 0.6047, Train Steps/Sec: 1.21
|
| 1036 |
+
[[34m2025-10-28 20:57:54[0m] (step=0090800) Train Loss: 0.6037, Train Steps/Sec: 1.21
|
| 1037 |
+
[[34m2025-10-28 20:59:16[0m] (step=0090900) Train Loss: 0.6054, Train Steps/Sec: 1.21
|
| 1038 |
+
[[34m2025-10-28 21:00:38[0m] (step=0091000) Train Loss: 0.6043, Train Steps/Sec: 1.22
|
| 1039 |
+
[[34m2025-10-28 21:02:01[0m] (step=0091100) Train Loss: 0.6042, Train Steps/Sec: 1.22
|
| 1040 |
+
[[34m2025-10-28 21:03:23[0m] (step=0091200) Train Loss: 0.6042, Train Steps/Sec: 1.22
|
| 1041 |
+
[[34m2025-10-28 21:04:45[0m] (step=0091300) Train Loss: 0.6051, Train Steps/Sec: 1.22
|
| 1042 |
+
[[34m2025-10-28 21:05:05[0m] Beginning epoch 73...
|
| 1043 |
+
[[34m2025-10-28 21:06:10[0m] (step=0091400) Train Loss: 0.6041, Train Steps/Sec: 1.18
|
| 1044 |
+
[[34m2025-10-28 21:07:33[0m] (step=0091500) Train Loss: 0.6042, Train Steps/Sec: 1.21
|
| 1045 |
+
[[34m2025-10-28 21:08:55[0m] (step=0091600) Train Loss: 0.6034, Train Steps/Sec: 1.21
|
| 1046 |
+
[[34m2025-10-28 21:10:18[0m] (step=0091700) Train Loss: 0.6041, Train Steps/Sec: 1.21
|
| 1047 |
+
[[34m2025-10-28 21:11:40[0m] (step=0091800) Train Loss: 0.6046, Train Steps/Sec: 1.21
|
| 1048 |
+
[[34m2025-10-28 21:13:02[0m] (step=0091900) Train Loss: 0.6040, Train Steps/Sec: 1.21
|
| 1049 |
+
[[34m2025-10-28 21:14:25[0m] (step=0092000) Train Loss: 0.6046, Train Steps/Sec: 1.21
|
| 1050 |
+
[[34m2025-10-28 21:15:47[0m] (step=0092100) Train Loss: 0.6038, Train Steps/Sec: 1.21
|
| 1051 |
+
[[34m2025-10-28 21:17:10[0m] (step=0092200) Train Loss: 0.6029, Train Steps/Sec: 1.21
|
| 1052 |
+
[[34m2025-10-28 21:18:33[0m] (step=0092300) Train Loss: 0.6034, Train Steps/Sec: 1.21
|
| 1053 |
+
[[34m2025-10-28 21:19:55[0m] (step=0092400) Train Loss: 0.6039, Train Steps/Sec: 1.21
|
| 1054 |
+
[[34m2025-10-28 21:21:17[0m] (step=0092500) Train Loss: 0.6033, Train Steps/Sec: 1.21
|
| 1055 |
+
[[34m2025-10-28 21:22:19[0m] Beginning epoch 74...
|
| 1056 |
+
[[34m2025-10-28 21:22:42[0m] (step=0092600) Train Loss: 0.6031, Train Steps/Sec: 1.18
|
| 1057 |
+
[[34m2025-10-28 21:24:05[0m] (step=0092700) Train Loss: 0.6028, Train Steps/Sec: 1.21
|
| 1058 |
+
[[34m2025-10-28 21:25:27[0m] (step=0092800) Train Loss: 0.6040, Train Steps/Sec: 1.21
|
| 1059 |
+
[[34m2025-10-28 21:26:50[0m] (step=0092900) Train Loss: 0.6033, Train Steps/Sec: 1.21
|
| 1060 |
+
[[34m2025-10-28 21:28:12[0m] (step=0093000) Train Loss: 0.6022, Train Steps/Sec: 1.21
|
| 1061 |
+
[[34m2025-10-28 21:29:35[0m] (step=0093100) Train Loss: 0.6023, Train Steps/Sec: 1.21
|
| 1062 |
+
[[34m2025-10-28 21:30:57[0m] (step=0093200) Train Loss: 0.6036, Train Steps/Sec: 1.21
|
| 1063 |
+
[[34m2025-10-28 21:32:19[0m] (step=0093300) Train Loss: 0.6028, Train Steps/Sec: 1.21
|
| 1064 |
+
[[34m2025-10-28 21:33:42[0m] (step=0093400) Train Loss: 0.6027, Train Steps/Sec: 1.21
|
| 1065 |
+
[[34m2025-10-28 21:35:04[0m] (step=0093500) Train Loss: 0.6032, Train Steps/Sec: 1.21
|
| 1066 |
+
[[34m2025-10-28 21:36:27[0m] (step=0093600) Train Loss: 0.6033, Train Steps/Sec: 1.21
|
| 1067 |
+
[[34m2025-10-28 21:37:49[0m] (step=0093700) Train Loss: 0.6050, Train Steps/Sec: 1.21
|
| 1068 |
+
[[34m2025-10-28 21:39:11[0m] (step=0093800) Train Loss: 0.6031, Train Steps/Sec: 1.21
|
| 1069 |
+
[[34m2025-10-28 21:39:33[0m] Beginning epoch 75...
|
| 1070 |
+
[[34m2025-10-28 21:40:37[0m] (step=0093900) Train Loss: 0.6038, Train Steps/Sec: 1.17
|
| 1071 |
+
[[34m2025-10-28 21:42:00[0m] (step=0094000) Train Loss: 0.6031, Train Steps/Sec: 1.21
|
| 1072 |
+
[[34m2025-10-28 21:43:22[0m] (step=0094100) Train Loss: 0.6031, Train Steps/Sec: 1.21
|
| 1073 |
+
[[34m2025-10-28 21:44:44[0m] (step=0094200) Train Loss: 0.6030, Train Steps/Sec: 1.21
|
| 1074 |
+
[[34m2025-10-28 21:46:07[0m] (step=0094300) Train Loss: 0.6033, Train Steps/Sec: 1.21
|
| 1075 |
+
[[34m2025-10-28 21:47:29[0m] (step=0094400) Train Loss: 0.6032, Train Steps/Sec: 1.21
|
| 1076 |
+
[[34m2025-10-28 21:48:52[0m] (step=0094500) Train Loss: 0.6027, Train Steps/Sec: 1.21
|
| 1077 |
+
[[34m2025-10-28 21:50:14[0m] (step=0094600) Train Loss: 0.6039, Train Steps/Sec: 1.21
|
| 1078 |
+
[[34m2025-10-28 21:51:37[0m] (step=0094700) Train Loss: 0.6037, Train Steps/Sec: 1.21
|
| 1079 |
+
[[34m2025-10-28 21:52:59[0m] (step=0094800) Train Loss: 0.6034, Train Steps/Sec: 1.21
|
| 1080 |
+
[[34m2025-10-28 21:54:21[0m] (step=0094900) Train Loss: 0.6046, Train Steps/Sec: 1.21
|
| 1081 |
+
[[34m2025-10-28 21:55:44[0m] (step=0095000) Train Loss: 0.6017, Train Steps/Sec: 1.21
|
| 1082 |
+
[[34m2025-10-28 21:56:47[0m] Beginning epoch 76...
|
| 1083 |
+
[[34m2025-10-28 21:57:09[0m] (step=0095100) Train Loss: 0.6027, Train Steps/Sec: 1.18
|
| 1084 |
+
[[34m2025-10-28 21:58:31[0m] (step=0095200) Train Loss: 0.6028, Train Steps/Sec: 1.21
|
| 1085 |
+
[[34m2025-10-28 21:59:53[0m] (step=0095300) Train Loss: 0.6024, Train Steps/Sec: 1.21
|
| 1086 |
+
[[34m2025-10-28 22:01:16[0m] (step=0095400) Train Loss: 0.6033, Train Steps/Sec: 1.21
|
| 1087 |
+
[[34m2025-10-28 22:02:38[0m] (step=0095500) Train Loss: 0.6030, Train Steps/Sec: 1.22
|
| 1088 |
+
[[34m2025-10-28 22:04:01[0m] (step=0095600) Train Loss: 0.6013, Train Steps/Sec: 1.20
|
| 1089 |
+
[[34m2025-10-28 22:05:24[0m] (step=0095700) Train Loss: 0.6041, Train Steps/Sec: 1.21
|
| 1090 |
+
[[34m2025-10-28 22:06:46[0m] (step=0095800) Train Loss: 0.6020, Train Steps/Sec: 1.21
|
| 1091 |
+
[[34m2025-10-28 22:08:09[0m] (step=0095900) Train Loss: 0.6038, Train Steps/Sec: 1.21
|
| 1092 |
+
[[34m2025-10-28 22:09:31[0m] (step=0096000) Train Loss: 0.6043, Train Steps/Sec: 1.21
|
| 1093 |
+
[[34m2025-10-28 22:10:54[0m] (step=0096100) Train Loss: 0.6040, Train Steps/Sec: 1.21
|
| 1094 |
+
[[34m2025-10-28 22:12:16[0m] (step=0096200) Train Loss: 0.6036, Train Steps/Sec: 1.21
|
| 1095 |
+
[[34m2025-10-28 22:13:38[0m] (step=0096300) Train Loss: 0.6026, Train Steps/Sec: 1.21
|
| 1096 |
+
[[34m2025-10-28 22:14:01[0m] Beginning epoch 77...
|
| 1097 |
+
[[34m2025-10-28 22:15:03[0m] (step=0096400) Train Loss: 0.6027, Train Steps/Sec: 1.18
|
| 1098 |
+
[[34m2025-10-28 22:16:26[0m] (step=0096500) Train Loss: 0.6010, Train Steps/Sec: 1.21
|
| 1099 |
+
[[34m2025-10-28 22:17:48[0m] (step=0096600) Train Loss: 0.6014, Train Steps/Sec: 1.21
|
| 1100 |
+
[[34m2025-10-28 22:19:11[0m] (step=0096700) Train Loss: 0.6017, Train Steps/Sec: 1.21
|
| 1101 |
+
[[34m2025-10-28 22:20:33[0m] (step=0096800) Train Loss: 0.6028, Train Steps/Sec: 1.21
|
| 1102 |
+
[[34m2025-10-28 22:21:55[0m] (step=0096900) Train Loss: 0.6029, Train Steps/Sec: 1.21
|
| 1103 |
+
[[34m2025-10-28 22:23:18[0m] (step=0097000) Train Loss: 0.6023, Train Steps/Sec: 1.21
|
| 1104 |
+
[[34m2025-10-28 22:24:40[0m] (step=0097100) Train Loss: 0.6019, Train Steps/Sec: 1.21
|
| 1105 |
+
[[34m2025-10-28 22:26:03[0m] (step=0097200) Train Loss: 0.6038, Train Steps/Sec: 1.20
|
| 1106 |
+
[[34m2025-10-28 22:27:26[0m] (step=0097300) Train Loss: 0.6031, Train Steps/Sec: 1.21
|
| 1107 |
+
[[34m2025-10-28 22:28:49[0m] (step=0097400) Train Loss: 0.6031, Train Steps/Sec: 1.21
|
| 1108 |
+
[[34m2025-10-28 22:30:11[0m] (step=0097500) Train Loss: 0.6017, Train Steps/Sec: 1.21
|
| 1109 |
+
[[34m2025-10-28 22:31:16[0m] Beginning epoch 78...
|
| 1110 |
+
[[34m2025-10-28 22:31:36[0m] (step=0097600) Train Loss: 0.6025, Train Steps/Sec: 1.18
|
| 1111 |
+
[[34m2025-10-28 22:32:58[0m] (step=0097700) Train Loss: 0.6026, Train Steps/Sec: 1.21
|
| 1112 |
+
[[34m2025-10-28 22:34:21[0m] (step=0097800) Train Loss: 0.6030, Train Steps/Sec: 1.21
|
| 1113 |
+
[[34m2025-10-28 22:35:43[0m] (step=0097900) Train Loss: 0.6020, Train Steps/Sec: 1.21
|
| 1114 |
+
[[34m2025-10-28 22:37:06[0m] (step=0098000) Train Loss: 0.6021, Train Steps/Sec: 1.21
|
| 1115 |
+
[[34m2025-10-28 22:38:28[0m] (step=0098100) Train Loss: 0.6034, Train Steps/Sec: 1.21
|
| 1116 |
+
[[34m2025-10-28 22:39:50[0m] (step=0098200) Train Loss: 0.6023, Train Steps/Sec: 1.21
|