Auto-sync checkpoint during training
Browse files- best-train-loss.pt +2 -2
- best-valid-loss.pt +2 -2
- checkpoint-10000.pt +3 -0
- log/log-train-2026-01-13-11-44-05-0 +12 -0
- log/log-train-2026-01-13-11-44-05-1 +2 -0
best-train-loss.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87ddbc200560cee27b651e78ae7e7dbd116eedc6817617ee4bdd1d93640cb8ea
|
| 3 |
+
size 1141949779
|
best-valid-loss.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87ddbc200560cee27b651e78ae7e7dbd116eedc6817617ee4bdd1d93640cb8ea
|
| 3 |
+
size 1141949779
|
checkpoint-10000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db684253217c896a5baf48a45cb80099db53a70ac68ab118890c34d7c8396a62
|
| 3 |
+
size 1141965798
|
log/log-train-2026-01-13-11-44-05-0
CHANGED
|
@@ -1970,3 +1970,15 @@
|
|
| 1970 |
2026-01-13 13:45:12,829 INFO [train.py:895] (0/2) Epoch 7, batch 500, loss[loss=0.3197, simple_loss=0.3382, pruned_loss=0.1506, over 2787.00 frames. ], tot_loss[loss=0.2974, simple_loss=0.3339, pruned_loss=0.1304, over 504929.74 frames. ], batch size: 10, lr: 2.33e-02, grad_scale: 8.0
|
| 1971 |
2026-01-13 13:45:29,954 INFO [zipformer.py:1188] (0/2) warmup_begin=666.7, warmup_end=1333.3, batch_count=10447.0, num_to_drop=0, layers_to_drop=set()
|
| 1972 |
2026-01-13 13:45:34,706 INFO [zipformer.py:1188] (0/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=10454.0, num_to_drop=0, layers_to_drop=set()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1970 |
2026-01-13 13:45:12,829 INFO [train.py:895] (0/2) Epoch 7, batch 500, loss[loss=0.3197, simple_loss=0.3382, pruned_loss=0.1506, over 2787.00 frames. ], tot_loss[loss=0.2974, simple_loss=0.3339, pruned_loss=0.1304, over 504929.74 frames. ], batch size: 10, lr: 2.33e-02, grad_scale: 8.0
|
| 1971 |
2026-01-13 13:45:29,954 INFO [zipformer.py:1188] (0/2) warmup_begin=666.7, warmup_end=1333.3, batch_count=10447.0, num_to_drop=0, layers_to_drop=set()
|
| 1972 |
2026-01-13 13:45:34,706 INFO [zipformer.py:1188] (0/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=10454.0, num_to_drop=0, layers_to_drop=set()
|
| 1973 |
+
2026-01-13 13:45:39,033 INFO [zipformer.py:2441] (0/2) attn_weights_entropy = tensor([1.9033, 1.5718, 1.2905, 1.1539, 0.6515, 1.0316, 1.8272, 1.9611],
|
| 1974 |
+
device='cuda:0'), covar=tensor([0.0078, 0.0111, 0.0102, 0.0377, 0.0162, 0.0118, 0.0071, 0.0071],
|
| 1975 |
+
device='cuda:0'), in_proj_covar=tensor([0.0017, 0.0017, 0.0016, 0.0018, 0.0016, 0.0019, 0.0018, 0.0015],
|
| 1976 |
+
device='cuda:0'), out_proj_covar=tensor([1.0318e-05, 1.0372e-05, 9.9702e-06, 1.4535e-05, 1.0783e-05, 1.5542e-05,
|
| 1977 |
+
1.0069e-05, 8.5264e-06], device='cuda:0')
|
| 1978 |
+
2026-01-13 13:45:43,807 INFO [train.py:895] (0/2) Epoch 7, batch 550, loss[loss=0.4733, simple_loss=0.4814, pruned_loss=0.2326, over 2731.00 frames. ], tot_loss[loss=0.2973, simple_loss=0.334, pruned_loss=0.1303, over 514997.07 frames. ], batch size: 12, lr: 2.33e-02, grad_scale: 8.0
|
| 1979 |
+
2026-01-13 13:45:44,702 INFO [zipformer.py:1188] (0/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=10470.0, num_to_drop=0, layers_to_drop=set()
|
| 1980 |
+
2026-01-13 13:45:46,828 INFO [zipformer.py:2441] (0/2) attn_weights_entropy = tensor([3.6218, 2.6802, 2.2362, 2.9593, 1.2914, 2.5316, 2.4817, 3.8020],
|
| 1981 |
+
device='cuda:0'), covar=tensor([0.0386, 0.0919, 0.1763, 0.0998, 0.2258, 0.1688, 0.1148, 0.0571],
|
| 1982 |
+
device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0079, 0.0103, 0.0097, 0.0104, 0.0108, 0.0078, 0.0084],
|
| 1983 |
+
device='cuda:0'), out_proj_covar=tensor([8.0946e-05, 9.4900e-05, 1.2124e-04, 1.1471e-04, 1.2048e-04, 1.2621e-04,
|
| 1984 |
+
9.2687e-05, 1.0056e-04], device='cuda:0')
|
log/log-train-2026-01-13-11-44-05-1
CHANGED
|
@@ -1980,3 +1980,5 @@
|
|
| 1980 |
2026-01-13 13:45:12,827 INFO [train.py:895] (1/2) Epoch 7, batch 500, loss[loss=0.2124, simple_loss=0.284, pruned_loss=0.07033, over 2793.00 frames. ], tot_loss[loss=0.29, simple_loss=0.3307, pruned_loss=0.1247, over 506265.10 frames. ], batch size: 10, lr: 2.33e-02, grad_scale: 8.0
|
| 1981 |
2026-01-13 13:45:29,941 INFO [zipformer.py:1188] (1/2) warmup_begin=666.7, warmup_end=1333.3, batch_count=10447.0, num_to_drop=0, layers_to_drop=set()
|
| 1982 |
2026-01-13 13:45:34,720 INFO [zipformer.py:1188] (1/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=10454.0, num_to_drop=0, layers_to_drop=set()
|
|
|
|
|
|
|
|
|
| 1980 |
2026-01-13 13:45:12,827 INFO [train.py:895] (1/2) Epoch 7, batch 500, loss[loss=0.2124, simple_loss=0.284, pruned_loss=0.07033, over 2793.00 frames. ], tot_loss[loss=0.29, simple_loss=0.3307, pruned_loss=0.1247, over 506265.10 frames. ], batch size: 10, lr: 2.33e-02, grad_scale: 8.0
|
| 1981 |
2026-01-13 13:45:29,941 INFO [zipformer.py:1188] (1/2) warmup_begin=666.7, warmup_end=1333.3, batch_count=10447.0, num_to_drop=0, layers_to_drop=set()
|
| 1982 |
2026-01-13 13:45:34,720 INFO [zipformer.py:1188] (1/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=10454.0, num_to_drop=0, layers_to_drop=set()
|
| 1983 |
+
2026-01-13 13:45:43,802 INFO [train.py:895] (1/2) Epoch 7, batch 550, loss[loss=0.2838, simple_loss=0.3293, pruned_loss=0.1192, over 2764.00 frames. ], tot_loss[loss=0.2914, simple_loss=0.3316, pruned_loss=0.1256, over 514708.69 frames. ], batch size: 12, lr: 2.33e-02, grad_scale: 8.0
|
| 1984 |
+
2026-01-13 13:45:44,661 INFO [zipformer.py:1188] (1/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=10470.0, num_to_drop=0, layers_to_drop=set()
|