Update latest Amharic ASR checkpoint
Browse files
log/log-train-2026-01-13-11-44-05-0
CHANGED
|
@@ -3758,3 +3758,21 @@
|
|
| 3758 |
2026-01-13 15:46:46,741 INFO [optim.py:365] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.864e+01 1.259e+02 1.591e+02 1.949e+02 3.681e+02, threshold=3.181e+02, percent-clipped=0.0
|
| 3759 |
2026-01-13 15:46:50,275 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=192, metric=2.17 vs. limit=2.0
|
| 3760 |
2026-01-13 15:47:03,931 INFO [train.py:895] (0/2) Epoch 13, batch 700, loss[loss=0.2925, simple_loss=0.3152, pruned_loss=0.1349, over 2884.00 frames. ], tot_loss[loss=0.2226, simple_loss=0.2809, pruned_loss=0.08217, over 533634.28 frames. ], batch size: 8, lr: 1.29e-02, grad_scale: 16.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3758 |
2026-01-13 15:46:46,741 INFO [optim.py:365] (0/2) Clipping_scale=2.0, grad-norm quartiles 7.864e+01 1.259e+02 1.591e+02 1.949e+02 3.681e+02, threshold=3.181e+02, percent-clipped=0.0
|
| 3759 |
2026-01-13 15:46:50,275 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=192, metric=2.17 vs. limit=2.0
|
| 3760 |
2026-01-13 15:47:03,931 INFO [train.py:895] (0/2) Epoch 13, batch 700, loss[loss=0.2925, simple_loss=0.3152, pruned_loss=0.1349, over 2884.00 frames. ], tot_loss[loss=0.2226, simple_loss=0.2809, pruned_loss=0.08217, over 533634.28 frames. ], batch size: 8, lr: 1.29e-02, grad_scale: 16.0
|
| 3761 |
+
2026-01-13 15:47:18,754 INFO [zipformer.py:1188] (0/2) warmup_begin=1333.3, warmup_end=2000.0, batch_count=20560.0, num_to_drop=0, layers_to_drop=set()
|
| 3762 |
+
2026-01-13 15:47:35,001 INFO [train.py:895] (0/2) Epoch 13, batch 750, loss[loss=0.1895, simple_loss=0.2554, pruned_loss=0.06185, over 2708.00 frames. ], tot_loss[loss=0.2206, simple_loss=0.2794, pruned_loss=0.08083, over 537800.50 frames. ], batch size: 8, lr: 1.29e-02, grad_scale: 16.0
|
| 3763 |
+
2026-01-13 15:47:36,378 INFO [zipformer.py:1188] (0/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=20588.0, num_to_drop=0, layers_to_drop=set()
|
| 3764 |
+
2026-01-13 15:47:36,584 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=96, metric=1.94 vs. limit=2.0
|
| 3765 |
+
2026-01-13 15:47:48,102 INFO [optim.py:365] (0/2) Clipping_scale=2.0, grad-norm quartiles 9.539e+01 1.514e+02 2.079e+02 2.469e+02 8.293e+02, threshold=4.157e+02, percent-clipped=15.0
|
| 3766 |
+
2026-01-13 15:47:52,820 INFO [zipformer.py:2441] (0/2) attn_weights_entropy = tensor([3.2494, 3.1332, 1.9301, 2.1750, 1.2463, 3.2251, 2.5270, 2.5388],
|
| 3767 |
+
device='cuda:0'), covar=tensor([0.0053, 0.0096, 0.1110, 0.1072, 0.3190, 0.0173, 0.1256, 0.0980],
|
| 3768 |
+
device='cuda:0'), in_proj_covar=tensor([0.0083, 0.0084, 0.0183, 0.0196, 0.0282, 0.0117, 0.0209, 0.0204],
|
| 3769 |
+
device='cuda:0'), out_proj_covar=tensor([7.2618e-05, 7.4997e-05, 1.5429e-04, 1.7094e-04, 2.3905e-04, 1.0517e-04,
|
| 3770 |
+
1.7625e-04, 1.7579e-04], device='cuda:0')
|
| 3771 |
+
2026-01-13 15:48:05,168 INFO [train.py:895] (0/2) Epoch 13, batch 800, loss[loss=0.1665, simple_loss=0.2501, pruned_loss=0.04148, over 2654.00 frames. ], tot_loss[loss=0.2209, simple_loss=0.2793, pruned_loss=0.08121, over 540718.55 frames. ], batch size: 7, lr: 1.28e-02, grad_scale: 16.0
|
| 3772 |
+
2026-01-13 15:48:05,327 INFO [zipformer.py:2441] (0/2) attn_weights_entropy = tensor([2.2746, 0.7951, 2.8453, 2.6956, 2.6470, 1.7384, 2.4043, 1.9334],
|
| 3773 |
+
device='cuda:0'), covar=tensor([0.2023, 2.0635, 0.0697, 0.1922, 0.0724, 0.9655, 0.7537, 1.0239],
|
| 3774 |
+
device='cuda:0'), in_proj_covar=tensor([0.0107, 0.0209, 0.0100, 0.0096, 0.0097, 0.0166, 0.0201, 0.0187],
|
| 3775 |
+
device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002],
|
| 3776 |
+
device='cuda:0')
|
| 3777 |
+
2026-01-13 15:48:12,893 INFO [zipformer.py:1188] (0/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=20649.0, num_to_drop=0, layers_to_drop=set()
|
| 3778 |
+
2026-01-13 15:48:31,075 INFO [zipformer.py:1188] (0/2) warmup_begin=2000.0, warmup_end=2666.7, batch_count=20676.0, num_to_drop=0, layers_to_drop=set()
|
log/log-train-2026-01-13-11-44-05-1
CHANGED
|
@@ -3689,3 +3689,26 @@
|
|
| 3689 |
device='cuda:1'), out_proj_covar=tensor([6.8185e-05, 6.1237e-05, 4.5486e-05, 4.5339e-05, 5.0701e-05, 5.2962e-05,
|
| 3690 |
6.3357e-05, 6.6899e-05], device='cuda:1')
|
| 3691 |
2026-01-13 15:47:03,935 INFO [train.py:895] (1/2) Epoch 13, batch 700, loss[loss=0.1775, simple_loss=0.2541, pruned_loss=0.05045, over 2902.00 frames. ], tot_loss[loss=0.2249, simple_loss=0.2826, pruned_loss=0.08362, over 534313.93 frames. ], batch size: 8, lr: 1.29e-02, grad_scale: 16.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3689 |
device='cuda:1'), out_proj_covar=tensor([6.8185e-05, 6.1237e-05, 4.5486e-05, 4.5339e-05, 5.0701e-05, 5.2962e-05,
|
| 3690 |
6.3357e-05, 6.6899e-05], device='cuda:1')
|
| 3691 |
2026-01-13 15:47:03,935 INFO [train.py:895] (1/2) Epoch 13, batch 700, loss[loss=0.1775, simple_loss=0.2541, pruned_loss=0.05045, over 2902.00 frames. ], tot_loss[loss=0.2249, simple_loss=0.2826, pruned_loss=0.08362, over 534313.93 frames. ], batch size: 8, lr: 1.29e-02, grad_scale: 16.0
|
| 3692 |
+
2026-01-13 15:47:18,761 INFO [zipformer.py:1188] (1/2) warmup_begin=1333.3, warmup_end=2000.0, batch_count=20560.0, num_to_drop=0, layers_to_drop=set()
|
| 3693 |
+
2026-01-13 15:47:20,718 INFO [zipformer.py:2441] (1/2) attn_weights_entropy = tensor([2.9208, 1.1298, 3.8166, 3.4855, 3.9530, 1.8050, 2.9787, 2.3715],
|
| 3694 |
+
device='cuda:1'), covar=tensor([0.1640, 2.2930, 0.0283, 0.1083, 0.0303, 1.2217, 0.8838, 1.5685],
|
| 3695 |
+
device='cuda:1'), in_proj_covar=tensor([0.0106, 0.0210, 0.0100, 0.0095, 0.0096, 0.0168, 0.0204, 0.0186],
|
| 3696 |
+
device='cuda:1'), out_proj_covar=tensor([0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002],
|
| 3697 |
+
device='cuda:1')
|
| 3698 |
+
2026-01-13 15:47:34,998 INFO [train.py:895] (1/2) Epoch 13, batch 750, loss[loss=0.1921, simple_loss=0.244, pruned_loss=0.07014, over 2710.00 frames. ], tot_loss[loss=0.2209, simple_loss=0.2794, pruned_loss=0.08118, over 538225.96 frames. ], batch size: 8, lr: 1.29e-02, grad_scale: 16.0
|
| 3699 |
+
2026-01-13 15:47:36,359 INFO [zipformer.py:1188] (1/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=20588.0, num_to_drop=0, layers_to_drop=set()
|
| 3700 |
+
2026-01-13 15:47:36,461 INFO [zipformer.py:2441] (1/2) attn_weights_entropy = tensor([1.2416, 3.9089, 3.9679, 2.4147, 4.3130, 4.5171, 1.9509, 1.7787],
|
| 3701 |
+
device='cuda:1'), covar=tensor([0.1302, 0.0027, 0.0031, 0.0445, 0.0036, 0.0041, 0.1327, 0.0881],
|
| 3702 |
+
device='cuda:1'), in_proj_covar=tensor([0.0184, 0.0100, 0.0102, 0.0149, 0.0102, 0.0098, 0.0237, 0.0177],
|
| 3703 |
+
device='cuda:1'), out_proj_covar=tensor([9.8013e-05, 3.4690e-05, 3.4734e-05, 6.2126e-05, 3.6560e-05, 3.3490e-05,
|
| 3704 |
+
1.3806e-04, 8.2738e-05], device='cuda:1')
|
| 3705 |
+
2026-01-13 15:47:48,101 INFO [optim.py:365] (1/2) Clipping_scale=2.0, grad-norm quartiles 9.539e+01 1.514e+02 2.079e+02 2.469e+02 8.293e+02, threshold=4.157e+02, percent-clipped=15.0
|
| 3706 |
+
2026-01-13 15:48:05,169 INFO [train.py:895] (1/2) Epoch 13, batch 800, loss[loss=0.1634, simple_loss=0.2395, pruned_loss=0.04359, over 2660.00 frames. ], tot_loss[loss=0.2222, simple_loss=0.28, pruned_loss=0.08217, over 541133.59 frames. ], batch size: 7, lr: 1.28e-02, grad_scale: 16.0
|
| 3707 |
+
2026-01-13 15:48:10,402 INFO [zipformer.py:2441] (1/2) attn_weights_entropy = tensor([1.7609, 3.2593, 1.9309, 2.0313, 3.5918, 1.7806, 1.7308, 1.9669],
|
| 3708 |
+
device='cuda:1'), covar=tensor([0.5084, 0.0357, 0.2815, 0.4647, 0.0368, 0.3881, 0.3024, 0.3533],
|
| 3709 |
+
device='cuda:1'), in_proj_covar=tensor([0.0226, 0.0103, 0.0175, 0.0247, 0.0101, 0.0200, 0.0179, 0.0190],
|
| 3710 |
+
device='cuda:1'), out_proj_covar=tensor([0.0003, 0.0001, 0.0002, 0.0003, 0.0001, 0.0002, 0.0002, 0.0002],
|
| 3711 |
+
device='cuda:1')
|
| 3712 |
+
2026-01-13 15:48:12,946 INFO [zipformer.py:1188] (1/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=20649.0, num_to_drop=0, layers_to_drop=set()
|
| 3713 |
+
2026-01-13 15:48:30,026 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=1.98 vs. limit=2.0
|
| 3714 |
+
2026-01-13 15:48:31,056 INFO [zipformer.py:1188] (1/2) warmup_begin=2000.0, warmup_end=2666.7, batch_count=20676.0, num_to_drop=0, layers_to_drop=set()
|
tensorboard/events.out.tfevents.1768304645.8e64ffbd666a.97184.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dd5b84443c9db429e9c7205731f56796d566848d6fc10662bf8fe66858f8a22
|
| 3 |
+
size 206083
|