Training in progress, step 20000, checkpoint
Browse files- last-checkpoint/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step20000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +586 -6
last-checkpoint/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ade9c3e43b2a1550492ecf4b91e9228af429dcf0d7b1c09aea81ebc7a5842d20
|
| 3 |
+
size 761059696
|
last-checkpoint/global_step20000/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58f46b37e83d56bff8e8b49fc01d48e56f7c2f6034abd01b65de03f862980853
|
| 3 |
+
size 129965712
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step20000
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 181508256
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b22de776648c8fc55dbdb37a34986669b21215c0d0cc7d4355ba0090a00314ad
|
| 3 |
size 181508256
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:907910c4d615478ec9b347b176d82b2a1be77f33469156f9f4b3321b8fe69355
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29c7a79b53a589de48d3b7a21df9c0d024be4dea79f68869f72fdc01ae3b212a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 96.
|
| 3 |
-
"best_model_checkpoint": "./iteboshi_temp/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 1000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -5227,6 +5227,586 @@
|
|
| 5227 |
"eval_steps_per_second": 3.303,
|
| 5228 |
"eval_wer": 96.61480433757662,
|
| 5229 |
"step": 18000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5230 |
}
|
| 5231 |
],
|
| 5232 |
"logging_steps": 25,
|
|
@@ -5241,12 +5821,12 @@
|
|
| 5241 |
"should_evaluate": false,
|
| 5242 |
"should_log": false,
|
| 5243 |
"should_save": true,
|
| 5244 |
-
"should_training_stop":
|
| 5245 |
},
|
| 5246 |
"attributes": {}
|
| 5247 |
}
|
| 5248 |
},
|
| 5249 |
-
"total_flos": 3.
|
| 5250 |
"train_batch_size": 4,
|
| 5251 |
"trial_name": null,
|
| 5252 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 96.57708628005658,
|
| 3 |
+
"best_model_checkpoint": "./iteboshi_temp/checkpoint-19000",
|
| 4 |
+
"epoch": 22.026431718061673,
|
| 5 |
"eval_steps": 1000,
|
| 6 |
+
"global_step": 20000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 5227 |
"eval_steps_per_second": 3.303,
|
| 5228 |
"eval_wer": 96.61480433757662,
|
| 5229 |
"step": 18000
|
| 5230 |
+
},
|
| 5231 |
+
{
|
| 5232 |
+
"epoch": 19.851321585903083,
|
| 5233 |
+
"grad_norm": 0.0205672699958086,
|
| 5234 |
+
"learning_rate": 2.025641025641026e-06,
|
| 5235 |
+
"loss": 0.0034,
|
| 5236 |
+
"step": 18025
|
| 5237 |
+
},
|
| 5238 |
+
{
|
| 5239 |
+
"epoch": 19.878854625550662,
|
| 5240 |
+
"grad_norm": 0.017052460461854935,
|
| 5241 |
+
"learning_rate": 2.0000000000000003e-06,
|
| 5242 |
+
"loss": 0.0024,
|
| 5243 |
+
"step": 18050
|
| 5244 |
+
},
|
| 5245 |
+
{
|
| 5246 |
+
"epoch": 19.90638766519824,
|
| 5247 |
+
"grad_norm": 0.023273777216672897,
|
| 5248 |
+
"learning_rate": 1.9743589743589744e-06,
|
| 5249 |
+
"loss": 0.0024,
|
| 5250 |
+
"step": 18075
|
| 5251 |
+
},
|
| 5252 |
+
{
|
| 5253 |
+
"epoch": 19.933920704845814,
|
| 5254 |
+
"grad_norm": 0.01854720339179039,
|
| 5255 |
+
"learning_rate": 1.948717948717949e-06,
|
| 5256 |
+
"loss": 0.0029,
|
| 5257 |
+
"step": 18100
|
| 5258 |
+
},
|
| 5259 |
+
{
|
| 5260 |
+
"epoch": 19.961453744493394,
|
| 5261 |
+
"grad_norm": 0.023288726806640625,
|
| 5262 |
+
"learning_rate": 1.9230769230769234e-06,
|
| 5263 |
+
"loss": 0.0025,
|
| 5264 |
+
"step": 18125
|
| 5265 |
+
},
|
| 5266 |
+
{
|
| 5267 |
+
"epoch": 19.98898678414097,
|
| 5268 |
+
"grad_norm": 0.019170600920915604,
|
| 5269 |
+
"learning_rate": 1.8974358974358975e-06,
|
| 5270 |
+
"loss": 0.0024,
|
| 5271 |
+
"step": 18150
|
| 5272 |
+
},
|
| 5273 |
+
{
|
| 5274 |
+
"epoch": 20.016519823788546,
|
| 5275 |
+
"grad_norm": 0.013864605687558651,
|
| 5276 |
+
"learning_rate": 1.871794871794872e-06,
|
| 5277 |
+
"loss": 0.0021,
|
| 5278 |
+
"step": 18175
|
| 5279 |
+
},
|
| 5280 |
+
{
|
| 5281 |
+
"epoch": 20.044052863436125,
|
| 5282 |
+
"grad_norm": 0.015261122956871986,
|
| 5283 |
+
"learning_rate": 1.8461538461538465e-06,
|
| 5284 |
+
"loss": 0.002,
|
| 5285 |
+
"step": 18200
|
| 5286 |
+
},
|
| 5287 |
+
{
|
| 5288 |
+
"epoch": 20.0715859030837,
|
| 5289 |
+
"grad_norm": 0.015079254284501076,
|
| 5290 |
+
"learning_rate": 1.8205128205128205e-06,
|
| 5291 |
+
"loss": 0.0024,
|
| 5292 |
+
"step": 18225
|
| 5293 |
+
},
|
| 5294 |
+
{
|
| 5295 |
+
"epoch": 20.099118942731277,
|
| 5296 |
+
"grad_norm": 0.013841504231095314,
|
| 5297 |
+
"learning_rate": 1.794871794871795e-06,
|
| 5298 |
+
"loss": 0.003,
|
| 5299 |
+
"step": 18250
|
| 5300 |
+
},
|
| 5301 |
+
{
|
| 5302 |
+
"epoch": 20.126651982378856,
|
| 5303 |
+
"grad_norm": 0.017009438946843147,
|
| 5304 |
+
"learning_rate": 1.7692307692307695e-06,
|
| 5305 |
+
"loss": 0.002,
|
| 5306 |
+
"step": 18275
|
| 5307 |
+
},
|
| 5308 |
+
{
|
| 5309 |
+
"epoch": 20.154185022026432,
|
| 5310 |
+
"grad_norm": 0.01796025224030018,
|
| 5311 |
+
"learning_rate": 1.7435897435897436e-06,
|
| 5312 |
+
"loss": 0.0019,
|
| 5313 |
+
"step": 18300
|
| 5314 |
+
},
|
| 5315 |
+
{
|
| 5316 |
+
"epoch": 20.181718061674008,
|
| 5317 |
+
"grad_norm": 0.020462974905967712,
|
| 5318 |
+
"learning_rate": 1.717948717948718e-06,
|
| 5319 |
+
"loss": 0.002,
|
| 5320 |
+
"step": 18325
|
| 5321 |
+
},
|
| 5322 |
+
{
|
| 5323 |
+
"epoch": 20.209251101321588,
|
| 5324 |
+
"grad_norm": 0.0168469101190567,
|
| 5325 |
+
"learning_rate": 1.6923076923076926e-06,
|
| 5326 |
+
"loss": 0.002,
|
| 5327 |
+
"step": 18350
|
| 5328 |
+
},
|
| 5329 |
+
{
|
| 5330 |
+
"epoch": 20.236784140969164,
|
| 5331 |
+
"grad_norm": 0.015358548611402512,
|
| 5332 |
+
"learning_rate": 1.6666666666666667e-06,
|
| 5333 |
+
"loss": 0.0019,
|
| 5334 |
+
"step": 18375
|
| 5335 |
+
},
|
| 5336 |
+
{
|
| 5337 |
+
"epoch": 20.26431718061674,
|
| 5338 |
+
"grad_norm": 0.01623690128326416,
|
| 5339 |
+
"learning_rate": 1.6410256410256412e-06,
|
| 5340 |
+
"loss": 0.0019,
|
| 5341 |
+
"step": 18400
|
| 5342 |
+
},
|
| 5343 |
+
{
|
| 5344 |
+
"epoch": 20.291850220264315,
|
| 5345 |
+
"grad_norm": 0.016147859394550323,
|
| 5346 |
+
"learning_rate": 1.6153846153846157e-06,
|
| 5347 |
+
"loss": 0.002,
|
| 5348 |
+
"step": 18425
|
| 5349 |
+
},
|
| 5350 |
+
{
|
| 5351 |
+
"epoch": 20.319383259911895,
|
| 5352 |
+
"grad_norm": 0.023021413013339043,
|
| 5353 |
+
"learning_rate": 1.5897435897435897e-06,
|
| 5354 |
+
"loss": 0.0023,
|
| 5355 |
+
"step": 18450
|
| 5356 |
+
},
|
| 5357 |
+
{
|
| 5358 |
+
"epoch": 20.34691629955947,
|
| 5359 |
+
"grad_norm": 0.0137328477576375,
|
| 5360 |
+
"learning_rate": 1.5641025641025642e-06,
|
| 5361 |
+
"loss": 0.0019,
|
| 5362 |
+
"step": 18475
|
| 5363 |
+
},
|
| 5364 |
+
{
|
| 5365 |
+
"epoch": 20.374449339207047,
|
| 5366 |
+
"grad_norm": 0.01765141263604164,
|
| 5367 |
+
"learning_rate": 1.5384615384615387e-06,
|
| 5368 |
+
"loss": 0.0022,
|
| 5369 |
+
"step": 18500
|
| 5370 |
+
},
|
| 5371 |
+
{
|
| 5372 |
+
"epoch": 20.401982378854626,
|
| 5373 |
+
"grad_norm": 0.015655307099223137,
|
| 5374 |
+
"learning_rate": 1.5128205128205128e-06,
|
| 5375 |
+
"loss": 0.0038,
|
| 5376 |
+
"step": 18525
|
| 5377 |
+
},
|
| 5378 |
+
{
|
| 5379 |
+
"epoch": 20.429515418502202,
|
| 5380 |
+
"grad_norm": 0.021192258223891258,
|
| 5381 |
+
"learning_rate": 1.4871794871794873e-06,
|
| 5382 |
+
"loss": 0.0021,
|
| 5383 |
+
"step": 18550
|
| 5384 |
+
},
|
| 5385 |
+
{
|
| 5386 |
+
"epoch": 20.457048458149778,
|
| 5387 |
+
"grad_norm": 0.014702214859426022,
|
| 5388 |
+
"learning_rate": 1.4615384615384618e-06,
|
| 5389 |
+
"loss": 0.0019,
|
| 5390 |
+
"step": 18575
|
| 5391 |
+
},
|
| 5392 |
+
{
|
| 5393 |
+
"epoch": 20.484581497797357,
|
| 5394 |
+
"grad_norm": 0.018568340688943863,
|
| 5395 |
+
"learning_rate": 1.4358974358974359e-06,
|
| 5396 |
+
"loss": 0.0018,
|
| 5397 |
+
"step": 18600
|
| 5398 |
+
},
|
| 5399 |
+
{
|
| 5400 |
+
"epoch": 20.512114537444933,
|
| 5401 |
+
"grad_norm": 0.020032202824950218,
|
| 5402 |
+
"learning_rate": 1.4102564102564104e-06,
|
| 5403 |
+
"loss": 0.002,
|
| 5404 |
+
"step": 18625
|
| 5405 |
+
},
|
| 5406 |
+
{
|
| 5407 |
+
"epoch": 20.53964757709251,
|
| 5408 |
+
"grad_norm": 0.01590747945010662,
|
| 5409 |
+
"learning_rate": 1.3846153846153848e-06,
|
| 5410 |
+
"loss": 0.002,
|
| 5411 |
+
"step": 18650
|
| 5412 |
+
},
|
| 5413 |
+
{
|
| 5414 |
+
"epoch": 20.56718061674009,
|
| 5415 |
+
"grad_norm": 0.014293953776359558,
|
| 5416 |
+
"learning_rate": 1.358974358974359e-06,
|
| 5417 |
+
"loss": 0.002,
|
| 5418 |
+
"step": 18675
|
| 5419 |
+
},
|
| 5420 |
+
{
|
| 5421 |
+
"epoch": 20.594713656387665,
|
| 5422 |
+
"grad_norm": 0.0199781134724617,
|
| 5423 |
+
"learning_rate": 1.3333333333333334e-06,
|
| 5424 |
+
"loss": 0.0019,
|
| 5425 |
+
"step": 18700
|
| 5426 |
+
},
|
| 5427 |
+
{
|
| 5428 |
+
"epoch": 20.62224669603524,
|
| 5429 |
+
"grad_norm": 0.018757140263915062,
|
| 5430 |
+
"learning_rate": 1.307692307692308e-06,
|
| 5431 |
+
"loss": 0.0022,
|
| 5432 |
+
"step": 18725
|
| 5433 |
+
},
|
| 5434 |
+
{
|
| 5435 |
+
"epoch": 20.64977973568282,
|
| 5436 |
+
"grad_norm": 0.021107446402311325,
|
| 5437 |
+
"learning_rate": 1.282051282051282e-06,
|
| 5438 |
+
"loss": 0.0029,
|
| 5439 |
+
"step": 18750
|
| 5440 |
+
},
|
| 5441 |
+
{
|
| 5442 |
+
"epoch": 20.677312775330396,
|
| 5443 |
+
"grad_norm": 0.018470246344804764,
|
| 5444 |
+
"learning_rate": 1.2564102564102565e-06,
|
| 5445 |
+
"loss": 0.0021,
|
| 5446 |
+
"step": 18775
|
| 5447 |
+
},
|
| 5448 |
+
{
|
| 5449 |
+
"epoch": 20.704845814977972,
|
| 5450 |
+
"grad_norm": 0.01821320876479149,
|
| 5451 |
+
"learning_rate": 1.230769230769231e-06,
|
| 5452 |
+
"loss": 0.0022,
|
| 5453 |
+
"step": 18800
|
| 5454 |
+
},
|
| 5455 |
+
{
|
| 5456 |
+
"epoch": 20.73237885462555,
|
| 5457 |
+
"grad_norm": 0.15323257446289062,
|
| 5458 |
+
"learning_rate": 1.2051282051282053e-06,
|
| 5459 |
+
"loss": 0.0024,
|
| 5460 |
+
"step": 18825
|
| 5461 |
+
},
|
| 5462 |
+
{
|
| 5463 |
+
"epoch": 20.759911894273127,
|
| 5464 |
+
"grad_norm": 0.015295284800231457,
|
| 5465 |
+
"learning_rate": 1.1794871794871795e-06,
|
| 5466 |
+
"loss": 0.002,
|
| 5467 |
+
"step": 18850
|
| 5468 |
+
},
|
| 5469 |
+
{
|
| 5470 |
+
"epoch": 20.787444933920703,
|
| 5471 |
+
"grad_norm": 0.015194980427622795,
|
| 5472 |
+
"learning_rate": 1.153846153846154e-06,
|
| 5473 |
+
"loss": 0.0018,
|
| 5474 |
+
"step": 18875
|
| 5475 |
+
},
|
| 5476 |
+
{
|
| 5477 |
+
"epoch": 20.814977973568283,
|
| 5478 |
+
"grad_norm": 0.05270170047879219,
|
| 5479 |
+
"learning_rate": 1.1282051282051283e-06,
|
| 5480 |
+
"loss": 0.0024,
|
| 5481 |
+
"step": 18900
|
| 5482 |
+
},
|
| 5483 |
+
{
|
| 5484 |
+
"epoch": 20.84251101321586,
|
| 5485 |
+
"grad_norm": 0.01960138976573944,
|
| 5486 |
+
"learning_rate": 1.1025641025641026e-06,
|
| 5487 |
+
"loss": 0.0021,
|
| 5488 |
+
"step": 18925
|
| 5489 |
+
},
|
| 5490 |
+
{
|
| 5491 |
+
"epoch": 20.870044052863435,
|
| 5492 |
+
"grad_norm": 0.02073553018271923,
|
| 5493 |
+
"learning_rate": 1.076923076923077e-06,
|
| 5494 |
+
"loss": 0.0019,
|
| 5495 |
+
"step": 18950
|
| 5496 |
+
},
|
| 5497 |
+
{
|
| 5498 |
+
"epoch": 20.897577092511014,
|
| 5499 |
+
"grad_norm": 0.01615351065993309,
|
| 5500 |
+
"learning_rate": 1.0512820512820514e-06,
|
| 5501 |
+
"loss": 0.002,
|
| 5502 |
+
"step": 18975
|
| 5503 |
+
},
|
| 5504 |
+
{
|
| 5505 |
+
"epoch": 20.92511013215859,
|
| 5506 |
+
"grad_norm": 0.021563587710261345,
|
| 5507 |
+
"learning_rate": 1.0256410256410257e-06,
|
| 5508 |
+
"loss": 0.0021,
|
| 5509 |
+
"step": 19000
|
| 5510 |
+
},
|
| 5511 |
+
{
|
| 5512 |
+
"epoch": 20.92511013215859,
|
| 5513 |
+
"eval_cer": 55.589054600896446,
|
| 5514 |
+
"eval_loss": 1.0507194995880127,
|
| 5515 |
+
"eval_runtime": 844.8487,
|
| 5516 |
+
"eval_samples_per_second": 12.524,
|
| 5517 |
+
"eval_steps_per_second": 3.132,
|
| 5518 |
+
"eval_wer": 96.57708628005658,
|
| 5519 |
+
"step": 19000
|
| 5520 |
+
},
|
| 5521 |
+
{
|
| 5522 |
+
"epoch": 20.952643171806166,
|
| 5523 |
+
"grad_norm": 0.016109561547636986,
|
| 5524 |
+
"learning_rate": 1.0000000000000002e-06,
|
| 5525 |
+
"loss": 0.002,
|
| 5526 |
+
"step": 19025
|
| 5527 |
+
},
|
| 5528 |
+
{
|
| 5529 |
+
"epoch": 20.980176211453745,
|
| 5530 |
+
"grad_norm": 0.016952887177467346,
|
| 5531 |
+
"learning_rate": 9.743589743589745e-07,
|
| 5532 |
+
"loss": 0.002,
|
| 5533 |
+
"step": 19050
|
| 5534 |
+
},
|
| 5535 |
+
{
|
| 5536 |
+
"epoch": 21.00770925110132,
|
| 5537 |
+
"grad_norm": 0.01466713659465313,
|
| 5538 |
+
"learning_rate": 9.487179487179487e-07,
|
| 5539 |
+
"loss": 0.002,
|
| 5540 |
+
"step": 19075
|
| 5541 |
+
},
|
| 5542 |
+
{
|
| 5543 |
+
"epoch": 21.035242290748897,
|
| 5544 |
+
"grad_norm": 0.01427449006587267,
|
| 5545 |
+
"learning_rate": 9.230769230769232e-07,
|
| 5546 |
+
"loss": 0.002,
|
| 5547 |
+
"step": 19100
|
| 5548 |
+
},
|
| 5549 |
+
{
|
| 5550 |
+
"epoch": 21.062775330396477,
|
| 5551 |
+
"grad_norm": 0.016093429177999496,
|
| 5552 |
+
"learning_rate": 8.974358974358975e-07,
|
| 5553 |
+
"loss": 0.0018,
|
| 5554 |
+
"step": 19125
|
| 5555 |
+
},
|
| 5556 |
+
{
|
| 5557 |
+
"epoch": 21.090308370044053,
|
| 5558 |
+
"grad_norm": 0.019426781684160233,
|
| 5559 |
+
"learning_rate": 8.717948717948718e-07,
|
| 5560 |
+
"loss": 0.0018,
|
| 5561 |
+
"step": 19150
|
| 5562 |
+
},
|
| 5563 |
+
{
|
| 5564 |
+
"epoch": 21.11784140969163,
|
| 5565 |
+
"grad_norm": 0.0124832633882761,
|
| 5566 |
+
"learning_rate": 8.461538461538463e-07,
|
| 5567 |
+
"loss": 0.0017,
|
| 5568 |
+
"step": 19175
|
| 5569 |
+
},
|
| 5570 |
+
{
|
| 5571 |
+
"epoch": 21.145374449339208,
|
| 5572 |
+
"grad_norm": 0.01551234070211649,
|
| 5573 |
+
"learning_rate": 8.205128205128206e-07,
|
| 5574 |
+
"loss": 0.0018,
|
| 5575 |
+
"step": 19200
|
| 5576 |
+
},
|
| 5577 |
+
{
|
| 5578 |
+
"epoch": 21.172907488986784,
|
| 5579 |
+
"grad_norm": 0.01290995441377163,
|
| 5580 |
+
"learning_rate": 7.948717948717949e-07,
|
| 5581 |
+
"loss": 0.0019,
|
| 5582 |
+
"step": 19225
|
| 5583 |
+
},
|
| 5584 |
+
{
|
| 5585 |
+
"epoch": 21.20044052863436,
|
| 5586 |
+
"grad_norm": 0.012107312679290771,
|
| 5587 |
+
"learning_rate": 7.692307692307694e-07,
|
| 5588 |
+
"loss": 0.0018,
|
| 5589 |
+
"step": 19250
|
| 5590 |
+
},
|
| 5591 |
+
{
|
| 5592 |
+
"epoch": 21.22797356828194,
|
| 5593 |
+
"grad_norm": 0.013243271969258785,
|
| 5594 |
+
"learning_rate": 7.435897435897436e-07,
|
| 5595 |
+
"loss": 0.0018,
|
| 5596 |
+
"step": 19275
|
| 5597 |
+
},
|
| 5598 |
+
{
|
| 5599 |
+
"epoch": 21.255506607929515,
|
| 5600 |
+
"grad_norm": 0.01567436195909977,
|
| 5601 |
+
"learning_rate": 7.179487179487179e-07,
|
| 5602 |
+
"loss": 0.0017,
|
| 5603 |
+
"step": 19300
|
| 5604 |
+
},
|
| 5605 |
+
{
|
| 5606 |
+
"epoch": 21.28303964757709,
|
| 5607 |
+
"grad_norm": 0.017800329253077507,
|
| 5608 |
+
"learning_rate": 6.923076923076924e-07,
|
| 5609 |
+
"loss": 0.0017,
|
| 5610 |
+
"step": 19325
|
| 5611 |
+
},
|
| 5612 |
+
{
|
| 5613 |
+
"epoch": 21.31057268722467,
|
| 5614 |
+
"grad_norm": 0.012769469991326332,
|
| 5615 |
+
"learning_rate": 6.666666666666667e-07,
|
| 5616 |
+
"loss": 0.0018,
|
| 5617 |
+
"step": 19350
|
| 5618 |
+
},
|
| 5619 |
+
{
|
| 5620 |
+
"epoch": 21.338105726872246,
|
| 5621 |
+
"grad_norm": 0.013936811126768589,
|
| 5622 |
+
"learning_rate": 6.41025641025641e-07,
|
| 5623 |
+
"loss": 0.0018,
|
| 5624 |
+
"step": 19375
|
| 5625 |
+
},
|
| 5626 |
+
{
|
| 5627 |
+
"epoch": 21.365638766519822,
|
| 5628 |
+
"grad_norm": 0.017832236364483833,
|
| 5629 |
+
"learning_rate": 6.153846153846155e-07,
|
| 5630 |
+
"loss": 0.0018,
|
| 5631 |
+
"step": 19400
|
| 5632 |
+
},
|
| 5633 |
+
{
|
| 5634 |
+
"epoch": 21.393171806167402,
|
| 5635 |
+
"grad_norm": 0.016330501064658165,
|
| 5636 |
+
"learning_rate": 5.897435897435898e-07,
|
| 5637 |
+
"loss": 0.0019,
|
| 5638 |
+
"step": 19425
|
| 5639 |
+
},
|
| 5640 |
+
{
|
| 5641 |
+
"epoch": 21.420704845814978,
|
| 5642 |
+
"grad_norm": 0.012162838131189346,
|
| 5643 |
+
"learning_rate": 5.641025641025642e-07,
|
| 5644 |
+
"loss": 0.0018,
|
| 5645 |
+
"step": 19450
|
| 5646 |
+
},
|
| 5647 |
+
{
|
| 5648 |
+
"epoch": 21.448237885462554,
|
| 5649 |
+
"grad_norm": 0.01499269250780344,
|
| 5650 |
+
"learning_rate": 5.384615384615386e-07,
|
| 5651 |
+
"loss": 0.0019,
|
| 5652 |
+
"step": 19475
|
| 5653 |
+
},
|
| 5654 |
+
{
|
| 5655 |
+
"epoch": 21.475770925110133,
|
| 5656 |
+
"grad_norm": 0.013169058598577976,
|
| 5657 |
+
"learning_rate": 5.128205128205128e-07,
|
| 5658 |
+
"loss": 0.0019,
|
| 5659 |
+
"step": 19500
|
| 5660 |
+
},
|
| 5661 |
+
{
|
| 5662 |
+
"epoch": 21.50330396475771,
|
| 5663 |
+
"grad_norm": 0.011718913912773132,
|
| 5664 |
+
"learning_rate": 4.871794871794872e-07,
|
| 5665 |
+
"loss": 0.0018,
|
| 5666 |
+
"step": 19525
|
| 5667 |
+
},
|
| 5668 |
+
{
|
| 5669 |
+
"epoch": 21.530837004405285,
|
| 5670 |
+
"grad_norm": 0.01436688657850027,
|
| 5671 |
+
"learning_rate": 4.615384615384616e-07,
|
| 5672 |
+
"loss": 0.0019,
|
| 5673 |
+
"step": 19550
|
| 5674 |
+
},
|
| 5675 |
+
{
|
| 5676 |
+
"epoch": 21.558370044052865,
|
| 5677 |
+
"grad_norm": 0.012899577617645264,
|
| 5678 |
+
"learning_rate": 4.358974358974359e-07,
|
| 5679 |
+
"loss": 0.0016,
|
| 5680 |
+
"step": 19575
|
| 5681 |
+
},
|
| 5682 |
+
{
|
| 5683 |
+
"epoch": 21.58590308370044,
|
| 5684 |
+
"grad_norm": 0.018741106614470482,
|
| 5685 |
+
"learning_rate": 4.102564102564103e-07,
|
| 5686 |
+
"loss": 0.0018,
|
| 5687 |
+
"step": 19600
|
| 5688 |
+
},
|
| 5689 |
+
{
|
| 5690 |
+
"epoch": 21.613436123348016,
|
| 5691 |
+
"grad_norm": 0.011879649944603443,
|
| 5692 |
+
"learning_rate": 3.846153846153847e-07,
|
| 5693 |
+
"loss": 0.0018,
|
| 5694 |
+
"step": 19625
|
| 5695 |
+
},
|
| 5696 |
+
{
|
| 5697 |
+
"epoch": 21.640969162995596,
|
| 5698 |
+
"grad_norm": 0.01298064086586237,
|
| 5699 |
+
"learning_rate": 3.5897435897435896e-07,
|
| 5700 |
+
"loss": 0.0018,
|
| 5701 |
+
"step": 19650
|
| 5702 |
+
},
|
| 5703 |
+
{
|
| 5704 |
+
"epoch": 21.66850220264317,
|
| 5705 |
+
"grad_norm": 0.0132521390914917,
|
| 5706 |
+
"learning_rate": 3.3333333333333335e-07,
|
| 5707 |
+
"loss": 0.0017,
|
| 5708 |
+
"step": 19675
|
| 5709 |
+
},
|
| 5710 |
+
{
|
| 5711 |
+
"epoch": 21.696035242290748,
|
| 5712 |
+
"grad_norm": 0.012232212349772453,
|
| 5713 |
+
"learning_rate": 3.0769230769230774e-07,
|
| 5714 |
+
"loss": 0.0022,
|
| 5715 |
+
"step": 19700
|
| 5716 |
+
},
|
| 5717 |
+
{
|
| 5718 |
+
"epoch": 21.723568281938327,
|
| 5719 |
+
"grad_norm": 0.0125159602612257,
|
| 5720 |
+
"learning_rate": 2.820512820512821e-07,
|
| 5721 |
+
"loss": 0.0021,
|
| 5722 |
+
"step": 19725
|
| 5723 |
+
},
|
| 5724 |
+
{
|
| 5725 |
+
"epoch": 21.751101321585903,
|
| 5726 |
+
"grad_norm": 0.012911227531731129,
|
| 5727 |
+
"learning_rate": 2.564102564102564e-07,
|
| 5728 |
+
"loss": 0.0018,
|
| 5729 |
+
"step": 19750
|
| 5730 |
+
},
|
| 5731 |
+
{
|
| 5732 |
+
"epoch": 21.77863436123348,
|
| 5733 |
+
"grad_norm": 0.016304660588502884,
|
| 5734 |
+
"learning_rate": 2.307692307692308e-07,
|
| 5735 |
+
"loss": 0.0018,
|
| 5736 |
+
"step": 19775
|
| 5737 |
+
},
|
| 5738 |
+
{
|
| 5739 |
+
"epoch": 21.80616740088106,
|
| 5740 |
+
"grad_norm": 0.0178163331001997,
|
| 5741 |
+
"learning_rate": 2.0512820512820514e-07,
|
| 5742 |
+
"loss": 0.0018,
|
| 5743 |
+
"step": 19800
|
| 5744 |
+
},
|
| 5745 |
+
{
|
| 5746 |
+
"epoch": 21.833700440528634,
|
| 5747 |
+
"grad_norm": 0.013485315255820751,
|
| 5748 |
+
"learning_rate": 1.7948717948717948e-07,
|
| 5749 |
+
"loss": 0.0017,
|
| 5750 |
+
"step": 19825
|
| 5751 |
+
},
|
| 5752 |
+
{
|
| 5753 |
+
"epoch": 21.86123348017621,
|
| 5754 |
+
"grad_norm": 0.021611526608467102,
|
| 5755 |
+
"learning_rate": 1.5384615384615387e-07,
|
| 5756 |
+
"loss": 0.0018,
|
| 5757 |
+
"step": 19850
|
| 5758 |
+
},
|
| 5759 |
+
{
|
| 5760 |
+
"epoch": 21.88876651982379,
|
| 5761 |
+
"grad_norm": 0.014628293924033642,
|
| 5762 |
+
"learning_rate": 1.282051282051282e-07,
|
| 5763 |
+
"loss": 0.0017,
|
| 5764 |
+
"step": 19875
|
| 5765 |
+
},
|
| 5766 |
+
{
|
| 5767 |
+
"epoch": 21.916299559471366,
|
| 5768 |
+
"grad_norm": 0.013321286998689175,
|
| 5769 |
+
"learning_rate": 1.0256410256410257e-07,
|
| 5770 |
+
"loss": 0.0017,
|
| 5771 |
+
"step": 19900
|
| 5772 |
+
},
|
| 5773 |
+
{
|
| 5774 |
+
"epoch": 21.94383259911894,
|
| 5775 |
+
"grad_norm": 0.016186168417334557,
|
| 5776 |
+
"learning_rate": 7.692307692307694e-08,
|
| 5777 |
+
"loss": 0.0018,
|
| 5778 |
+
"step": 19925
|
| 5779 |
+
},
|
| 5780 |
+
{
|
| 5781 |
+
"epoch": 21.97136563876652,
|
| 5782 |
+
"grad_norm": 0.015817852690815926,
|
| 5783 |
+
"learning_rate": 5.1282051282051286e-08,
|
| 5784 |
+
"loss": 0.0017,
|
| 5785 |
+
"step": 19950
|
| 5786 |
+
},
|
| 5787 |
+
{
|
| 5788 |
+
"epoch": 21.998898678414097,
|
| 5789 |
+
"grad_norm": 0.01383238285779953,
|
| 5790 |
+
"learning_rate": 2.5641025641025643e-08,
|
| 5791 |
+
"loss": 0.0018,
|
| 5792 |
+
"step": 19975
|
| 5793 |
+
},
|
| 5794 |
+
{
|
| 5795 |
+
"epoch": 22.026431718061673,
|
| 5796 |
+
"grad_norm": 0.0143059641122818,
|
| 5797 |
+
"learning_rate": 0.0,
|
| 5798 |
+
"loss": 0.0017,
|
| 5799 |
+
"step": 20000
|
| 5800 |
+
},
|
| 5801 |
+
{
|
| 5802 |
+
"epoch": 22.026431718061673,
|
| 5803 |
+
"eval_cer": 54.87888757694909,
|
| 5804 |
+
"eval_loss": 1.0545215606689453,
|
| 5805 |
+
"eval_runtime": 819.2896,
|
| 5806 |
+
"eval_samples_per_second": 12.915,
|
| 5807 |
+
"eval_steps_per_second": 3.23,
|
| 5808 |
+
"eval_wer": 96.57708628005658,
|
| 5809 |
+
"step": 20000
|
| 5810 |
}
|
| 5811 |
],
|
| 5812 |
"logging_steps": 25,
|
|
|
|
| 5821 |
"should_evaluate": false,
|
| 5822 |
"should_log": false,
|
| 5823 |
"should_save": true,
|
| 5824 |
+
"should_training_stop": true
|
| 5825 |
},
|
| 5826 |
"attributes": {}
|
| 5827 |
}
|
| 5828 |
},
|
| 5829 |
+
"total_flos": 3.376341480070185e+19,
|
| 5830 |
"train_batch_size": 4,
|
| 5831 |
"trial_name": null,
|
| 5832 |
"trial_params": null
|