kiritan commited on
Commit
414b929
·
verified ·
1 Parent(s): 5bafde6

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55a76ad65f57e1dd26d27153e5221199b53987d1ec13aa58c18022d980ed552f
3
- size 761059696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7072d40044ad4e8f11191d79b2aa90d677d09624e8e2ae612da9850b526cffca
3
+ size 5117197020
last-checkpoint/global_step3000/mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26dcb95f20e6f1938ba4a16eb9b378cec189bdd0e8a9434879a0785a4da722a5
3
- size 129965712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a5126625cd292b4f4d5e948c09dd37887dbb4bf5ef92a93c5bc13ab40d2c37
3
+ size 859127504
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1000
 
1
+ global_step3000
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1dc65a919207b2a3b5b11bfc219bc2d25118e6d82922d4199c93987b5bb6425
3
  size 962205216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:610d4192b27e6309a44afb5d80a9c25b6f192f5989c5f8fb4e13c7b43939651e
3
  size 962205216
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:debdd7498ddcd6232955344ea92b576c95dafdfe1d109e1af69671383a9f0cb2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eaee0853f95d35cacfb932d41346ef50714e4ef121f4dd2abff57eddeebe889
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbadae7a908ca2ec608dd3ceac8b5aab1986323a21358ba9a060e8f696f7e6fb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536a9a8504fffb5687874aeab2eb4bb450e59d1e430c707280de4db4bc58c5a4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 97.56718528995756,
3
- "best_model_checkpoint": "./iteboshi_temp/checkpoint-1000",
4
- "epoch": 1.1013215859030836,
5
  "eval_steps": 1000,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -297,6 +297,586 @@
297
  "eval_steps_per_second": 1.472,
298
  "eval_wer": 97.56718528995756,
299
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  }
301
  ],
302
  "logging_steps": 25,
@@ -316,7 +896,7 @@
316
  "attributes": {}
317
  }
318
  },
319
- "total_flos": 1.7181431864900977e+19,
320
  "train_batch_size": 4,
321
  "trial_name": null,
322
  "trial_params": null
 
1
  {
2
+ "best_metric": 87.76991984912777,
3
+ "best_model_checkpoint": "./iteboshi_temp/checkpoint-3000",
4
+ "epoch": 3.303964757709251,
5
  "eval_steps": 1000,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
297
  "eval_steps_per_second": 1.472,
298
  "eval_wer": 97.56718528995756,
299
  "step": 1000
300
+ },
301
+ {
302
+ "epoch": 1.1288546255506609,
303
+ "grad_norm": 1.21933913230896,
304
+ "learning_rate": 1.9461538461538462e-05,
305
+ "loss": 1.0609,
306
+ "step": 1025
307
+ },
308
+ {
309
+ "epoch": 1.1563876651982379,
310
+ "grad_norm": 1.4192328453063965,
311
+ "learning_rate": 1.943589743589744e-05,
312
+ "loss": 1.0849,
313
+ "step": 1050
314
+ },
315
+ {
316
+ "epoch": 1.183920704845815,
317
+ "grad_norm": 1.730343222618103,
318
+ "learning_rate": 1.9410256410256413e-05,
319
+ "loss": 1.0461,
320
+ "step": 1075
321
+ },
322
+ {
323
+ "epoch": 1.2114537444933922,
324
+ "grad_norm": 1.7515013217926025,
325
+ "learning_rate": 1.9384615384615386e-05,
326
+ "loss": 0.9945,
327
+ "step": 1100
328
+ },
329
+ {
330
+ "epoch": 1.2389867841409692,
331
+ "grad_norm": 2.047463893890381,
332
+ "learning_rate": 1.935897435897436e-05,
333
+ "loss": 1.0707,
334
+ "step": 1125
335
+ },
336
+ {
337
+ "epoch": 1.2665198237885462,
338
+ "grad_norm": 1.3279300928115845,
339
+ "learning_rate": 1.9333333333333333e-05,
340
+ "loss": 0.985,
341
+ "step": 1150
342
+ },
343
+ {
344
+ "epoch": 1.2940528634361232,
345
+ "grad_norm": 1.2352490425109863,
346
+ "learning_rate": 1.930769230769231e-05,
347
+ "loss": 0.9729,
348
+ "step": 1175
349
+ },
350
+ {
351
+ "epoch": 1.3215859030837005,
352
+ "grad_norm": 1.4669734239578247,
353
+ "learning_rate": 1.9282051282051284e-05,
354
+ "loss": 0.9921,
355
+ "step": 1200
356
+ },
357
+ {
358
+ "epoch": 1.3491189427312775,
359
+ "grad_norm": 1.233565092086792,
360
+ "learning_rate": 1.9256410256410258e-05,
361
+ "loss": 0.9546,
362
+ "step": 1225
363
+ },
364
+ {
365
+ "epoch": 1.3766519823788546,
366
+ "grad_norm": 1.4740595817565918,
367
+ "learning_rate": 1.923076923076923e-05,
368
+ "loss": 0.9798,
369
+ "step": 1250
370
+ },
371
+ {
372
+ "epoch": 1.4041850220264318,
373
+ "grad_norm": 1.0927783250808716,
374
+ "learning_rate": 1.920512820512821e-05,
375
+ "loss": 0.9465,
376
+ "step": 1275
377
+ },
378
+ {
379
+ "epoch": 1.4317180616740088,
380
+ "grad_norm": 1.5051064491271973,
381
+ "learning_rate": 1.9179487179487182e-05,
382
+ "loss": 0.9202,
383
+ "step": 1300
384
+ },
385
+ {
386
+ "epoch": 1.4592511013215859,
387
+ "grad_norm": 1.218206524848938,
388
+ "learning_rate": 1.9153846153846156e-05,
389
+ "loss": 0.9766,
390
+ "step": 1325
391
+ },
392
+ {
393
+ "epoch": 1.4867841409691631,
394
+ "grad_norm": 1.3299014568328857,
395
+ "learning_rate": 1.912820512820513e-05,
396
+ "loss": 0.9298,
397
+ "step": 1350
398
+ },
399
+ {
400
+ "epoch": 1.51431718061674,
401
+ "grad_norm": 2.0308456420898438,
402
+ "learning_rate": 1.9102564102564106e-05,
403
+ "loss": 0.9287,
404
+ "step": 1375
405
+ },
406
+ {
407
+ "epoch": 1.5418502202643172,
408
+ "grad_norm": 1.0478333234786987,
409
+ "learning_rate": 1.907692307692308e-05,
410
+ "loss": 0.9302,
411
+ "step": 1400
412
+ },
413
+ {
414
+ "epoch": 1.5693832599118944,
415
+ "grad_norm": 1.2070943117141724,
416
+ "learning_rate": 1.905128205128205e-05,
417
+ "loss": 0.9162,
418
+ "step": 1425
419
+ },
420
+ {
421
+ "epoch": 1.5969162995594712,
422
+ "grad_norm": 1.317423701286316,
423
+ "learning_rate": 1.9025641025641027e-05,
424
+ "loss": 0.9029,
425
+ "step": 1450
426
+ },
427
+ {
428
+ "epoch": 1.6244493392070485,
429
+ "grad_norm": 1.3458503484725952,
430
+ "learning_rate": 1.9e-05,
431
+ "loss": 0.8649,
432
+ "step": 1475
433
+ },
434
+ {
435
+ "epoch": 1.6519823788546255,
436
+ "grad_norm": 1.8415597677230835,
437
+ "learning_rate": 1.8974358974358975e-05,
438
+ "loss": 0.8413,
439
+ "step": 1500
440
+ },
441
+ {
442
+ "epoch": 1.6795154185022025,
443
+ "grad_norm": 1.2922658920288086,
444
+ "learning_rate": 1.894871794871795e-05,
445
+ "loss": 0.8506,
446
+ "step": 1525
447
+ },
448
+ {
449
+ "epoch": 1.7070484581497798,
450
+ "grad_norm": 1.3912965059280396,
451
+ "learning_rate": 1.8923076923076925e-05,
452
+ "loss": 0.8408,
453
+ "step": 1550
454
+ },
455
+ {
456
+ "epoch": 1.7345814977973568,
457
+ "grad_norm": 1.5371092557907104,
458
+ "learning_rate": 1.88974358974359e-05,
459
+ "loss": 0.8579,
460
+ "step": 1575
461
+ },
462
+ {
463
+ "epoch": 1.7621145374449338,
464
+ "grad_norm": 1.188888669013977,
465
+ "learning_rate": 1.8871794871794873e-05,
466
+ "loss": 0.8251,
467
+ "step": 1600
468
+ },
469
+ {
470
+ "epoch": 1.789647577092511,
471
+ "grad_norm": 1.2093167304992676,
472
+ "learning_rate": 1.8846153846153846e-05,
473
+ "loss": 0.8787,
474
+ "step": 1625
475
+ },
476
+ {
477
+ "epoch": 1.8171806167400881,
478
+ "grad_norm": 1.3911653757095337,
479
+ "learning_rate": 1.8820512820512823e-05,
480
+ "loss": 0.8652,
481
+ "step": 1650
482
+ },
483
+ {
484
+ "epoch": 1.8447136563876652,
485
+ "grad_norm": 1.707056999206543,
486
+ "learning_rate": 1.8794871794871797e-05,
487
+ "loss": 0.8693,
488
+ "step": 1675
489
+ },
490
+ {
491
+ "epoch": 1.8722466960352424,
492
+ "grad_norm": 1.1974895000457764,
493
+ "learning_rate": 1.876923076923077e-05,
494
+ "loss": 0.8092,
495
+ "step": 1700
496
+ },
497
+ {
498
+ "epoch": 1.8997797356828194,
499
+ "grad_norm": 1.9799768924713135,
500
+ "learning_rate": 1.8743589743589744e-05,
501
+ "loss": 0.8103,
502
+ "step": 1725
503
+ },
504
+ {
505
+ "epoch": 1.9273127753303965,
506
+ "grad_norm": 1.5621815919876099,
507
+ "learning_rate": 1.8717948717948718e-05,
508
+ "loss": 0.8212,
509
+ "step": 1750
510
+ },
511
+ {
512
+ "epoch": 1.9548458149779737,
513
+ "grad_norm": 1.3315322399139404,
514
+ "learning_rate": 1.8692307692307695e-05,
515
+ "loss": 0.813,
516
+ "step": 1775
517
+ },
518
+ {
519
+ "epoch": 1.9823788546255505,
520
+ "grad_norm": 0.9858968257904053,
521
+ "learning_rate": 1.866666666666667e-05,
522
+ "loss": 0.7959,
523
+ "step": 1800
524
+ },
525
+ {
526
+ "epoch": 2.0099118942731278,
527
+ "grad_norm": 1.0958722829818726,
528
+ "learning_rate": 1.8641025641025642e-05,
529
+ "loss": 0.7545,
530
+ "step": 1825
531
+ },
532
+ {
533
+ "epoch": 2.037444933920705,
534
+ "grad_norm": 1.0353975296020508,
535
+ "learning_rate": 1.8615384615384616e-05,
536
+ "loss": 0.5917,
537
+ "step": 1850
538
+ },
539
+ {
540
+ "epoch": 2.064977973568282,
541
+ "grad_norm": 2.9925360679626465,
542
+ "learning_rate": 1.8589743589743593e-05,
543
+ "loss": 0.5989,
544
+ "step": 1875
545
+ },
546
+ {
547
+ "epoch": 2.092511013215859,
548
+ "grad_norm": 1.3685253858566284,
549
+ "learning_rate": 1.8564102564102567e-05,
550
+ "loss": 0.605,
551
+ "step": 1900
552
+ },
553
+ {
554
+ "epoch": 2.1200440528634363,
555
+ "grad_norm": 1.0744121074676514,
556
+ "learning_rate": 1.853846153846154e-05,
557
+ "loss": 0.6191,
558
+ "step": 1925
559
+ },
560
+ {
561
+ "epoch": 2.147577092511013,
562
+ "grad_norm": 0.898098349571228,
563
+ "learning_rate": 1.8512820512820514e-05,
564
+ "loss": 0.5486,
565
+ "step": 1950
566
+ },
567
+ {
568
+ "epoch": 2.1751101321585904,
569
+ "grad_norm": 1.2373496294021606,
570
+ "learning_rate": 1.848717948717949e-05,
571
+ "loss": 0.5693,
572
+ "step": 1975
573
+ },
574
+ {
575
+ "epoch": 2.202643171806167,
576
+ "grad_norm": 1.3621195554733276,
577
+ "learning_rate": 1.8461538461538465e-05,
578
+ "loss": 0.5859,
579
+ "step": 2000
580
+ },
581
+ {
582
+ "epoch": 2.202643171806167,
583
+ "eval_cer": 48.10966033496498,
584
+ "eval_loss": 0.8996243476867676,
585
+ "eval_runtime": 1860.7163,
586
+ "eval_samples_per_second": 5.687,
587
+ "eval_steps_per_second": 1.422,
588
+ "eval_wer": 90.94766619519095,
589
+ "step": 2000
590
+ },
591
+ {
592
+ "epoch": 2.2301762114537445,
593
+ "grad_norm": 0.9919908046722412,
594
+ "learning_rate": 1.8435897435897435e-05,
595
+ "loss": 0.5958,
596
+ "step": 2025
597
+ },
598
+ {
599
+ "epoch": 2.2577092511013217,
600
+ "grad_norm": 1.0579532384872437,
601
+ "learning_rate": 1.8410256410256412e-05,
602
+ "loss": 0.5828,
603
+ "step": 2050
604
+ },
605
+ {
606
+ "epoch": 2.2852422907488985,
607
+ "grad_norm": 0.8056641221046448,
608
+ "learning_rate": 1.8384615384615386e-05,
609
+ "loss": 0.5666,
610
+ "step": 2075
611
+ },
612
+ {
613
+ "epoch": 2.3127753303964758,
614
+ "grad_norm": 1.1343415975570679,
615
+ "learning_rate": 1.835897435897436e-05,
616
+ "loss": 0.577,
617
+ "step": 2100
618
+ },
619
+ {
620
+ "epoch": 2.340308370044053,
621
+ "grad_norm": 1.04411780834198,
622
+ "learning_rate": 1.8333333333333333e-05,
623
+ "loss": 0.5307,
624
+ "step": 2125
625
+ },
626
+ {
627
+ "epoch": 2.36784140969163,
628
+ "grad_norm": 1.0452271699905396,
629
+ "learning_rate": 1.830769230769231e-05,
630
+ "loss": 0.5545,
631
+ "step": 2150
632
+ },
633
+ {
634
+ "epoch": 2.395374449339207,
635
+ "grad_norm": 0.927592396736145,
636
+ "learning_rate": 1.8282051282051284e-05,
637
+ "loss": 0.5551,
638
+ "step": 2175
639
+ },
640
+ {
641
+ "epoch": 2.4229074889867843,
642
+ "grad_norm": 1.7057969570159912,
643
+ "learning_rate": 1.8256410256410257e-05,
644
+ "loss": 0.5326,
645
+ "step": 2200
646
+ },
647
+ {
648
+ "epoch": 2.450440528634361,
649
+ "grad_norm": 1.4575523138046265,
650
+ "learning_rate": 1.823076923076923e-05,
651
+ "loss": 0.5308,
652
+ "step": 2225
653
+ },
654
+ {
655
+ "epoch": 2.4779735682819384,
656
+ "grad_norm": 1.0914602279663086,
657
+ "learning_rate": 1.8205128205128208e-05,
658
+ "loss": 0.5547,
659
+ "step": 2250
660
+ },
661
+ {
662
+ "epoch": 2.505506607929515,
663
+ "grad_norm": 0.990104079246521,
664
+ "learning_rate": 1.817948717948718e-05,
665
+ "loss": 0.6072,
666
+ "step": 2275
667
+ },
668
+ {
669
+ "epoch": 2.5330396475770924,
670
+ "grad_norm": 0.8314220905303955,
671
+ "learning_rate": 1.8153846153846155e-05,
672
+ "loss": 0.5671,
673
+ "step": 2300
674
+ },
675
+ {
676
+ "epoch": 2.5605726872246697,
677
+ "grad_norm": 0.9760991334915161,
678
+ "learning_rate": 1.812820512820513e-05,
679
+ "loss": 0.5421,
680
+ "step": 2325
681
+ },
682
+ {
683
+ "epoch": 2.5881057268722465,
684
+ "grad_norm": 1.0801244974136353,
685
+ "learning_rate": 1.8102564102564102e-05,
686
+ "loss": 0.5558,
687
+ "step": 2350
688
+ },
689
+ {
690
+ "epoch": 2.6156387665198237,
691
+ "grad_norm": 0.8499842286109924,
692
+ "learning_rate": 1.807692307692308e-05,
693
+ "loss": 0.56,
694
+ "step": 2375
695
+ },
696
+ {
697
+ "epoch": 2.643171806167401,
698
+ "grad_norm": 1.1915971040725708,
699
+ "learning_rate": 1.8051282051282053e-05,
700
+ "loss": 0.5198,
701
+ "step": 2400
702
+ },
703
+ {
704
+ "epoch": 2.670704845814978,
705
+ "grad_norm": 1.093216061592102,
706
+ "learning_rate": 1.8025641025641027e-05,
707
+ "loss": 0.5156,
708
+ "step": 2425
709
+ },
710
+ {
711
+ "epoch": 2.698237885462555,
712
+ "grad_norm": 1.1357547044754028,
713
+ "learning_rate": 1.8e-05,
714
+ "loss": 0.5755,
715
+ "step": 2450
716
+ },
717
+ {
718
+ "epoch": 2.7257709251101323,
719
+ "grad_norm": 0.963991641998291,
720
+ "learning_rate": 1.7974358974358977e-05,
721
+ "loss": 0.5219,
722
+ "step": 2475
723
+ },
724
+ {
725
+ "epoch": 2.753303964757709,
726
+ "grad_norm": 2.196319341659546,
727
+ "learning_rate": 1.794871794871795e-05,
728
+ "loss": 0.5858,
729
+ "step": 2500
730
+ },
731
+ {
732
+ "epoch": 2.7808370044052864,
733
+ "grad_norm": 1.075908899307251,
734
+ "learning_rate": 1.7923076923076925e-05,
735
+ "loss": 0.5284,
736
+ "step": 2525
737
+ },
738
+ {
739
+ "epoch": 2.8083700440528636,
740
+ "grad_norm": 1.052140712738037,
741
+ "learning_rate": 1.78974358974359e-05,
742
+ "loss": 0.4964,
743
+ "step": 2550
744
+ },
745
+ {
746
+ "epoch": 2.8359030837004404,
747
+ "grad_norm": 0.9454672336578369,
748
+ "learning_rate": 1.7871794871794875e-05,
749
+ "loss": 0.5225,
750
+ "step": 2575
751
+ },
752
+ {
753
+ "epoch": 2.8634361233480177,
754
+ "grad_norm": 0.8262547850608826,
755
+ "learning_rate": 1.784615384615385e-05,
756
+ "loss": 0.5573,
757
+ "step": 2600
758
+ },
759
+ {
760
+ "epoch": 2.890969162995595,
761
+ "grad_norm": 1.0611587762832642,
762
+ "learning_rate": 1.7820512820512823e-05,
763
+ "loss": 0.5183,
764
+ "step": 2625
765
+ },
766
+ {
767
+ "epoch": 2.9185022026431717,
768
+ "grad_norm": 0.7847844958305359,
769
+ "learning_rate": 1.7794871794871796e-05,
770
+ "loss": 0.5333,
771
+ "step": 2650
772
+ },
773
+ {
774
+ "epoch": 2.946035242290749,
775
+ "grad_norm": 0.746285617351532,
776
+ "learning_rate": 1.776923076923077e-05,
777
+ "loss": 0.5264,
778
+ "step": 2675
779
+ },
780
+ {
781
+ "epoch": 2.9735682819383262,
782
+ "grad_norm": 1.381616234779358,
783
+ "learning_rate": 1.7743589743589744e-05,
784
+ "loss": 0.5074,
785
+ "step": 2700
786
+ },
787
+ {
788
+ "epoch": 3.001101321585903,
789
+ "grad_norm": 0.6723135113716125,
790
+ "learning_rate": 1.7717948717948717e-05,
791
+ "loss": 0.5631,
792
+ "step": 2725
793
+ },
794
+ {
795
+ "epoch": 3.0286343612334803,
796
+ "grad_norm": 0.9439449906349182,
797
+ "learning_rate": 1.7692307692307694e-05,
798
+ "loss": 0.3836,
799
+ "step": 2750
800
+ },
801
+ {
802
+ "epoch": 3.056167400881057,
803
+ "grad_norm": 0.9093062281608582,
804
+ "learning_rate": 1.7666666666666668e-05,
805
+ "loss": 0.342,
806
+ "step": 2775
807
+ },
808
+ {
809
+ "epoch": 3.0837004405286343,
810
+ "grad_norm": 0.7883495092391968,
811
+ "learning_rate": 1.7641025641025642e-05,
812
+ "loss": 0.3678,
813
+ "step": 2800
814
+ },
815
+ {
816
+ "epoch": 3.1112334801762116,
817
+ "grad_norm": 0.5074595808982849,
818
+ "learning_rate": 1.7615384615384615e-05,
819
+ "loss": 0.3406,
820
+ "step": 2825
821
+ },
822
+ {
823
+ "epoch": 3.1387665198237884,
824
+ "grad_norm": 1.330426812171936,
825
+ "learning_rate": 1.7589743589743592e-05,
826
+ "loss": 0.3432,
827
+ "step": 2850
828
+ },
829
+ {
830
+ "epoch": 3.1662995594713657,
831
+ "grad_norm": 1.008254051208496,
832
+ "learning_rate": 1.7564102564102566e-05,
833
+ "loss": 0.3722,
834
+ "step": 2875
835
+ },
836
+ {
837
+ "epoch": 3.193832599118943,
838
+ "grad_norm": 1.0520501136779785,
839
+ "learning_rate": 1.753846153846154e-05,
840
+ "loss": 0.3719,
841
+ "step": 2900
842
+ },
843
+ {
844
+ "epoch": 3.2213656387665197,
845
+ "grad_norm": 0.7822287082672119,
846
+ "learning_rate": 1.7512820512820513e-05,
847
+ "loss": 0.3729,
848
+ "step": 2925
849
+ },
850
+ {
851
+ "epoch": 3.248898678414097,
852
+ "grad_norm": 1.1690279245376587,
853
+ "learning_rate": 1.7487179487179487e-05,
854
+ "loss": 0.3723,
855
+ "step": 2950
856
+ },
857
+ {
858
+ "epoch": 3.2764317180616738,
859
+ "grad_norm": 0.8030567765235901,
860
+ "learning_rate": 1.7461538461538464e-05,
861
+ "loss": 0.362,
862
+ "step": 2975
863
+ },
864
+ {
865
+ "epoch": 3.303964757709251,
866
+ "grad_norm": 0.7881470918655396,
867
+ "learning_rate": 1.7435897435897438e-05,
868
+ "loss": 0.3373,
869
+ "step": 3000
870
+ },
871
+ {
872
+ "epoch": 3.303964757709251,
873
+ "eval_cer": 29.99495603727947,
874
+ "eval_loss": 0.7765971422195435,
875
+ "eval_runtime": 1749.0783,
876
+ "eval_samples_per_second": 6.049,
877
+ "eval_steps_per_second": 1.513,
878
+ "eval_wer": 87.76991984912777,
879
+ "step": 3000
880
  }
881
  ],
882
  "logging_steps": 25,
 
896
  "attributes": {}
897
  }
898
  },
899
+ "total_flos": 5.154429559470293e+19,
900
  "train_batch_size": 4,
901
  "trial_name": null,
902
  "trial_params": null