ihanif committed on
Commit
e97b524
·
1 Parent(s): e6cae52

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 12.5,
3
  "eval_loss": 0.8710034489631653,
4
- "eval_runtime": 202.2604,
5
- "eval_samples_per_second": 2.531,
6
- "eval_steps_per_second": 0.079,
7
  "eval_wer": 60.05599273607748,
8
- "train_loss": 0.31471561336517334,
9
- "train_runtime": 1275.0957,
10
- "train_samples_per_second": 25.096,
11
- "train_steps_per_second": 0.392
12
  }
 
1
  {
2
+ "epoch": 25.0,
3
  "eval_loss": 0.8710034489631653,
4
+ "eval_runtime": 324.5791,
5
+ "eval_samples_per_second": 1.577,
6
+ "eval_steps_per_second": 0.049,
7
  "eval_wer": 60.05599273607748,
8
+ "train_loss": 0.16543263983726503,
9
+ "train_runtime": 2579.5458,
10
+ "train_samples_per_second": 24.811,
11
+ "train_steps_per_second": 0.388
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 12.5,
3
  "eval_loss": 0.8710034489631653,
4
- "eval_runtime": 202.2604,
5
- "eval_samples_per_second": 2.531,
6
- "eval_steps_per_second": 0.079,
7
  "eval_wer": 60.05599273607748
8
  }
 
1
  {
2
+ "epoch": 25.0,
3
  "eval_loss": 0.8710034489631653,
4
+ "eval_runtime": 324.5791,
5
+ "eval_samples_per_second": 1.577,
6
+ "eval_steps_per_second": 0.049,
7
  "eval_wer": 60.05599273607748
8
  }
runs/Dec20_18-16-58_129-146-32-172/events.out.tfevents.1671563202.129-146-32-172.137520.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c06e882b57bbb25ce16c01b5e19af5d55ef76cf57fb5e4836eeb3b94df95f92e
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 12.5,
3
- "train_loss": 0.31471561336517334,
4
- "train_runtime": 1275.0957,
5
- "train_samples_per_second": 25.096,
6
- "train_steps_per_second": 0.392
7
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "train_loss": 0.16543263983726503,
4
+ "train_runtime": 2579.5458,
5
+ "train_samples_per_second": 24.811,
6
+ "train_steps_per_second": 0.388
7
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 0.8710034489631653,
3
  "best_model_checkpoint": "./checkpoint-400",
4
- "epoch": 12.5,
5
- "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -353,18 +353,363 @@
353
  "step": 500
354
  },
355
  {
356
- "epoch": 12.5,
357
- "step": 500,
358
- "total_flos": 2.02648572002304e+18,
359
- "train_loss": 0.31471561336517334,
360
- "train_runtime": 1275.0957,
361
- "train_samples_per_second": 25.096,
362
- "train_steps_per_second": 0.392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  }
364
  ],
365
- "max_steps": 500,
366
- "num_train_epochs": 13,
367
- "total_flos": 2.02648572002304e+18,
368
  "trial_name": null,
369
  "trial_params": null
370
  }
 
1
  {
2
  "best_metric": 0.8710034489631653,
3
  "best_model_checkpoint": "./checkpoint-400",
4
+ "epoch": 25.0,
5
+ "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
353
  "step": 500
354
  },
355
  {
356
+ "epoch": 12.75,
357
+ "learning_rate": 5.092783505154639e-06,
358
+ "loss": 0.4501,
359
+ "step": 510
360
+ },
361
+ {
362
+ "epoch": 13.0,
363
+ "learning_rate": 4.989690721649485e-06,
364
+ "loss": 0.4693,
365
+ "step": 520
366
+ },
367
+ {
368
+ "epoch": 13.25,
369
+ "learning_rate": 4.88659793814433e-06,
370
+ "loss": 0.4261,
371
+ "step": 530
372
+ },
373
+ {
374
+ "epoch": 13.5,
375
+ "learning_rate": 4.783505154639176e-06,
376
+ "loss": 0.4336,
377
+ "step": 540
378
+ },
379
+ {
380
+ "epoch": 13.75,
381
+ "learning_rate": 4.680412371134021e-06,
382
+ "loss": 0.4303,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 14.0,
387
+ "learning_rate": 4.577319587628866e-06,
388
+ "loss": 0.4231,
389
+ "step": 560
390
+ },
391
+ {
392
+ "epoch": 14.25,
393
+ "learning_rate": 4.474226804123712e-06,
394
+ "loss": 0.4178,
395
+ "step": 570
396
+ },
397
+ {
398
+ "epoch": 14.5,
399
+ "learning_rate": 4.371134020618557e-06,
400
+ "loss": 0.4046,
401
+ "step": 580
402
+ },
403
+ {
404
+ "epoch": 14.75,
405
+ "learning_rate": 4.2680412371134025e-06,
406
+ "loss": 0.3927,
407
+ "step": 590
408
+ },
409
+ {
410
+ "epoch": 15.0,
411
+ "learning_rate": 4.164948453608248e-06,
412
+ "loss": 0.3882,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 15.0,
417
+ "eval_loss": 0.8819407224655151,
418
+ "eval_runtime": 211.3554,
419
+ "eval_samples_per_second": 2.422,
420
+ "eval_steps_per_second": 0.076,
421
+ "eval_wer": 59.06476997578692,
422
+ "step": 600
423
+ },
424
+ {
425
+ "epoch": 15.25,
426
+ "learning_rate": 4.061855670103093e-06,
427
+ "loss": 0.372,
428
+ "step": 610
429
+ },
430
+ {
431
+ "epoch": 15.5,
432
+ "learning_rate": 3.958762886597938e-06,
433
+ "loss": 0.3832,
434
+ "step": 620
435
+ },
436
+ {
437
+ "epoch": 15.75,
438
+ "learning_rate": 3.855670103092784e-06,
439
+ "loss": 0.3756,
440
+ "step": 630
441
+ },
442
+ {
443
+ "epoch": 16.0,
444
+ "learning_rate": 3.752577319587629e-06,
445
+ "loss": 0.3818,
446
+ "step": 640
447
+ },
448
+ {
449
+ "epoch": 16.25,
450
+ "learning_rate": 3.6494845360824745e-06,
451
+ "loss": 0.3669,
452
+ "step": 650
453
+ },
454
+ {
455
+ "epoch": 16.5,
456
+ "learning_rate": 3.54639175257732e-06,
457
+ "loss": 0.3481,
458
+ "step": 660
459
+ },
460
+ {
461
+ "epoch": 16.75,
462
+ "learning_rate": 3.443298969072165e-06,
463
+ "loss": 0.3568,
464
+ "step": 670
465
+ },
466
+ {
467
+ "epoch": 17.0,
468
+ "learning_rate": 3.3402061855670103e-06,
469
+ "loss": 0.3567,
470
+ "step": 680
471
+ },
472
+ {
473
+ "epoch": 17.25,
474
+ "learning_rate": 3.2371134020618563e-06,
475
+ "loss": 0.3513,
476
+ "step": 690
477
+ },
478
+ {
479
+ "epoch": 17.5,
480
+ "learning_rate": 3.1340206185567014e-06,
481
+ "loss": 0.3346,
482
+ "step": 700
483
+ },
484
+ {
485
+ "epoch": 17.5,
486
+ "eval_loss": 0.9031797647476196,
487
+ "eval_runtime": 183.8672,
488
+ "eval_samples_per_second": 2.785,
489
+ "eval_steps_per_second": 0.087,
490
+ "eval_wer": 59.480932203389834,
491
+ "step": 700
492
+ },
493
+ {
494
+ "epoch": 17.75,
495
+ "learning_rate": 3.0309278350515465e-06,
496
+ "loss": 0.3322,
497
+ "step": 710
498
+ },
499
+ {
500
+ "epoch": 18.0,
501
+ "learning_rate": 2.927835051546392e-06,
502
+ "loss": 0.3114,
503
+ "step": 720
504
+ },
505
+ {
506
+ "epoch": 18.25,
507
+ "learning_rate": 2.8247422680412372e-06,
508
+ "loss": 0.3155,
509
+ "step": 730
510
+ },
511
+ {
512
+ "epoch": 18.5,
513
+ "learning_rate": 2.7216494845360823e-06,
514
+ "loss": 0.3295,
515
+ "step": 740
516
+ },
517
+ {
518
+ "epoch": 18.75,
519
+ "learning_rate": 2.6185567010309283e-06,
520
+ "loss": 0.314,
521
+ "step": 750
522
+ },
523
+ {
524
+ "epoch": 19.0,
525
+ "learning_rate": 2.5154639175257734e-06,
526
+ "loss": 0.3068,
527
+ "step": 760
528
+ },
529
+ {
530
+ "epoch": 19.25,
531
+ "learning_rate": 2.412371134020619e-06,
532
+ "loss": 0.312,
533
+ "step": 770
534
+ },
535
+ {
536
+ "epoch": 19.5,
537
+ "learning_rate": 2.309278350515464e-06,
538
+ "loss": 0.2938,
539
+ "step": 780
540
+ },
541
+ {
542
+ "epoch": 19.75,
543
+ "learning_rate": 2.2061855670103092e-06,
544
+ "loss": 0.3052,
545
+ "step": 790
546
+ },
547
+ {
548
+ "epoch": 20.0,
549
+ "learning_rate": 2.1030927835051548e-06,
550
+ "loss": 0.2947,
551
+ "step": 800
552
+ },
553
+ {
554
+ "epoch": 20.0,
555
+ "eval_loss": 0.9144465923309326,
556
+ "eval_runtime": 185.6121,
557
+ "eval_samples_per_second": 2.758,
558
+ "eval_steps_per_second": 0.086,
559
+ "eval_wer": 59.76846246973365,
560
+ "step": 800
561
+ },
562
+ {
563
+ "epoch": 20.25,
564
+ "learning_rate": 2.0000000000000003e-06,
565
+ "loss": 0.2897,
566
+ "step": 810
567
+ },
568
+ {
569
+ "epoch": 20.5,
570
+ "learning_rate": 1.8969072164948455e-06,
571
+ "loss": 0.2856,
572
+ "step": 820
573
+ },
574
+ {
575
+ "epoch": 20.75,
576
+ "learning_rate": 1.7938144329896908e-06,
577
+ "loss": 0.3027,
578
+ "step": 830
579
+ },
580
+ {
581
+ "epoch": 21.0,
582
+ "learning_rate": 1.6907216494845363e-06,
583
+ "loss": 0.3446,
584
+ "step": 840
585
+ },
586
+ {
587
+ "epoch": 21.25,
588
+ "learning_rate": 1.5876288659793815e-06,
589
+ "loss": 0.2766,
590
+ "step": 850
591
+ },
592
+ {
593
+ "epoch": 21.5,
594
+ "learning_rate": 1.484536082474227e-06,
595
+ "loss": 0.2816,
596
+ "step": 860
597
+ },
598
+ {
599
+ "epoch": 21.75,
600
+ "learning_rate": 1.3814432989690724e-06,
601
+ "loss": 0.2845,
602
+ "step": 870
603
+ },
604
+ {
605
+ "epoch": 22.0,
606
+ "learning_rate": 1.2783505154639175e-06,
607
+ "loss": 0.2828,
608
+ "step": 880
609
+ },
610
+ {
611
+ "epoch": 22.25,
612
+ "learning_rate": 1.175257731958763e-06,
613
+ "loss": 0.2766,
614
+ "step": 890
615
+ },
616
+ {
617
+ "epoch": 22.5,
618
+ "learning_rate": 1.0721649484536084e-06,
619
+ "loss": 0.2724,
620
+ "step": 900
621
+ },
622
+ {
623
+ "epoch": 22.5,
624
+ "eval_loss": 0.9288888573646545,
625
+ "eval_runtime": 275.793,
626
+ "eval_samples_per_second": 1.856,
627
+ "eval_steps_per_second": 0.058,
628
+ "eval_wer": 58.98153753026635,
629
+ "step": 900
630
+ },
631
+ {
632
+ "epoch": 22.75,
633
+ "learning_rate": 9.690721649484537e-07,
634
+ "loss": 0.267,
635
+ "step": 910
636
+ },
637
+ {
638
+ "epoch": 23.0,
639
+ "learning_rate": 8.65979381443299e-07,
640
+ "loss": 0.2602,
641
+ "step": 920
642
+ },
643
+ {
644
+ "epoch": 23.25,
645
+ "learning_rate": 7.628865979381445e-07,
646
+ "loss": 0.2692,
647
+ "step": 930
648
+ },
649
+ {
650
+ "epoch": 23.5,
651
+ "learning_rate": 6.597938144329897e-07,
652
+ "loss": 0.2623,
653
+ "step": 940
654
+ },
655
+ {
656
+ "epoch": 23.75,
657
+ "learning_rate": 5.567010309278352e-07,
658
+ "loss": 0.2729,
659
+ "step": 950
660
+ },
661
+ {
662
+ "epoch": 24.0,
663
+ "learning_rate": 4.5360824742268044e-07,
664
+ "loss": 0.263,
665
+ "step": 960
666
+ },
667
+ {
668
+ "epoch": 24.25,
669
+ "learning_rate": 3.505154639175258e-07,
670
+ "loss": 0.2656,
671
+ "step": 970
672
+ },
673
+ {
674
+ "epoch": 24.5,
675
+ "learning_rate": 2.474226804123711e-07,
676
+ "loss": 0.2633,
677
+ "step": 980
678
+ },
679
+ {
680
+ "epoch": 24.75,
681
+ "learning_rate": 1.443298969072165e-07,
682
+ "loss": 0.2661,
683
+ "step": 990
684
+ },
685
+ {
686
+ "epoch": 25.0,
687
+ "learning_rate": 4.123711340206186e-08,
688
+ "loss": 0.2785,
689
+ "step": 1000
690
+ },
691
+ {
692
+ "epoch": 25.0,
693
+ "eval_loss": 0.933917224407196,
694
+ "eval_runtime": 339.9994,
695
+ "eval_samples_per_second": 1.506,
696
+ "eval_steps_per_second": 0.047,
697
+ "eval_wer": 59.200968523002416,
698
+ "step": 1000
699
+ },
700
+ {
701
+ "epoch": 25.0,
702
+ "step": 1000,
703
+ "total_flos": 4.048885260288e+18,
704
+ "train_loss": 0.16543263983726503,
705
+ "train_runtime": 2579.5458,
706
+ "train_samples_per_second": 24.811,
707
+ "train_steps_per_second": 0.388
708
  }
709
  ],
710
+ "max_steps": 1000,
711
+ "num_train_epochs": 25,
712
+ "total_flos": 4.048885260288e+18,
713
  "trial_name": null,
714
  "trial_params": null
715
  }