dq158 committed on
Commit
7fea8dd
·
1 Parent(s): 8239c57

Training in progress, epoch 5, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de02ef88a9c06d1c4c39ca3c4c2f1202553b57c8ad4795abcd030e9f5ec610f8
3
  size 1980860410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a719ae057e05af32962d3c0b0a042e87e0340cd8be875b7011b5d7c0a11eb6c
3
  size 1980860410
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef45bf07e4aa6ce72fbbd0a19258dbb992063d4bf6b046ce62330fa93b06089d
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20dab4a02d1fc1823157099879eca284bd66f0c8febf3dba5cfc87be7c9c9028
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c1fd7d1cea75e4e374c54e815a21a8bbeecb72f564c49061658d53b541359df
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfda48251ec49dae4ce59144bf0a41ddf1eaebff873a2c756112a5149466e4b8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dde9ec5a9c5afe2585b7bb2f95d066fbcef5ffc592770aca32d5e4130db08ddc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5cbe3af0ede50d3acac3ddaa7d06c821cbab27a0479e23f99d3fb100db2c5e0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.8330533504486084,
3
- "best_model_checkpoint": "dq158/morbius/checkpoint-38272",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 38272,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -539,13 +539,146 @@
539
  "eval_steps_per_second": 1.311,
540
  "eval_translation_length": 105535,
541
  "step": 38272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  }
543
  ],
544
  "logging_steps": 500,
545
  "max_steps": 47840,
546
  "num_train_epochs": 5,
547
  "save_steps": 500,
548
- "total_flos": 3.1445701042058035e+17,
549
  "trial_name": null,
550
  "trial_params": null
551
  }
 
1
  {
2
+ "best_metric": 1.8320603370666504,
3
+ "best_model_checkpoint": "dq158/morbius/checkpoint-47840",
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 47840,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
539
  "eval_steps_per_second": 1.311,
540
  "eval_translation_length": 105535,
541
  "step": 38272
542
+ },
543
+ {
544
+ "epoch": 4.02,
545
+ "learning_rate": 9.76170568561873e-06,
546
+ "loss": 1.9148,
547
+ "step": 38500
548
+ },
549
+ {
550
+ "epoch": 4.08,
551
+ "learning_rate": 9.239130434782608e-06,
552
+ "loss": 1.9031,
553
+ "step": 39000
554
+ },
555
+ {
556
+ "epoch": 4.13,
557
+ "learning_rate": 8.716555183946488e-06,
558
+ "loss": 1.8884,
559
+ "step": 39500
560
+ },
561
+ {
562
+ "epoch": 4.18,
563
+ "learning_rate": 8.193979933110369e-06,
564
+ "loss": 1.8267,
565
+ "step": 40000
566
+ },
567
+ {
568
+ "epoch": 4.23,
569
+ "learning_rate": 7.671404682274248e-06,
570
+ "loss": 1.8556,
571
+ "step": 40500
572
+ },
573
+ {
574
+ "epoch": 4.29,
575
+ "learning_rate": 7.148829431438127e-06,
576
+ "loss": 1.9098,
577
+ "step": 41000
578
+ },
579
+ {
580
+ "epoch": 4.34,
581
+ "learning_rate": 6.6262541806020064e-06,
582
+ "loss": 1.8693,
583
+ "step": 41500
584
+ },
585
+ {
586
+ "epoch": 4.39,
587
+ "learning_rate": 6.103678929765887e-06,
588
+ "loss": 1.8622,
589
+ "step": 42000
590
+ },
591
+ {
592
+ "epoch": 4.44,
593
+ "learning_rate": 5.581103678929766e-06,
594
+ "loss": 1.8848,
595
+ "step": 42500
596
+ },
597
+ {
598
+ "epoch": 4.49,
599
+ "learning_rate": 5.0585284280936456e-06,
600
+ "loss": 1.8998,
601
+ "step": 43000
602
+ },
603
+ {
604
+ "epoch": 4.55,
605
+ "learning_rate": 4.535953177257525e-06,
606
+ "loss": 1.9059,
607
+ "step": 43500
608
+ },
609
+ {
610
+ "epoch": 4.6,
611
+ "learning_rate": 4.013377926421405e-06,
612
+ "loss": 1.8852,
613
+ "step": 44000
614
+ },
615
+ {
616
+ "epoch": 4.65,
617
+ "learning_rate": 3.4908026755852843e-06,
618
+ "loss": 1.8984,
619
+ "step": 44500
620
+ },
621
+ {
622
+ "epoch": 4.7,
623
+ "learning_rate": 2.968227424749164e-06,
624
+ "loss": 1.8517,
625
+ "step": 45000
626
+ },
627
+ {
628
+ "epoch": 4.76,
629
+ "learning_rate": 2.4456521739130437e-06,
630
+ "loss": 1.917,
631
+ "step": 45500
632
+ },
633
+ {
634
+ "epoch": 4.81,
635
+ "learning_rate": 1.9230769230769234e-06,
636
+ "loss": 1.9134,
637
+ "step": 46000
638
+ },
639
+ {
640
+ "epoch": 4.86,
641
+ "learning_rate": 1.4005016722408027e-06,
642
+ "loss": 1.8772,
643
+ "step": 46500
644
+ },
645
+ {
646
+ "epoch": 4.91,
647
+ "learning_rate": 8.779264214046823e-07,
648
+ "loss": 1.8938,
649
+ "step": 47000
650
+ },
651
+ {
652
+ "epoch": 4.96,
653
+ "learning_rate": 3.553511705685619e-07,
654
+ "loss": 1.8652,
655
+ "step": 47500
656
+ },
657
+ {
658
+ "epoch": 5.0,
659
+ "eval_bleu": 0.08741495854193439,
660
+ "eval_brevity_penalty": 0.7850055548072304,
661
+ "eval_length_ratio": 0.8051111774362035,
662
+ "eval_loss": 1.8320603370666504,
663
+ "eval_precisions": [
664
+ 0.1860501756239516,
665
+ 0.11514125956444968,
666
+ 0.08657509646419298,
667
+ 0.08290830945558739
668
+ ],
669
+ "eval_reference_length": 134020,
670
+ "eval_runtime": 812.8238,
671
+ "eval_samples_per_second": 15.695,
672
+ "eval_steps_per_second": 1.309,
673
+ "eval_translation_length": 107901,
674
+ "step": 47840
675
  }
676
  ],
677
  "logging_steps": 500,
678
  "max_steps": 47840,
679
  "num_train_epochs": 5,
680
  "save_steps": 500,
681
+ "total_flos": 3.9307126302572544e+17,
682
  "trial_name": null,
683
  "trial_params": null
684
  }