NairaRahim commited on
Commit
d54998f
·
verified ·
1 Parent(s): 2ece161

Training in progress, epoch 7, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20c40f891a6ad2cd6bdb721e2f111292589ff390313316ee8f0d082edb0b9b03
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8ae4ab42781745b2126513149381b5b6ff7214663e4449b01b2359d8311e3a
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97e7d057f22cb8197d547d1ed0d192390fd1e34fb36aa35aa6b76f03d0e2f9d9
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac9434c970128034aa8822b8a0401c794db85187656338f45cc845953009b5b3
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe7c95ba6d299e128ae454cc0731e509722836b2913c0cc0546da0aa648a6383
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7565598da56842799ee28845bb7d5540de84da2eb38da30890faa373e17c3ad
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a099afb4d9a9c6bf9e5e93d59bc1aa866f860cc49e0492bfafa53bc834b220ce
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c0cd9d4d4eb26e88d5f90eed6823e5f94040581f03708c992959bc0b84c560f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.841033935546875,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-7830",
4
- "epoch": 6.0,
5
  "eval_steps": 500,
6
- "global_step": 7830,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -601,6 +601,105 @@
601
  "eval_samples_per_second": 26.467,
602
  "eval_steps_per_second": 3.326,
603
  "step": 7830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  }
605
  ],
606
  "logging_steps": 100,
@@ -629,7 +728,7 @@
629
  "attributes": {}
630
  }
631
  },
632
- "total_flos": 8443865418283008.0,
633
  "train_batch_size": 8,
634
  "trial_name": null,
635
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.818748474121094,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-9135",
4
+ "epoch": 7.0,
5
  "eval_steps": 500,
6
+ "global_step": 9135,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
601
  "eval_samples_per_second": 26.467,
602
  "eval_steps_per_second": 3.326,
603
  "step": 7830
604
+ },
605
+ {
606
+ "epoch": 6.053639846743295,
607
+ "grad_norm": 2.996056079864502,
608
+ "learning_rate": 4.6218390804597705e-05,
609
+ "loss": 33.631,
610
+ "step": 7900
611
+ },
612
+ {
613
+ "epoch": 6.130268199233717,
614
+ "grad_norm": 3.3260300159454346,
615
+ "learning_rate": 4.617049808429119e-05,
616
+ "loss": 33.9222,
617
+ "step": 8000
618
+ },
619
+ {
620
+ "epoch": 6.206896551724138,
621
+ "grad_norm": 2.214486598968506,
622
+ "learning_rate": 4.612260536398468e-05,
623
+ "loss": 32.9576,
624
+ "step": 8100
625
+ },
626
+ {
627
+ "epoch": 6.283524904214559,
628
+ "grad_norm": 3.6611664295196533,
629
+ "learning_rate": 4.6074712643678166e-05,
630
+ "loss": 33.5231,
631
+ "step": 8200
632
+ },
633
+ {
634
+ "epoch": 6.360153256704981,
635
+ "grad_norm": 2.582730770111084,
636
+ "learning_rate": 4.602681992337165e-05,
637
+ "loss": 33.6936,
638
+ "step": 8300
639
+ },
640
+ {
641
+ "epoch": 6.436781609195402,
642
+ "grad_norm": 2.739861488342285,
643
+ "learning_rate": 4.597892720306514e-05,
644
+ "loss": 33.3997,
645
+ "step": 8400
646
+ },
647
+ {
648
+ "epoch": 6.513409961685824,
649
+ "grad_norm": 2.2102463245391846,
650
+ "learning_rate": 4.593103448275862e-05,
651
+ "loss": 33.9374,
652
+ "step": 8500
653
+ },
654
+ {
655
+ "epoch": 6.590038314176245,
656
+ "grad_norm": 3.83150577545166,
657
+ "learning_rate": 4.5883141762452106e-05,
658
+ "loss": 33.9961,
659
+ "step": 8600
660
+ },
661
+ {
662
+ "epoch": 6.666666666666667,
663
+ "grad_norm": 3.981616735458374,
664
+ "learning_rate": 4.583524904214559e-05,
665
+ "loss": 33.5413,
666
+ "step": 8700
667
+ },
668
+ {
669
+ "epoch": 6.743295019157088,
670
+ "grad_norm": 2.3303332328796387,
671
+ "learning_rate": 4.578735632183908e-05,
672
+ "loss": 34.0529,
673
+ "step": 8800
674
+ },
675
+ {
676
+ "epoch": 6.819923371647509,
677
+ "grad_norm": 3.9573702812194824,
678
+ "learning_rate": 4.573946360153257e-05,
679
+ "loss": 33.2897,
680
+ "step": 8900
681
+ },
682
+ {
683
+ "epoch": 6.896551724137931,
684
+ "grad_norm": 2.6185879707336426,
685
+ "learning_rate": 4.5691570881226054e-05,
686
+ "loss": 34.0662,
687
+ "step": 9000
688
+ },
689
+ {
690
+ "epoch": 6.973180076628353,
691
+ "grad_norm": 3.1155271530151367,
692
+ "learning_rate": 4.564367816091955e-05,
693
+ "loss": 33.517,
694
+ "step": 9100
695
+ },
696
+ {
697
+ "epoch": 7.0,
698
+ "eval_loss": 34.818748474121094,
699
+ "eval_runtime": 49.3029,
700
+ "eval_samples_per_second": 26.469,
701
+ "eval_steps_per_second": 3.326,
702
+ "step": 9135
703
  }
704
  ],
705
  "logging_steps": 100,
 
728
  "attributes": {}
729
  }
730
  },
731
+ "total_flos": 9851176321330176.0,
732
  "train_batch_size": 8,
733
  "trial_name": null,
734
  "trial_params": null