Training in progress, step 5000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6b52d2c4e6f1dc1fc53e1df4ec08ffe7a50c1b6037cc45122a1b5264d5c4b91
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:29ab3bcbd54c5e63c4e604ac4ad2f368ae42aa766977dc0340b7b8e0814fb858
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13a58a7f728d5913709f013bfd6cbcb991064242e3075f2b5e93d9b5b184b9f7
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5104f05c76008a8cc4ebab2ab5f343ccdca71dafda81e126d612fe143dbfa54
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf45e6f5a33d99139eae20e5be76bd3bf9589da43c06744e1ac55dde6dda87db
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:a04575953c998a8fd3197b1b8249c8e72c33f4bb7c27b036788a4d9e537cf3cd
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:839b4043be0c777e952526844484b5d7c9eb08d95c6a855198a76f2eb1f08d84
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:a29280eedf28bde93a8485de1b90963ca69c84125cea86695b5935449e18f453
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7602635580334516,
   "eval_steps": 500,
-  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3237,6 +3237,364 @@
       "eval_samples_per_second": 280.631,
       "eval_steps_per_second": 5.893,
       "step": 4500
     }
   ],
   "logging_steps": 10,
@@ -3256,7 +3614,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.50505569386496e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8447372867038351,
   "eval_steps": 500,
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 280.631,
       "eval_steps_per_second": 5.893,
       "step": 4500
+    },
+    {
+      "epoch": 0.7619530326068593,
+      "grad_norm": 0.5667482614517212,
+      "learning_rate": 0.0002543762555494541,
+      "loss": 4.658942794799804,
+      "step": 4510
+    },
+    {
+      "epoch": 0.7636425071802669,
+      "grad_norm": 0.5182068347930908,
+      "learning_rate": 0.0002540317094124131,
+      "loss": 4.675426483154297,
+      "step": 4520
+    },
+    {
+      "epoch": 0.7653319817536746,
+      "grad_norm": 0.5856271982192993,
+      "learning_rate": 0.0002536861024314936,
+      "loss": 4.647851181030274,
+      "step": 4530
+    },
+    {
+      "epoch": 0.7670214563270823,
+      "grad_norm": 0.5433441400527954,
+      "learning_rate": 0.0002533394381309583,
+      "loss": 4.628954315185547,
+      "step": 4540
+    },
+    {
+      "epoch": 0.7687109309004899,
+      "grad_norm": 0.5408143401145935,
+      "learning_rate": 0.00025299172004585144,
+      "loss": 4.679843139648438,
+      "step": 4550
+    },
+    {
+      "epoch": 0.7704004054738977,
+      "grad_norm": 0.5055237412452698,
+      "learning_rate": 0.00025264295172196304,
+      "loss": 4.667029190063476,
+      "step": 4560
+    },
+    {
+      "epoch": 0.7720898800473053,
+      "grad_norm": 0.5375385284423828,
+      "learning_rate": 0.0002522931367157928,
+      "loss": 4.6552692413330075,
+      "step": 4570
+    },
+    {
+      "epoch": 0.773779354620713,
+      "grad_norm": 0.5234900712966919,
+      "learning_rate": 0.00025194227859451384,
+      "loss": 4.664133071899414,
+      "step": 4580
+    },
+    {
+      "epoch": 0.7754688291941206,
+      "grad_norm": 0.5033290386199951,
+      "learning_rate": 0.00025159038093593606,
+      "loss": 4.677631759643555,
+      "step": 4590
+    },
+    {
+      "epoch": 0.7771583037675283,
+      "grad_norm": 0.5198631286621094,
+      "learning_rate": 0.0002512374473284699,
+      "loss": 4.641722106933594,
+      "step": 4600
+    },
+    {
+      "epoch": 0.778847778340936,
+      "grad_norm": 0.5315260291099548,
+      "learning_rate": 0.00025088348137108983,
+      "loss": 4.641604614257813,
+      "step": 4610
+    },
+    {
+      "epoch": 0.7805372529143436,
+      "grad_norm": 0.5272190570831299,
+      "learning_rate": 0.0002505284866732974,
+      "loss": 4.667778778076172,
+      "step": 4620
+    },
+    {
+      "epoch": 0.7822267274877513,
+      "grad_norm": 0.5185366868972778,
+      "learning_rate": 0.0002501724668550846,
+      "loss": 4.631634902954102,
+      "step": 4630
+    },
+    {
+      "epoch": 0.783916202061159,
+      "grad_norm": 0.5354645252227783,
+      "learning_rate": 0.00024981542554689684,
+      "loss": 4.678403091430664,
+      "step": 4640
+    },
+    {
+      "epoch": 0.7856056766345666,
+      "grad_norm": 0.5226261019706726,
+      "learning_rate": 0.000249457366389596,
+      "loss": 4.658837890625,
+      "step": 4650
+    },
+    {
+      "epoch": 0.7872951512079743,
+      "grad_norm": 0.558031439781189,
+      "learning_rate": 0.0002490982930344233,
+      "loss": 4.646864318847657,
+      "step": 4660
+    },
+    {
+      "epoch": 0.788984625781382,
+      "grad_norm": 0.519002377986908,
+      "learning_rate": 0.0002487382091429621,
+      "loss": 4.6644752502441404,
+      "step": 4670
+    },
+    {
+      "epoch": 0.7906741003547897,
+      "grad_norm": 0.5250281095504761,
+      "learning_rate": 0.00024837711838710035,
+      "loss": 4.6212821960449215,
+      "step": 4680
+    },
+    {
+      "epoch": 0.7923635749281973,
+      "grad_norm": 0.5656465291976929,
+      "learning_rate": 0.00024801502444899353,
+      "loss": 4.661688995361328,
+      "step": 4690
+    },
+    {
+      "epoch": 0.794053049501605,
+      "grad_norm": 0.5460257530212402,
+      "learning_rate": 0.00024765193102102676,
+      "loss": 4.65002555847168,
+      "step": 4700
+    },
+    {
+      "epoch": 0.7957425240750127,
+      "grad_norm": 0.5244697332382202,
+      "learning_rate": 0.0002472878418057772,
+      "loss": 4.6698455810546875,
+      "step": 4710
+    },
+    {
+      "epoch": 0.7974319986484203,
+      "grad_norm": 0.5033484697341919,
+      "learning_rate": 0.0002469227605159766,
+      "loss": 4.634893798828125,
+      "step": 4720
+    },
+    {
+      "epoch": 0.799121473221828,
+      "grad_norm": 0.5970498323440552,
+      "learning_rate": 0.0002465566908744729,
+      "loss": 4.61572494506836,
+      "step": 4730
+    },
+    {
+      "epoch": 0.8008109477952357,
+      "grad_norm": 0.5307066440582275,
+      "learning_rate": 0.00024618963661419285,
+      "loss": 4.648424530029297,
+      "step": 4740
+    },
+    {
+      "epoch": 0.8025004223686434,
+      "grad_norm": 0.48402100801467896,
+      "learning_rate": 0.0002458216014781035,
+      "loss": 4.620497131347657,
+      "step": 4750
+    },
+    {
+      "epoch": 0.804189896942051,
+      "grad_norm": 0.5049648880958557,
+      "learning_rate": 0.00024545258921917416,
+      "loss": 4.6289928436279295,
+      "step": 4760
+    },
+    {
+      "epoch": 0.8058793715154587,
+      "grad_norm": 0.4983990788459778,
+      "learning_rate": 0.0002450826036003384,
+      "loss": 4.63318977355957,
+      "step": 4770
+    },
+    {
+      "epoch": 0.8075688460888664,
+      "grad_norm": 0.5099707245826721,
+      "learning_rate": 0.00024471164839445526,
+      "loss": 4.635572814941407,
+      "step": 4780
+    },
+    {
+      "epoch": 0.809258320662274,
+      "grad_norm": 0.5057718753814697,
+      "learning_rate": 0.0002443397273842709,
+      "loss": 4.644168090820313,
+      "step": 4790
+    },
+    {
+      "epoch": 0.8109477952356817,
+      "grad_norm": 0.5193650126457214,
+      "learning_rate": 0.00024396684436238025,
+      "loss": 4.605130386352539,
+      "step": 4800
+    },
+    {
+      "epoch": 0.8126372698090893,
+      "grad_norm": 0.5483851432800293,
+      "learning_rate": 0.00024359300313118814,
+      "loss": 4.638274002075195,
+      "step": 4810
+    },
+    {
+      "epoch": 0.8143267443824971,
+      "grad_norm": 0.5573034882545471,
+      "learning_rate": 0.00024321820750287045,
+      "loss": 4.6438957214355465,
+      "step": 4820
+    },
+    {
+      "epoch": 0.8160162189559047,
+      "grad_norm": 0.5450712442398071,
+      "learning_rate": 0.00024284246129933543,
+      "loss": 4.602296447753906,
+      "step": 4830
+    },
+    {
+      "epoch": 0.8177056935293124,
+      "grad_norm": 0.5248677730560303,
+      "learning_rate": 0.0002424657683521847,
+      "loss": 4.624288558959961,
+      "step": 4840
+    },
+    {
+      "epoch": 0.8193951681027201,
+      "grad_norm": 0.5236571431159973,
+      "learning_rate": 0.00024208813250267404,
+      "loss": 4.620320510864258,
+      "step": 4850
+    },
+    {
+      "epoch": 0.8210846426761277,
+      "grad_norm": 0.47448018193244934,
+      "learning_rate": 0.00024170955760167436,
+      "loss": 4.633553314208984,
+      "step": 4860
+    },
+    {
+      "epoch": 0.8227741172495354,
+      "grad_norm": 0.5197002291679382,
+      "learning_rate": 0.0002413300475096322,
+      "loss": 4.628173828125,
+      "step": 4870
+    },
+    {
+      "epoch": 0.824463591822943,
+      "grad_norm": 0.5387418270111084,
+      "learning_rate": 0.00024094960609653078,
+      "loss": 4.629827880859375,
+      "step": 4880
+    },
+    {
+      "epoch": 0.8261530663963508,
+      "grad_norm": 0.5657356977462769,
+      "learning_rate": 0.00024056823724185014,
+      "loss": 4.612607955932617,
+      "step": 4890
+    },
+    {
+      "epoch": 0.8278425409697584,
+      "grad_norm": 0.5203890204429626,
+      "learning_rate": 0.00024018594483452783,
+      "loss": 4.595291519165039,
+      "step": 4900
+    },
+    {
+      "epoch": 0.829532015543166,
+      "grad_norm": 0.5515291094779968,
+      "learning_rate": 0.00023980273277291893,
+      "loss": 4.62861213684082,
+      "step": 4910
+    },
+    {
+      "epoch": 0.8312214901165738,
+      "grad_norm": 0.5310600399971008,
+      "learning_rate": 0.00023941860496475687,
+      "loss": 4.633145141601562,
+      "step": 4920
+    },
+    {
+      "epoch": 0.8329109646899814,
+      "grad_norm": 0.5451234579086304,
+      "learning_rate": 0.00023903356532711296,
+      "loss": 4.614830780029297,
+      "step": 4930
+    },
+    {
+      "epoch": 0.8346004392633891,
+      "grad_norm": 0.47833251953125,
+      "learning_rate": 0.0002386476177863568,
+      "loss": 4.622224807739258,
+      "step": 4940
+    },
+    {
+      "epoch": 0.8362899138367967,
+      "grad_norm": 0.5025030374526978,
+      "learning_rate": 0.00023826076627811628,
+      "loss": 4.607464599609375,
+      "step": 4950
+    },
+    {
+      "epoch": 0.8379793884102045,
+      "grad_norm": 0.5535337328910828,
+      "learning_rate": 0.0002378730147472371,
+      "loss": 4.580402374267578,
+      "step": 4960
+    },
+    {
+      "epoch": 0.8396688629836121,
+      "grad_norm": 0.5151374340057373,
+      "learning_rate": 0.00023748436714774294,
+      "loss": 4.648463439941406,
+      "step": 4970
+    },
+    {
+      "epoch": 0.8413583375570197,
+      "grad_norm": 0.5126184821128845,
+      "learning_rate": 0.00023709482744279492,
+      "loss": 4.621094512939453,
+      "step": 4980
+    },
+    {
+      "epoch": 0.8430478121304275,
+      "grad_norm": 0.5208641886711121,
+      "learning_rate": 0.00023670439960465128,
+      "loss": 4.607065582275391,
+      "step": 4990
+    },
+    {
+      "epoch": 0.8447372867038351,
+      "grad_norm": 0.5431861877441406,
+      "learning_rate": 0.00023631308761462677,
+      "loss": 4.6144451141357425,
+      "step": 5000
+    },
+    {
+      "epoch": 0.8447372867038351,
+      "eval_loss": 4.566016674041748,
+      "eval_runtime": 3.5736,
+      "eval_samples_per_second": 279.83,
+      "eval_steps_per_second": 5.876,
+      "step": 5000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.6722841042944e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null