Training in progress, step 4500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd5257ed25b3deedcdfbd77b311ce64f39ce97cab4262552a2cce890d0e1ed2f
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6b52d2c4e6f1dc1fc53e1df4ec08ffe7a50c1b6037cc45122a1b5264d5c4b91
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38c6d9ddeda93bf2814232d10b3b4a6111c3ba43c271d5af0fe9ac07ad7bdf8f
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:13a58a7f728d5913709f013bfd6cbcb991064242e3075f2b5e93d9b5b184b9f7
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8647979d889bb2b15d0a3e8961a7e547be28d07767d240f858bd959476bb870c
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf45e6f5a33d99139eae20e5be76bd3bf9589da43c06744e1ac55dde6dda87db
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f34721a2fd924d02bdad3691f09e25bcb5ed140f7982be7b710c4ccbd2538c0
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:839b4043be0c777e952526844484b5d7c9eb08d95c6a855198a76f2eb1f08d84
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6757898293630681,
   "eval_steps": 500,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2879,6 +2879,364 @@
       "eval_samples_per_second": 276.145,
       "eval_steps_per_second": 5.799,
       "step": 4000
     }
   ],
   "logging_steps": 10,
@@ -2898,7 +3256,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.33782728343552e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7602635580334516,
   "eval_steps": 500,
+  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 276.145,
       "eval_steps_per_second": 5.799,
       "step": 4000
+    },
+    {
+      "epoch": 0.6774793039364757,
+      "grad_norm": 0.5737301111221313,
+      "learning_rate": 0.00027017636818368575,
+      "loss": 4.737479400634766,
+      "step": 4010
+    },
+    {
+      "epoch": 0.6791687785098834,
+      "grad_norm": 0.5806599855422974,
+      "learning_rate": 0.0002698891091982504,
+      "loss": 4.715182876586914,
+      "step": 4020
+    },
+    {
+      "epoch": 0.6808582530832911,
+      "grad_norm": 0.5259511470794678,
+      "learning_rate": 0.00026960062766607135,
+      "loss": 4.735322189331055,
+      "step": 4030
+    },
+    {
+      "epoch": 0.6825477276566988,
+      "grad_norm": 0.5434650182723999,
+      "learning_rate": 0.0002693109265288851,
+      "loss": 4.725672912597656,
+      "step": 4040
+    },
+    {
+      "epoch": 0.6842372022301064,
+      "grad_norm": 0.5198240876197815,
+      "learning_rate": 0.0002690200087408648,
+      "loss": 4.725751113891602,
+      "step": 4050
+    },
+    {
+      "epoch": 0.6859266768035142,
+      "grad_norm": 0.5575292110443115,
+      "learning_rate": 0.00026872787726859004,
+      "loss": 4.715484619140625,
+      "step": 4060
+    },
+    {
+      "epoch": 0.6876161513769218,
+      "grad_norm": 0.561512291431427,
+      "learning_rate": 0.0002684345350910169,
+      "loss": 4.725441360473633,
+      "step": 4070
+    },
+    {
+      "epoch": 0.6893056259503294,
+      "grad_norm": 0.5424394011497498,
+      "learning_rate": 0.0002681399851994472,
+      "loss": 4.7274932861328125,
+      "step": 4080
+    },
+    {
+      "epoch": 0.6909951005237371,
+      "grad_norm": 0.5293362140655518,
+      "learning_rate": 0.00026784423059749845,
+      "loss": 4.7252765655517575,
+      "step": 4090
+    },
+    {
+      "epoch": 0.6926845750971448,
+      "grad_norm": 0.5352376103401184,
+      "learning_rate": 0.0002675472743010727,
+      "loss": 4.697403335571289,
+      "step": 4100
+    },
+    {
+      "epoch": 0.6943740496705525,
+      "grad_norm": 0.5501886010169983,
+      "learning_rate": 0.0002672491193383263,
+      "loss": 4.723195648193359,
+      "step": 4110
+    },
+    {
+      "epoch": 0.6960635242439601,
+      "grad_norm": 0.5308210253715515,
+      "learning_rate": 0.00026694976874963854,
+      "loss": 4.736632919311523,
+      "step": 4120
+    },
+    {
+      "epoch": 0.6977529988173679,
+      "grad_norm": 0.533686101436615,
+      "learning_rate": 0.00026664922558758105,
+      "loss": 4.699850463867188,
+      "step": 4130
+    },
+    {
+      "epoch": 0.6994424733907755,
+      "grad_norm": 0.52994704246521,
+      "learning_rate": 0.00026634749291688646,
+      "loss": 4.7275341033935545,
+      "step": 4140
+    },
+    {
+      "epoch": 0.7011319479641831,
+      "grad_norm": 0.5824037790298462,
+      "learning_rate": 0.00026604457381441715,
+      "loss": 4.705679702758789,
+      "step": 4150
+    },
+    {
+      "epoch": 0.7028214225375908,
+      "grad_norm": 0.6155262589454651,
+      "learning_rate": 0.00026574047136913403,
+      "loss": 4.699795150756836,
+      "step": 4160
+    },
+    {
+      "epoch": 0.7045108971109985,
+      "grad_norm": 0.5350865721702576,
+      "learning_rate": 0.0002654351886820648,
+      "loss": 4.712226867675781,
+      "step": 4170
+    },
+    {
+      "epoch": 0.7062003716844062,
+      "grad_norm": 0.5593312382698059,
+      "learning_rate": 0.0002651287288662724,
+      "loss": 4.721994018554687,
+      "step": 4180
+    },
+    {
+      "epoch": 0.7078898462578138,
+      "grad_norm": 0.5269652605056763,
+      "learning_rate": 0.0002648210950468236,
+      "loss": 4.703836822509766,
+      "step": 4190
+    },
+    {
+      "epoch": 0.7095793208312214,
+      "grad_norm": 0.5680537223815918,
+      "learning_rate": 0.0002645122903607566,
+      "loss": 4.695099258422852,
+      "step": 4200
+    },
+    {
+      "epoch": 0.7112687954046292,
+      "grad_norm": 0.5447277426719666,
+      "learning_rate": 0.0002642023179570493,
+      "loss": 4.695394515991211,
+      "step": 4210
+    },
+    {
+      "epoch": 0.7129582699780368,
+      "grad_norm": 0.5375188589096069,
+      "learning_rate": 0.0002638911809965874,
+      "loss": 4.705070495605469,
+      "step": 4220
+    },
+    {
+      "epoch": 0.7146477445514445,
+      "grad_norm": 0.5439088940620422,
+      "learning_rate": 0.0002635788826521316,
+      "loss": 4.692306900024414,
+      "step": 4230
+    },
+    {
+      "epoch": 0.7163372191248522,
+      "grad_norm": 0.5620496869087219,
+      "learning_rate": 0.00026326542610828597,
+      "loss": 4.7032218933105465,
+      "step": 4240
+    },
+    {
+      "epoch": 0.7180266936982599,
+      "grad_norm": 0.527233362197876,
+      "learning_rate": 0.00026295081456146485,
+      "loss": 4.714799880981445,
+      "step": 4250
+    },
+    {
+      "epoch": 0.7197161682716675,
+      "grad_norm": 0.5378382205963135,
+      "learning_rate": 0.0002626350512198606,
+      "loss": 4.6938121795654295,
+      "step": 4260
+    },
+    {
+      "epoch": 0.7214056428450751,
+      "grad_norm": 0.5405885577201843,
+      "learning_rate": 0.0002623181393034108,
+      "loss": 4.696908950805664,
+      "step": 4270
+    },
+    {
+      "epoch": 0.7230951174184829,
+      "grad_norm": 0.5289508700370789,
+      "learning_rate": 0.00026200008204376525,
+      "loss": 4.715534210205078,
+      "step": 4280
+    },
+    {
+      "epoch": 0.7247845919918905,
+      "grad_norm": 0.79053795337677,
+      "learning_rate": 0.00026168088268425346,
+      "loss": 4.691967391967774,
+      "step": 4290
+    },
+    {
+      "epoch": 0.7264740665652982,
+      "grad_norm": 0.5415652394294739,
+      "learning_rate": 0.00026136054447985105,
+      "loss": 4.698383331298828,
+      "step": 4300
+    },
+    {
+      "epoch": 0.7281635411387058,
+      "grad_norm": 0.5491306781768799,
+      "learning_rate": 0.00026103907069714694,
+      "loss": 4.708710479736328,
+      "step": 4310
+    },
+    {
+      "epoch": 0.7298530157121136,
+      "grad_norm": 0.5362562537193298,
+      "learning_rate": 0.0002607164646143098,
+      "loss": 4.68592643737793,
+      "step": 4320
+    },
+    {
+      "epoch": 0.7315424902855212,
+      "grad_norm": 0.5329167246818542,
+      "learning_rate": 0.0002603927295210547,
+      "loss": 4.681344223022461,
+      "step": 4330
+    },
+    {
+      "epoch": 0.7332319648589288,
+      "grad_norm": 0.5879621505737305,
+      "learning_rate": 0.00026006786871860975,
+      "loss": 4.659723281860352,
+      "step": 4340
+    },
+    {
+      "epoch": 0.7349214394323366,
+      "grad_norm": 0.5552240014076233,
+      "learning_rate": 0.00025974188551968207,
+      "loss": 4.70800552368164,
+      "step": 4350
+    },
+    {
+      "epoch": 0.7366109140057442,
+      "grad_norm": 0.5462090373039246,
+      "learning_rate": 0.0002594147832484243,
+      "loss": 4.6786457061767575,
+      "step": 4360
+    },
+    {
+      "epoch": 0.7383003885791519,
+      "grad_norm": 0.515416145324707,
+      "learning_rate": 0.0002590865652404007,
+      "loss": 4.681232452392578,
+      "step": 4370
+    },
+    {
+      "epoch": 0.7399898631525595,
+      "grad_norm": 0.5645248293876648,
+      "learning_rate": 0.0002587572348425529,
+      "loss": 4.682769775390625,
+      "step": 4380
+    },
+    {
+      "epoch": 0.7416793377259672,
+      "grad_norm": 0.5235434174537659,
+      "learning_rate": 0.0002584267954131659,
+      "loss": 4.673912811279297,
+      "step": 4390
+    },
+    {
+      "epoch": 0.7433688122993749,
+      "grad_norm": 0.5084188580513,
+      "learning_rate": 0.000258095250321834,
+      "loss": 4.675137329101562,
+      "step": 4400
+    },
+    {
+      "epoch": 0.7450582868727825,
+      "grad_norm": 0.5971478819847107,
+      "learning_rate": 0.00025776260294942615,
+      "loss": 4.688092422485352,
+      "step": 4410
+    },
+    {
+      "epoch": 0.7467477614461903,
+      "grad_norm": 0.5779770016670227,
+      "learning_rate": 0.0002574288566880517,
+      "loss": 4.664862823486328,
+      "step": 4420
+    },
+    {
+      "epoch": 0.7484372360195979,
+      "grad_norm": 0.5589803457260132,
+      "learning_rate": 0.0002570940149410256,
+      "loss": 4.665248870849609,
+      "step": 4430
+    },
+    {
+      "epoch": 0.7501267105930056,
+      "grad_norm": 0.4953916072845459,
+      "learning_rate": 0.00025675808112283387,
+      "loss": 4.670894622802734,
+      "step": 4440
+    },
+    {
+      "epoch": 0.7518161851664132,
+      "grad_norm": 0.5200746059417725,
+      "learning_rate": 0.00025642105865909874,
+      "loss": 4.664446258544922,
+      "step": 4450
+    },
+    {
+      "epoch": 0.7535056597398209,
+      "grad_norm": 0.7123140692710876,
+      "learning_rate": 0.0002560829509865437,
+      "loss": 4.660491943359375,
+      "step": 4460
+    },
+    {
+      "epoch": 0.7551951343132286,
+      "grad_norm": 0.5178130865097046,
+      "learning_rate": 0.00025574376155295845,
+      "loss": 4.669913101196289,
+      "step": 4470
+    },
+    {
+      "epoch": 0.7568846088866362,
+      "grad_norm": 0.5300018191337585,
+      "learning_rate": 0.00025540349381716367,
+      "loss": 4.688555145263672,
+      "step": 4480
+    },
+    {
+      "epoch": 0.758574083460044,
+      "grad_norm": 0.6072678565979004,
+      "learning_rate": 0.00025506215124897593,
+      "loss": 4.667338562011719,
+      "step": 4490
+    },
+    {
+      "epoch": 0.7602635580334516,
+      "grad_norm": 0.5844916701316833,
+      "learning_rate": 0.0002547197373291721,
+      "loss": 4.678690719604492,
+      "step": 4500
+    },
+    {
+      "epoch": 0.7602635580334516,
+      "eval_loss": 4.629103660583496,
+      "eval_runtime": 3.5634,
+      "eval_samples_per_second": 280.631,
+      "eval_steps_per_second": 5.893,
+      "step": 4500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.50505569386496e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null