Training in progress, step 4500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a45987252e54dc35108e11e93cd15c2f7eff117407dadcc866c536c9fe38d549
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:71e25cdc8697039d1202fb4440876be16955562540fb206d7cbbcfc37a7f33da
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1032541ab3e6eca2a68a25836b91b06b42d61052c34cec2e6dfe0544f185dcf0
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a082fad0106d612afcdf0e9dbf262fd1aa3ca7c9a2ef45f2a14751b1d80d165
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:329a377c90ca49d3bcb8c01bcb7bdf9bc769af05915d36720b3201a9c222f867
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:3343121e0ab3aeb674ab29d872307564462c4bd82cdd92e6577a4ff26999fc00
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f34721a2fd924d02bdad3691f09e25bcb5ed140f7982be7b710c4ccbd2538c0
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:839b4043be0c777e952526844484b5d7c9eb08d95c6a855198a76f2eb1f08d84
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6757898293630681,
   "eval_steps": 500,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2879,6 +2879,364 @@
       "eval_samples_per_second": 263.356,
       "eval_steps_per_second": 5.53,
       "step": 4000
     }
   ],
   "logging_steps": 10,
@@ -2898,7 +3256,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.33782728343552e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7602635580334516,
   "eval_steps": 500,
+  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 263.356,
       "eval_steps_per_second": 5.53,
       "step": 4000
+    },
+    {
+      "epoch": 0.6774793039364757,
+      "grad_norm": 0.5953722596168518,
+      "learning_rate": 0.00027017636818368575,
+      "loss": 4.737245559692383,
+      "step": 4010
+    },
+    {
+      "epoch": 0.6791687785098834,
+      "grad_norm": 0.6203189492225647,
+      "learning_rate": 0.0002698891091982504,
+      "loss": 4.716173934936523,
+      "step": 4020
+    },
+    {
+      "epoch": 0.6808582530832911,
+      "grad_norm": 0.5239487886428833,
+      "learning_rate": 0.00026960062766607135,
+      "loss": 4.735467529296875,
+      "step": 4030
+    },
+    {
+      "epoch": 0.6825477276566988,
+      "grad_norm": 0.5474298000335693,
+      "learning_rate": 0.0002693109265288851,
+      "loss": 4.725514984130859,
+      "step": 4040
+    },
+    {
+      "epoch": 0.6842372022301064,
+      "grad_norm": 0.5452102422714233,
+      "learning_rate": 0.0002690200087408648,
+      "loss": 4.726776885986328,
+      "step": 4050
+    },
+    {
+      "epoch": 0.6859266768035142,
+      "grad_norm": 0.6271504759788513,
+      "learning_rate": 0.00026872787726859004,
+      "loss": 4.71842041015625,
+      "step": 4060
+    },
+    {
+      "epoch": 0.6876161513769218,
+      "grad_norm": 0.5585569143295288,
+      "learning_rate": 0.0002684345350910169,
+      "loss": 4.728883361816406,
+      "step": 4070
+    },
+    {
+      "epoch": 0.6893056259503294,
+      "grad_norm": 0.544662594795227,
+      "learning_rate": 0.0002681399851994472,
+      "loss": 4.729270553588867,
+      "step": 4080
+    },
+    {
+      "epoch": 0.6909951005237371,
+      "grad_norm": 0.5363122820854187,
+      "learning_rate": 0.00026784423059749845,
+      "loss": 4.726214599609375,
+      "step": 4090
+    },
+    {
+      "epoch": 0.6926845750971448,
+      "grad_norm": 0.5298801064491272,
+      "learning_rate": 0.0002675472743010727,
+      "loss": 4.697872924804687,
+      "step": 4100
+    },
+    {
+      "epoch": 0.6943740496705525,
+      "grad_norm": 0.5710757374763489,
+      "learning_rate": 0.0002672491193383263,
+      "loss": 4.723146438598633,
+      "step": 4110
+    },
+    {
+      "epoch": 0.6960635242439601,
+      "grad_norm": 0.5484883785247803,
+      "learning_rate": 0.00026694976874963854,
+      "loss": 4.738557052612305,
+      "step": 4120
+    },
+    {
+      "epoch": 0.6977529988173679,
+      "grad_norm": 0.5273333191871643,
+      "learning_rate": 0.00026664922558758105,
+      "loss": 4.700592803955078,
+      "step": 4130
+    },
+    {
+      "epoch": 0.6994424733907755,
+      "grad_norm": 0.5574657320976257,
+      "learning_rate": 0.00026634749291688646,
+      "loss": 4.729513168334961,
+      "step": 4140
+    },
+    {
+      "epoch": 0.7011319479641831,
+      "grad_norm": 0.5571582317352295,
+      "learning_rate": 0.00026604457381441715,
+      "loss": 4.706221389770508,
+      "step": 4150
+    },
+    {
+      "epoch": 0.7028214225375908,
+      "grad_norm": 0.6286988258361816,
+      "learning_rate": 0.00026574047136913403,
+      "loss": 4.701080322265625,
+      "step": 4160
+    },
+    {
+      "epoch": 0.7045108971109985,
+      "grad_norm": 0.5314433574676514,
+      "learning_rate": 0.0002654351886820648,
+      "loss": 4.714921188354492,
+      "step": 4170
+    },
+    {
+      "epoch": 0.7062003716844062,
+      "grad_norm": 0.539644718170166,
+      "learning_rate": 0.0002651287288662724,
+      "loss": 4.722955703735352,
+      "step": 4180
+    },
+    {
+      "epoch": 0.7078898462578138,
+      "grad_norm": 0.5164220333099365,
+      "learning_rate": 0.0002648210950468236,
+      "loss": 4.7029579162597654,
+      "step": 4190
+    },
+    {
+      "epoch": 0.7095793208312214,
+      "grad_norm": 0.5345500111579895,
+      "learning_rate": 0.0002645122903607566,
+      "loss": 4.696025085449219,
+      "step": 4200
+    },
+    {
+      "epoch": 0.7112687954046292,
+      "grad_norm": 0.5561880469322205,
+      "learning_rate": 0.0002642023179570493,
+      "loss": 4.696010971069336,
+      "step": 4210
+    },
+    {
+      "epoch": 0.7129582699780368,
+      "grad_norm": 0.5260653495788574,
+      "learning_rate": 0.0002638911809965874,
+      "loss": 4.705658721923828,
+      "step": 4220
+    },
+    {
+      "epoch": 0.7146477445514445,
+      "grad_norm": 0.517846941947937,
+      "learning_rate": 0.0002635788826521316,
+      "loss": 4.690948104858398,
+      "step": 4230
+    },
+    {
+      "epoch": 0.7163372191248522,
+      "grad_norm": 0.5815365314483643,
+      "learning_rate": 0.00026326542610828597,
+      "loss": 4.702710723876953,
+      "step": 4240
+    },
+    {
+      "epoch": 0.7180266936982599,
+      "grad_norm": 0.5511707067489624,
+      "learning_rate": 0.00026295081456146485,
+      "loss": 4.713930130004883,
+      "step": 4250
+    },
+    {
+      "epoch": 0.7197161682716675,
+      "grad_norm": 0.5390937924385071,
+      "learning_rate": 0.0002626350512198606,
+      "loss": 4.694212341308594,
+      "step": 4260
+    },
+    {
+      "epoch": 0.7214056428450751,
+      "grad_norm": 0.5410081744194031,
+      "learning_rate": 0.0002623181393034108,
+      "loss": 4.696395492553711,
+      "step": 4270
+    },
+    {
+      "epoch": 0.7230951174184829,
+      "grad_norm": 0.5272055268287659,
+      "learning_rate": 0.00026200008204376525,
+      "loss": 4.715652847290039,
+      "step": 4280
+    },
+    {
+      "epoch": 0.7247845919918905,
+      "grad_norm": 0.5485383868217468,
+      "learning_rate": 0.00026168088268425346,
+      "loss": 4.689223861694336,
+      "step": 4290
+    },
+    {
+      "epoch": 0.7264740665652982,
+      "grad_norm": 0.4974030554294586,
+      "learning_rate": 0.00026136054447985105,
+      "loss": 4.6958671569824215,
+      "step": 4300
+    },
+    {
+      "epoch": 0.7281635411387058,
+      "grad_norm": 0.5421763062477112,
+      "learning_rate": 0.00026103907069714694,
+      "loss": 4.706072235107422,
+      "step": 4310
+    },
+    {
+      "epoch": 0.7298530157121136,
+      "grad_norm": 0.5402170419692993,
+      "learning_rate": 0.0002607164646143098,
+      "loss": 4.684348297119141,
+      "step": 4320
+    },
+    {
+      "epoch": 0.7315424902855212,
+      "grad_norm": 0.5388095378875732,
+      "learning_rate": 0.0002603927295210547,
+      "loss": 4.681607818603515,
+      "step": 4330
+    },
+    {
+      "epoch": 0.7332319648589288,
+      "grad_norm": 0.5691295266151428,
+      "learning_rate": 0.00026006786871860975,
+      "loss": 4.659119033813477,
+      "step": 4340
+    },
+    {
+      "epoch": 0.7349214394323366,
+      "grad_norm": 0.5657386183738708,
+      "learning_rate": 0.00025974188551968207,
+      "loss": 4.707662963867188,
+      "step": 4350
+    },
+    {
+      "epoch": 0.7366109140057442,
+      "grad_norm": 0.5887618660926819,
+      "learning_rate": 0.0002594147832484243,
+      "loss": 4.678396606445313,
+      "step": 4360
+    },
+    {
+      "epoch": 0.7383003885791519,
+      "grad_norm": 0.5618587136268616,
+      "learning_rate": 0.0002590865652404007,
+      "loss": 4.6809638977050785,
+      "step": 4370
+    },
+    {
+      "epoch": 0.7399898631525595,
+      "grad_norm": 0.5673303604125977,
+      "learning_rate": 0.0002587572348425529,
+      "loss": 4.683576583862305,
+      "step": 4380
+    },
+    {
+      "epoch": 0.7416793377259672,
+      "grad_norm": 0.5109097361564636,
+      "learning_rate": 0.0002584267954131659,
+      "loss": 4.674320983886719,
+      "step": 4390
+    },
+    {
+      "epoch": 0.7433688122993749,
+      "grad_norm": 0.5133926272392273,
+      "learning_rate": 0.000258095250321834,
+      "loss": 4.676524353027344,
+      "step": 4400
+    },
+    {
+      "epoch": 0.7450582868727825,
+      "grad_norm": 0.5628970265388489,
+      "learning_rate": 0.00025776260294942615,
+      "loss": 4.688607025146484,
+      "step": 4410
+    },
+    {
+      "epoch": 0.7467477614461903,
+      "grad_norm": 0.5761396884918213,
+      "learning_rate": 0.0002574288566880517,
+      "loss": 4.666116333007812,
+      "step": 4420
+    },
+    {
+      "epoch": 0.7484372360195979,
+      "grad_norm": 0.5518139004707336,
+      "learning_rate": 0.0002570940149410256,
+      "loss": 4.665610504150391,
+      "step": 4430
+    },
+    {
+      "epoch": 0.7501267105930056,
+      "grad_norm": 0.5176488757133484,
+      "learning_rate": 0.00025675808112283387,
+      "loss": 4.673014831542969,
+      "step": 4440
+    },
+    {
+      "epoch": 0.7518161851664132,
+      "grad_norm": 0.5482094287872314,
+      "learning_rate": 0.00025642105865909874,
+      "loss": 4.665557098388672,
+      "step": 4450
+    },
+    {
+      "epoch": 0.7535056597398209,
+      "grad_norm": 0.7407347559928894,
+      "learning_rate": 0.0002560829509865437,
+      "loss": 4.660655975341797,
+      "step": 4460
+    },
+    {
+      "epoch": 0.7551951343132286,
+      "grad_norm": 0.5341119766235352,
+      "learning_rate": 0.00025574376155295845,
+      "loss": 4.670759582519532,
+      "step": 4470
+    },
+    {
+      "epoch": 0.7568846088866362,
+      "grad_norm": 0.5163617134094238,
+      "learning_rate": 0.00025540349381716367,
+      "loss": 4.689437484741211,
+      "step": 4480
+    },
+    {
+      "epoch": 0.758574083460044,
+      "grad_norm": 0.6329180598258972,
+      "learning_rate": 0.00025506215124897593,
+      "loss": 4.6677288055419925,
+      "step": 4490
+    },
+    {
+      "epoch": 0.7602635580334516,
+      "grad_norm": 0.5871708393096924,
+      "learning_rate": 0.0002547197373291721,
+      "loss": 4.678330993652343,
+      "step": 4500
+    },
+    {
+      "epoch": 0.7602635580334516,
+      "eval_loss": 4.647042751312256,
+      "eval_runtime": 3.6169,
+      "eval_samples_per_second": 276.482,
+      "eval_steps_per_second": 5.806,
+      "step": 4500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.50505569386496e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null