Training in progress, step 6500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c74bfe809433060df3635ef406235f0717bc42781fff9acd5df0f855eb57b3f
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:539ec21ed2f2d5401b90d0d0b28a43621343b47ec158a5dc912ef7d73a069cdf
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70990f23441c3c0fadf8ff7b5b48864178e6a3f9dbc5c1184cb7c19ddf968c0f
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:c631e1446372f309276121049f5c8b7603bed555765afc41b0a5db7f194949eb
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11940f1313899a11d3e47a2d43f508134dd8e03ac7613f4eca32c754da2d1839
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:59ab6babcc58d5a8a0338e2999283607960e6faa29d71e8d0c3f11e2480b272d
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5732bb4fae95fda377427872ad7c4fed0c45a84922701b3143ffa39cf761f9db
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad5a3c7ee6384cdea60f7a41957135fc1d6a8e0bdd3b9a0dd5c4c46f69d638ec
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0136847440446022,
   "eval_steps": 500,
-  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4311,6 +4311,364 @@
       "eval_samples_per_second": 208.452,
       "eval_steps_per_second": 4.377,
       "step": 6000
     }
   ],
   "logging_steps": 10,
@@ -4330,7 +4688,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.0067200216019763e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0981584727149856,
   "eval_steps": 500,
+  "global_step": 6500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 208.452,
       "eval_steps_per_second": 4.377,
       "step": 6000
+    },
+    {
+      "epoch": 1.0153742186180097,
+      "grad_norm": 0.5339483022689819,
+      "learning_rate": 0.00019297799453520028,
+      "loss": 4.500830459594726,
+      "step": 6010
+    },
+    {
+      "epoch": 1.0170636931914174,
+      "grad_norm": 0.5642256736755371,
+      "learning_rate": 0.00019251885987680252,
+      "loss": 4.485746002197265,
+      "step": 6020
+    },
+    {
+      "epoch": 1.0187531677648252,
+      "grad_norm": 0.5060975551605225,
+      "learning_rate": 0.00019205929164029217,
+      "loss": 4.475402450561523,
+      "step": 6030
+    },
+    {
+      "epoch": 1.020442642338233,
+      "grad_norm": 0.49786120653152466,
+      "learning_rate": 0.00019159929451203033,
+      "loss": 4.486777114868164,
+      "step": 6040
+    },
+    {
+      "epoch": 1.0221321169116404,
+      "grad_norm": 0.506598949432373,
+      "learning_rate": 0.00019113887318275149,
+      "loss": 4.489146041870117,
+      "step": 6050
+    },
+    {
+      "epoch": 1.0238215914850481,
+      "grad_norm": 0.48270103335380554,
+      "learning_rate": 0.00019067803234751603,
+      "loss": 4.474691009521484,
+      "step": 6060
+    },
+    {
+      "epoch": 1.0255110660584559,
+      "grad_norm": 0.48239970207214355,
+      "learning_rate": 0.00019021677670566208,
+      "loss": 4.4708606719970705,
+      "step": 6070
+    },
+    {
+      "epoch": 1.0272005406318634,
+      "grad_norm": 0.4966093599796295,
+      "learning_rate": 0.00018975511096075762,
+      "loss": 4.505655670166016,
+      "step": 6080
+    },
+    {
+      "epoch": 1.0288900152052711,
+      "grad_norm": 0.5429375767707825,
+      "learning_rate": 0.00018929303982055272,
+      "loss": 4.499347305297851,
+      "step": 6090
+    },
+    {
+      "epoch": 1.0305794897786789,
+      "grad_norm": 0.4981507360935211,
+      "learning_rate": 0.00018883056799693125,
+      "loss": 4.461819839477539,
+      "step": 6100
+    },
+    {
+      "epoch": 1.0322689643520866,
+      "grad_norm": 0.5121614336967468,
+      "learning_rate": 0.00018836770020586315,
+      "loss": 4.478689956665039,
+      "step": 6110
+    },
+    {
+      "epoch": 1.033958438925494,
+      "grad_norm": 0.4835728406906128,
+      "learning_rate": 0.00018790444116735595,
+      "loss": 4.47772216796875,
+      "step": 6120
+    },
+    {
+      "epoch": 1.0356479134989018,
+      "grad_norm": 0.4881206154823303,
+      "learning_rate": 0.00018744079560540695,
+      "loss": 4.479801177978516,
+      "step": 6130
+    },
+    {
+      "epoch": 1.0373373880723096,
+      "grad_norm": 0.47434431314468384,
+      "learning_rate": 0.000186976768247955,
+      "loss": 4.480235290527344,
+      "step": 6140
+    },
+    {
+      "epoch": 1.039026862645717,
+      "grad_norm": 0.48258504271507263,
+      "learning_rate": 0.00018651236382683225,
+      "loss": 4.469864273071289,
+      "step": 6150
+    },
+    {
+      "epoch": 1.0407163372191248,
+      "grad_norm": 0.5025637745857239,
+      "learning_rate": 0.0001860475870777157,
+      "loss": 4.472750091552735,
+      "step": 6160
+    },
+    {
+      "epoch": 1.0424058117925326,
+      "grad_norm": 0.4636594355106354,
+      "learning_rate": 0.0001855824427400793,
+      "loss": 4.450835418701172,
+      "step": 6170
+    },
+    {
+      "epoch": 1.0440952863659403,
+      "grad_norm": 0.4901501536369324,
+      "learning_rate": 0.00018511693555714535,
+      "loss": 4.490735626220703,
+      "step": 6180
+    },
+    {
+      "epoch": 1.0457847609393478,
+      "grad_norm": 0.5198561549186707,
+      "learning_rate": 0.00018465107027583615,
+      "loss": 4.474180221557617,
+      "step": 6190
+    },
+    {
+      "epoch": 1.0474742355127555,
+      "grad_norm": 0.4723539352416992,
+      "learning_rate": 0.00018418485164672574,
+      "loss": 4.4745361328125,
+      "step": 6200
+    },
+    {
+      "epoch": 1.0491637100861633,
+      "grad_norm": 0.5074954628944397,
+      "learning_rate": 0.00018371828442399128,
+      "loss": 4.469810485839844,
+      "step": 6210
+    },
+    {
+      "epoch": 1.0508531846595708,
+      "grad_norm": 0.49918699264526367,
+      "learning_rate": 0.00018325137336536464,
+      "loss": 4.442096710205078,
+      "step": 6220
+    },
+    {
+      "epoch": 1.0525426592329785,
+      "grad_norm": 0.5088530778884888,
+      "learning_rate": 0.00018278412323208392,
+      "loss": 4.484762573242188,
+      "step": 6230
+    },
+    {
+      "epoch": 1.0542321338063863,
+      "grad_norm": 0.506341814994812,
+      "learning_rate": 0.00018231653878884486,
+      "loss": 4.486656188964844,
+      "step": 6240
+    },
+    {
+      "epoch": 1.055921608379794,
+      "grad_norm": 0.5262649059295654,
+      "learning_rate": 0.00018184862480375233,
+      "loss": 4.455668640136719,
+      "step": 6250
+    },
+    {
+      "epoch": 1.0576110829532015,
+      "grad_norm": 0.5115051865577698,
+      "learning_rate": 0.00018138038604827153,
+      "loss": 4.479043960571289,
+      "step": 6260
+    },
+    {
+      "epoch": 1.0593005575266092,
+      "grad_norm": 0.50110924243927,
+      "learning_rate": 0.0001809118272971795,
+      "loss": 4.446685409545898,
+      "step": 6270
+    },
+    {
+      "epoch": 1.060990032100017,
+      "grad_norm": 0.5022484660148621,
+      "learning_rate": 0.0001804429533285164,
+      "loss": 4.4593353271484375,
+      "step": 6280
+    },
+    {
+      "epoch": 1.0626795066734245,
+      "grad_norm": 0.492165744304657,
+      "learning_rate": 0.00017997376892353668,
+      "loss": 4.496971511840821,
+      "step": 6290
+    },
+    {
+      "epoch": 1.0643689812468322,
+      "grad_norm": 0.5134599208831787,
+      "learning_rate": 0.0001795042788666605,
+      "loss": 4.465629196166992,
+      "step": 6300
+    },
+    {
+      "epoch": 1.06605845582024,
+      "grad_norm": 0.5151488184928894,
+      "learning_rate": 0.00017903448794542488,
+      "loss": 4.454899597167969,
+      "step": 6310
+    },
+    {
+      "epoch": 1.0677479303936477,
+      "grad_norm": 0.5240500569343567,
+      "learning_rate": 0.00017856440095043464,
+      "loss": 4.481625747680664,
+      "step": 6320
+    },
+    {
+      "epoch": 1.0694374049670552,
+      "grad_norm": 0.5187123417854309,
+      "learning_rate": 0.00017809402267531405,
+      "loss": 4.437789535522461,
+      "step": 6330
+    },
+    {
+      "epoch": 1.071126879540463,
+      "grad_norm": 0.4693409502506256,
+      "learning_rate": 0.00017762335791665735,
+      "loss": 4.450423812866211,
+      "step": 6340
+    },
+    {
+      "epoch": 1.0728163541138707,
+      "grad_norm": 0.5061246752738953,
+      "learning_rate": 0.00017715241147398035,
+      "loss": 4.46313705444336,
+      "step": 6350
+    },
+    {
+      "epoch": 1.0745058286872782,
+      "grad_norm": 0.47927796840667725,
+      "learning_rate": 0.00017668118814967126,
+      "loss": 4.446915817260742,
+      "step": 6360
+    },
+    {
+      "epoch": 1.076195303260686,
+      "grad_norm": 0.47587907314300537,
+      "learning_rate": 0.00017620969274894163,
+      "loss": 4.461398696899414,
+      "step": 6370
+    },
+    {
+      "epoch": 1.0778847778340936,
+      "grad_norm": 0.5091392397880554,
+      "learning_rate": 0.00017573793007977763,
+      "loss": 4.450970458984375,
+      "step": 6380
+    },
+    {
+      "epoch": 1.0795742524075012,
+      "grad_norm": 0.5105127692222595,
+      "learning_rate": 0.0001752659049528906,
+      "loss": 4.458657455444336,
+      "step": 6390
+    },
+    {
+      "epoch": 1.081263726980909,
+      "grad_norm": 0.5196726322174072,
+      "learning_rate": 0.00017479362218166854,
+      "loss": 4.444008636474609,
+      "step": 6400
+    },
+    {
+      "epoch": 1.0829532015543166,
+      "grad_norm": 0.4891359210014343,
+      "learning_rate": 0.0001743210865821265,
+      "loss": 4.436445236206055,
+      "step": 6410
+    },
+    {
+      "epoch": 1.0846426761277244,
+      "grad_norm": 0.5141095519065857,
+      "learning_rate": 0.0001738483029728578,
+      "loss": 4.455481338500976,
+      "step": 6420
+    },
+    {
+      "epoch": 1.0863321507011319,
+      "grad_norm": 0.5223525166511536,
+      "learning_rate": 0.00017337527617498474,
+      "loss": 4.485405731201172,
+      "step": 6430
+    },
+    {
+      "epoch": 1.0880216252745396,
+      "grad_norm": 0.4939091205596924,
+      "learning_rate": 0.0001729020110121096,
+      "loss": 4.448784255981446,
+      "step": 6440
+    },
+    {
+      "epoch": 1.0897110998479473,
+      "grad_norm": 0.49695253372192383,
+      "learning_rate": 0.0001724285123102652,
+      "loss": 4.4587146759033205,
+      "step": 6450
+    },
+    {
+      "epoch": 1.091400574421355,
+      "grad_norm": 0.4882517158985138,
+      "learning_rate": 0.00017195478489786593,
+      "loss": 4.43580207824707,
+      "step": 6460
+    },
+    {
+      "epoch": 1.0930900489947626,
+      "grad_norm": 0.4971882998943329,
+      "learning_rate": 0.00017148083360565836,
+      "loss": 4.436479949951172,
+      "step": 6470
+    },
+    {
+      "epoch": 1.0947795235681703,
+      "grad_norm": 0.4835260808467865,
+      "learning_rate": 0.00017100666326667202,
+      "loss": 4.476963043212891,
+      "step": 6480
+    },
+    {
+      "epoch": 1.096468998141578,
+      "grad_norm": 0.4847490191459656,
+      "learning_rate": 0.00017053227871617027,
+      "loss": 4.449015426635742,
+      "step": 6490
+    },
+    {
+      "epoch": 1.0981584727149856,
+      "grad_norm": 0.5305824279785156,
+      "learning_rate": 0.00017005768479160064,
+      "loss": 4.452330780029297,
+      "step": 6500
+    },
+    {
+      "epoch": 1.0981584727149856,
+      "eval_loss": 4.447469711303711,
+      "eval_runtime": 4.0239,
+      "eval_samples_per_second": 248.518,
+      "eval_steps_per_second": 5.219,
+      "step": 6500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.1739484320314163e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null