Wilsonwin committed on
Commit
ccf2b74
·
verified ·
1 Parent(s): 71521b0

Training in progress, step 6500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f198d05f5a6f7322d5950baad97f98d6f59bcdb9ed02f220583ce5fd10a379c7
3
  size 328277848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccb4549408e52f631e5a2754236ea70999d0d21bd6cdb0e3578808e3ad0ec0af
3
  size 328277848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:302e7a816c65dc7ea036853d2e134881bf37e4d7e3ce31f671702ad86c5f1616
3
  size 318646859
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5fa1630ee9673533bbca0fabe5cc81512e307a45647863ef671c972b6a648c2
3
  size 318646859
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef678004bfc53268aeb4845a442c0327144244832e571a2be41a7160145765eb
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b04f884e61b89876d8b9b16b9a44bf2c7f027c2c95e35ca0aba5b86933c2288c
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5732bb4fae95fda377427872ad7c4fed0c45a84922701b3143ffa39cf761f9db
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad5a3c7ee6384cdea60f7a41957135fc1d6a8e0bdd3b9a0dd5c4c46f69d638ec
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0136847440446022,
6
  "eval_steps": 500,
7
- "global_step": 6000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4311,6 +4311,364 @@
4311
  "eval_samples_per_second": 276.003,
4312
  "eval_steps_per_second": 5.796,
4313
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4314
  }
4315
  ],
4316
  "logging_steps": 10,
@@ -4330,7 +4688,7 @@
4330
  "attributes": {}
4331
  }
4332
  },
4333
- "total_flos": 2.0067200216019763e+17,
4334
  "train_batch_size": 48,
4335
  "trial_name": null,
4336
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0981584727149856,
6
  "eval_steps": 500,
7
+ "global_step": 6500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4311
  "eval_samples_per_second": 276.003,
4312
  "eval_steps_per_second": 5.796,
4313
  "step": 6000
4314
+ },
4315
+ {
4316
+ "epoch": 1.0153742186180097,
4317
+ "grad_norm": 0.518569827079773,
4318
+ "learning_rate": 0.00019297799453520028,
4319
+ "loss": 4.499275207519531,
4320
+ "step": 6010
4321
+ },
4322
+ {
4323
+ "epoch": 1.0170636931914174,
4324
+ "grad_norm": 0.5655678510665894,
4325
+ "learning_rate": 0.00019251885987680252,
4326
+ "loss": 4.483604049682617,
4327
+ "step": 6020
4328
+ },
4329
+ {
4330
+ "epoch": 1.0187531677648252,
4331
+ "grad_norm": 0.5033740997314453,
4332
+ "learning_rate": 0.00019205929164029217,
4333
+ "loss": 4.474781036376953,
4334
+ "step": 6030
4335
+ },
4336
+ {
4337
+ "epoch": 1.020442642338233,
4338
+ "grad_norm": 0.5125960111618042,
4339
+ "learning_rate": 0.00019159929451203033,
4340
+ "loss": 4.485195922851562,
4341
+ "step": 6040
4342
+ },
4343
+ {
4344
+ "epoch": 1.0221321169116404,
4345
+ "grad_norm": 0.5002242922782898,
4346
+ "learning_rate": 0.00019113887318275149,
4347
+ "loss": 4.486893081665039,
4348
+ "step": 6050
4349
+ },
4350
+ {
4351
+ "epoch": 1.0238215914850481,
4352
+ "grad_norm": 0.48877793550491333,
4353
+ "learning_rate": 0.00019067803234751603,
4354
+ "loss": 4.473563766479492,
4355
+ "step": 6060
4356
+ },
4357
+ {
4358
+ "epoch": 1.0255110660584559,
4359
+ "grad_norm": 0.485661119222641,
4360
+ "learning_rate": 0.00019021677670566208,
4361
+ "loss": 4.469658660888672,
4362
+ "step": 6070
4363
+ },
4364
+ {
4365
+ "epoch": 1.0272005406318634,
4366
+ "grad_norm": 0.5000821352005005,
4367
+ "learning_rate": 0.00018975511096075762,
4368
+ "loss": 4.504412078857422,
4369
+ "step": 6080
4370
+ },
4371
+ {
4372
+ "epoch": 1.0288900152052711,
4373
+ "grad_norm": 0.5075719356536865,
4374
+ "learning_rate": 0.00018929303982055272,
4375
+ "loss": 4.497782135009766,
4376
+ "step": 6090
4377
+ },
4378
+ {
4379
+ "epoch": 1.0305794897786789,
4380
+ "grad_norm": 0.477532297372818,
4381
+ "learning_rate": 0.00018883056799693125,
4382
+ "loss": 4.46100082397461,
4383
+ "step": 6100
4384
+ },
4385
+ {
4386
+ "epoch": 1.0322689643520866,
4387
+ "grad_norm": 0.5213661789894104,
4388
+ "learning_rate": 0.00018836770020586315,
4389
+ "loss": 4.476996612548828,
4390
+ "step": 6110
4391
+ },
4392
+ {
4393
+ "epoch": 1.033958438925494,
4394
+ "grad_norm": 0.5093067288398743,
4395
+ "learning_rate": 0.00018790444116735595,
4396
+ "loss": 4.477323150634765,
4397
+ "step": 6120
4398
+ },
4399
+ {
4400
+ "epoch": 1.0356479134989018,
4401
+ "grad_norm": 0.480839341878891,
4402
+ "learning_rate": 0.00018744079560540695,
4403
+ "loss": 4.478923797607422,
4404
+ "step": 6130
4405
+ },
4406
+ {
4407
+ "epoch": 1.0373373880723096,
4408
+ "grad_norm": 0.47398701310157776,
4409
+ "learning_rate": 0.000186976768247955,
4410
+ "loss": 4.478921508789062,
4411
+ "step": 6140
4412
+ },
4413
+ {
4414
+ "epoch": 1.039026862645717,
4415
+ "grad_norm": 0.4890805780887604,
4416
+ "learning_rate": 0.00018651236382683225,
4417
+ "loss": 4.468624877929687,
4418
+ "step": 6150
4419
+ },
4420
+ {
4421
+ "epoch": 1.0407163372191248,
4422
+ "grad_norm": 0.49367958307266235,
4423
+ "learning_rate": 0.0001860475870777157,
4424
+ "loss": 4.472190475463867,
4425
+ "step": 6160
4426
+ },
4427
+ {
4428
+ "epoch": 1.0424058117925326,
4429
+ "grad_norm": 0.4590769112110138,
4430
+ "learning_rate": 0.0001855824427400793,
4431
+ "loss": 4.449500274658203,
4432
+ "step": 6170
4433
+ },
4434
+ {
4435
+ "epoch": 1.0440952863659403,
4436
+ "grad_norm": 0.4810253381729126,
4437
+ "learning_rate": 0.00018511693555714535,
4438
+ "loss": 4.490542221069336,
4439
+ "step": 6180
4440
+ },
4441
+ {
4442
+ "epoch": 1.0457847609393478,
4443
+ "grad_norm": 0.5299515128135681,
4444
+ "learning_rate": 0.00018465107027583615,
4445
+ "loss": 4.474026489257812,
4446
+ "step": 6190
4447
+ },
4448
+ {
4449
+ "epoch": 1.0474742355127555,
4450
+ "grad_norm": 0.4833298623561859,
4451
+ "learning_rate": 0.00018418485164672574,
4452
+ "loss": 4.473223114013672,
4453
+ "step": 6200
4454
+ },
4455
+ {
4456
+ "epoch": 1.0491637100861633,
4457
+ "grad_norm": 0.4987802803516388,
4458
+ "learning_rate": 0.00018371828442399128,
4459
+ "loss": 4.467764663696289,
4460
+ "step": 6210
4461
+ },
4462
+ {
4463
+ "epoch": 1.0508531846595708,
4464
+ "grad_norm": 0.49086934328079224,
4465
+ "learning_rate": 0.00018325137336536464,
4466
+ "loss": 4.441515350341797,
4467
+ "step": 6220
4468
+ },
4469
+ {
4470
+ "epoch": 1.0525426592329785,
4471
+ "grad_norm": 0.5031701326370239,
4472
+ "learning_rate": 0.00018278412323208392,
4473
+ "loss": 4.483510208129883,
4474
+ "step": 6230
4475
+ },
4476
+ {
4477
+ "epoch": 1.0542321338063863,
4478
+ "grad_norm": 0.509184718132019,
4479
+ "learning_rate": 0.00018231653878884486,
4480
+ "loss": 4.485199356079102,
4481
+ "step": 6240
4482
+ },
4483
+ {
4484
+ "epoch": 1.055921608379794,
4485
+ "grad_norm": 0.48335397243499756,
4486
+ "learning_rate": 0.00018184862480375233,
4487
+ "loss": 4.454570388793945,
4488
+ "step": 6250
4489
+ },
4490
+ {
4491
+ "epoch": 1.0576110829532015,
4492
+ "grad_norm": 0.5146468281745911,
4493
+ "learning_rate": 0.00018138038604827153,
4494
+ "loss": 4.477815628051758,
4495
+ "step": 6260
4496
+ },
4497
+ {
4498
+ "epoch": 1.0593005575266092,
4499
+ "grad_norm": 0.5049527883529663,
4500
+ "learning_rate": 0.0001809118272971795,
4501
+ "loss": 4.445434951782227,
4502
+ "step": 6270
4503
+ },
4504
+ {
4505
+ "epoch": 1.060990032100017,
4506
+ "grad_norm": 0.47304192185401917,
4507
+ "learning_rate": 0.0001804429533285164,
4508
+ "loss": 4.458169555664062,
4509
+ "step": 6280
4510
+ },
4511
+ {
4512
+ "epoch": 1.0626795066734245,
4513
+ "grad_norm": 0.4755364954471588,
4514
+ "learning_rate": 0.00017997376892353668,
4515
+ "loss": 4.495440292358398,
4516
+ "step": 6290
4517
+ },
4518
+ {
4519
+ "epoch": 1.0643689812468322,
4520
+ "grad_norm": 0.49506038427352905,
4521
+ "learning_rate": 0.0001795042788666605,
4522
+ "loss": 4.4639404296875,
4523
+ "step": 6300
4524
+ },
4525
+ {
4526
+ "epoch": 1.06605845582024,
4527
+ "grad_norm": 0.5216291546821594,
4528
+ "learning_rate": 0.00017903448794542488,
4529
+ "loss": 4.4542278289794925,
4530
+ "step": 6310
4531
+ },
4532
+ {
4533
+ "epoch": 1.0677479303936477,
4534
+ "grad_norm": 0.5284595489501953,
4535
+ "learning_rate": 0.00017856440095043464,
4536
+ "loss": 4.479632186889648,
4537
+ "step": 6320
4538
+ },
4539
+ {
4540
+ "epoch": 1.0694374049670552,
4541
+ "grad_norm": 0.5182107090950012,
4542
+ "learning_rate": 0.00017809402267531405,
4543
+ "loss": 4.4362133026123045,
4544
+ "step": 6330
4545
+ },
4546
+ {
4547
+ "epoch": 1.071126879540463,
4548
+ "grad_norm": 0.5018042922019958,
4549
+ "learning_rate": 0.00017762335791665735,
4550
+ "loss": 4.452248001098633,
4551
+ "step": 6340
4552
+ },
4553
+ {
4554
+ "epoch": 1.0728163541138707,
4555
+ "grad_norm": 0.5280482172966003,
4556
+ "learning_rate": 0.00017715241147398035,
4557
+ "loss": 4.464836120605469,
4558
+ "step": 6350
4559
+ },
4560
+ {
4561
+ "epoch": 1.0745058286872782,
4562
+ "grad_norm": 0.47761428356170654,
4563
+ "learning_rate": 0.00017668118814967126,
4564
+ "loss": 4.447597503662109,
4565
+ "step": 6360
4566
+ },
4567
+ {
4568
+ "epoch": 1.076195303260686,
4569
+ "grad_norm": 0.4841929078102112,
4570
+ "learning_rate": 0.00017620969274894163,
4571
+ "loss": 4.4613292694091795,
4572
+ "step": 6370
4573
+ },
4574
+ {
4575
+ "epoch": 1.0778847778340936,
4576
+ "grad_norm": 0.5038534998893738,
4577
+ "learning_rate": 0.00017573793007977763,
4578
+ "loss": 4.451330184936523,
4579
+ "step": 6380
4580
+ },
4581
+ {
4582
+ "epoch": 1.0795742524075012,
4583
+ "grad_norm": 0.5004971027374268,
4584
+ "learning_rate": 0.0001752659049528906,
4585
+ "loss": 4.457633972167969,
4586
+ "step": 6390
4587
+ },
4588
+ {
4589
+ "epoch": 1.081263726980909,
4590
+ "grad_norm": 0.5123668909072876,
4591
+ "learning_rate": 0.00017479362218166854,
4592
+ "loss": 4.443200302124024,
4593
+ "step": 6400
4594
+ },
4595
+ {
4596
+ "epoch": 1.0829532015543166,
4597
+ "grad_norm": 0.5099160075187683,
4598
+ "learning_rate": 0.0001743210865821265,
4599
+ "loss": 4.436219787597656,
4600
+ "step": 6410
4601
+ },
4602
+ {
4603
+ "epoch": 1.0846426761277244,
4604
+ "grad_norm": 0.5162463784217834,
4605
+ "learning_rate": 0.0001738483029728578,
4606
+ "loss": 4.45533561706543,
4607
+ "step": 6420
4608
+ },
4609
+ {
4610
+ "epoch": 1.0863321507011319,
4611
+ "grad_norm": 0.5178755521774292,
4612
+ "learning_rate": 0.00017337527617498474,
4613
+ "loss": 4.48522720336914,
4614
+ "step": 6430
4615
+ },
4616
+ {
4617
+ "epoch": 1.0880216252745396,
4618
+ "grad_norm": 0.49394717812538147,
4619
+ "learning_rate": 0.0001729020110121096,
4620
+ "loss": 4.447189712524414,
4621
+ "step": 6440
4622
+ },
4623
+ {
4624
+ "epoch": 1.0897110998479473,
4625
+ "grad_norm": 0.4908885955810547,
4626
+ "learning_rate": 0.0001724285123102652,
4627
+ "loss": 4.457671737670898,
4628
+ "step": 6450
4629
+ },
4630
+ {
4631
+ "epoch": 1.091400574421355,
4632
+ "grad_norm": 0.5045267343521118,
4633
+ "learning_rate": 0.00017195478489786593,
4634
+ "loss": 4.435376358032227,
4635
+ "step": 6460
4636
+ },
4637
+ {
4638
+ "epoch": 1.0930900489947626,
4639
+ "grad_norm": 0.5065691471099854,
4640
+ "learning_rate": 0.00017148083360565836,
4641
+ "loss": 4.435953903198242,
4642
+ "step": 6470
4643
+ },
4644
+ {
4645
+ "epoch": 1.0947795235681703,
4646
+ "grad_norm": 0.4825722575187683,
4647
+ "learning_rate": 0.00017100666326667202,
4648
+ "loss": 4.4766490936279295,
4649
+ "step": 6480
4650
+ },
4651
+ {
4652
+ "epoch": 1.096468998141578,
4653
+ "grad_norm": 0.4787653982639313,
4654
+ "learning_rate": 0.00017053227871617027,
4655
+ "loss": 4.448079299926758,
4656
+ "step": 6490
4657
+ },
4658
+ {
4659
+ "epoch": 1.0981584727149856,
4660
+ "grad_norm": 0.5119611024856567,
4661
+ "learning_rate": 0.00017005768479160064,
4662
+ "loss": 4.452360534667969,
4663
+ "step": 6500
4664
+ },
4665
+ {
4666
+ "epoch": 1.0981584727149856,
4667
+ "eval_loss": 4.429732799530029,
4668
+ "eval_runtime": 3.6558,
4669
+ "eval_samples_per_second": 273.54,
4670
+ "eval_steps_per_second": 5.744,
4671
+ "step": 6500
4672
  }
4673
  ],
4674
  "logging_steps": 10,
 
4688
  "attributes": {}
4689
  }
4690
  },
4691
+ "total_flos": 2.1739484320314163e+17,
4692
  "train_batch_size": 48,
4693
  "trial_name": null,
4694
  "trial_params": null