Training in progress, step 4071, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +501 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b18bf2ef5a58fafa878b248cbf68b82419946e906651ff194b5c76c056fb08b
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:019f7d564a5c9d518bec0e30d9649ba99937af670238d8452968fa5b1413a528
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01e166d9d5b84714454a7cc37b98e06d7bae8e84fb581970ec17a8c1840e30fe
 size 51613668

 version https://git-lfs.github.com/spec/v1
+oid sha256:72e4711ba0b347f94b175a3390cd1f352cd517007e6176a860b09cd59f1edf4e
 size 51613668

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f0b89a8cec8741d7852668aebad53cb8460a6fb7ce6157d3a1c6ef281fa056d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0c791d11fe3a154567778d2d9453c793a7de48963a5332d5e4afe615c58f1f4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:56e05c57415a8b6cee746a07459d6221af8b9d5f7345da95a6363db12a8223b3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c84513d13cd2d96eb71f4021c545ed7c31d2c720ba1e9098bc883af81a53ca7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.21696823835372925,
   "best_model_checkpoint": "miner_id_24/checkpoint-4000",
-  "epoch": 2.9465930018416207,
   "eval_steps": 200,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -28175,6 +28175,503 @@
       "eval_samples_per_second": 31.796,
       "eval_steps_per_second": 2.033,
       "step": 4000
     }
   ],
   "logging_steps": 1,
@@ -28198,12 +28695,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.0183041122893824e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.21696823835372925,
   "best_model_checkpoint": "miner_id_24/checkpoint-4000",
+  "epoch": 2.998895027624309,
   "eval_steps": 200,
+  "global_step": 4071,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.796,
       "eval_steps_per_second": 2.033,
       "step": 4000
+    },
+    {
+      "epoch": 2.947329650092081,
+      "grad_norm": 0.3928582966327667,
+      "learning_rate": 1.480406367429299e-07,
+      "loss": 0.1097,
+      "step": 4001
+    },
+    {
+      "epoch": 2.9480662983425416,
+      "grad_norm": 0.4033927619457245,
+      "learning_rate": 1.4384212371320039e-07,
+      "loss": 0.1031,
+      "step": 4002
+    },
+    {
+      "epoch": 2.948802946593002,
+      "grad_norm": 0.31086212396621704,
+      "learning_rate": 1.397039634071895e-07,
+      "loss": 0.0823,
+      "step": 4003
+    },
+    {
+      "epoch": 2.949539594843462,
+      "grad_norm": 0.363800972700119,
+      "learning_rate": 1.3562615832597437e-07,
+      "loss": 0.1162,
+      "step": 4004
+    },
+    {
+      "epoch": 2.9502762430939224,
+      "grad_norm": 0.36636438965797424,
+      "learning_rate": 1.3160871093416128e-07,
+      "loss": 0.1065,
+      "step": 4005
+    },
+    {
+      "epoch": 2.951012891344383,
+      "grad_norm": 0.44840720295906067,
+      "learning_rate": 1.276516236598857e-07,
+      "loss": 0.139,
+      "step": 4006
+    },
+    {
+      "epoch": 2.9517495395948434,
+      "grad_norm": 0.37990802526474,
+      "learning_rate": 1.237548988948123e-07,
+      "loss": 0.1155,
+      "step": 4007
+    },
+    {
+      "epoch": 2.952486187845304,
+      "grad_norm": 0.3310796618461609,
+      "learning_rate": 1.1991853899409044e-07,
+      "loss": 0.1023,
+      "step": 4008
+    },
+    {
+      "epoch": 2.9532228360957644,
+      "grad_norm": 0.525777280330658,
+      "learning_rate": 1.1614254627640986e-07,
+      "loss": 0.1692,
+      "step": 4009
+    },
+    {
+      "epoch": 2.9539594843462247,
+      "grad_norm": 0.3540642559528351,
+      "learning_rate": 1.1242692302395608e-07,
+      "loss": 0.0742,
+      "step": 4010
+    },
+    {
+      "epoch": 2.954696132596685,
+      "grad_norm": 0.4345633089542389,
+      "learning_rate": 1.0877167148246604e-07,
+      "loss": 0.1342,
+      "step": 4011
+    },
+    {
+      "epoch": 2.9554327808471452,
+      "grad_norm": 0.38242772221565247,
+      "learning_rate": 1.0517679386113922e-07,
+      "loss": 0.1004,
+      "step": 4012
+    },
+    {
+      "epoch": 2.956169429097606,
+      "grad_norm": 0.43410253524780273,
+      "learning_rate": 1.0164229233271538e-07,
+      "loss": 0.1084,
+      "step": 4013
+    },
+    {
+      "epoch": 2.956906077348066,
+      "grad_norm": 0.5683735609054565,
+      "learning_rate": 9.816816903343018e-08,
+      "loss": 0.1536,
+      "step": 4014
+    },
+    {
+      "epoch": 2.957642725598527,
+      "grad_norm": 0.3811461627483368,
+      "learning_rate": 9.47544260630373e-08,
+      "loss": 0.0991,
+      "step": 4015
+    },
+    {
+      "epoch": 2.958379373848987,
+      "grad_norm": 0.45557355880737305,
+      "learning_rate": 9.140106548478633e-08,
+      "loss": 0.1312,
+      "step": 4016
+    },
+    {
+      "epoch": 2.9591160220994475,
+      "grad_norm": 0.3726716935634613,
+      "learning_rate": 8.81080893254449e-08,
+      "loss": 0.1225,
+      "step": 4017
+    },
+    {
+      "epoch": 2.9598526703499077,
+      "grad_norm": 0.44345927238464355,
+      "learning_rate": 8.487549957526541e-08,
+      "loss": 0.1332,
+      "step": 4018
+    },
+    {
+      "epoch": 2.9605893186003684,
+      "grad_norm": 0.41493895649909973,
+      "learning_rate": 8.170329818802947e-08,
+      "loss": 0.1248,
+      "step": 4019
+    },
+    {
+      "epoch": 2.9613259668508287,
+      "grad_norm": 0.4436560869216919,
+      "learning_rate": 7.859148708099228e-08,
+      "loss": 0.1317,
+      "step": 4020
+    },
+    {
+      "epoch": 2.962062615101289,
+      "grad_norm": 0.3871510624885559,
+      "learning_rate": 7.554006813491609e-08,
+      "loss": 0.1029,
+      "step": 4021
+    },
+    {
+      "epoch": 2.9627992633517497,
+      "grad_norm": 0.42555782198905945,
+      "learning_rate": 7.254904319409229e-08,
+      "loss": 0.1519,
+      "step": 4022
+    },
+    {
+      "epoch": 2.96353591160221,
+      "grad_norm": 0.4393730163574219,
+      "learning_rate": 6.961841406626368e-08,
+      "loss": 0.0969,
+      "step": 4023
+    },
+    {
+      "epoch": 2.9642725598526702,
+      "grad_norm": 0.43214312195777893,
+      "learning_rate": 6.674818252270232e-08,
+      "loss": 0.1308,
+      "step": 4024
+    },
+    {
+      "epoch": 2.9650092081031305,
+      "grad_norm": 0.3302169442176819,
+      "learning_rate": 6.393835029816497e-08,
+      "loss": 0.1412,
+      "step": 4025
+    },
+    {
+      "epoch": 2.9657458563535912,
+      "grad_norm": 0.42520150542259216,
+      "learning_rate": 6.11889190909043e-08,
+      "loss": 0.1354,
+      "step": 4026
+    },
+    {
+      "epoch": 2.9664825046040515,
+      "grad_norm": 0.4056524932384491,
+      "learning_rate": 5.84998905626799e-08,
+      "loss": 0.1074,
+      "step": 4027
+    },
+    {
+      "epoch": 2.967219152854512,
+      "grad_norm": 0.40226319432258606,
+      "learning_rate": 5.5871266338702876e-08,
+      "loss": 0.1444,
+      "step": 4028
+    },
+    {
+      "epoch": 2.9679558011049725,
+      "grad_norm": 0.370077520608902,
+      "learning_rate": 5.330304800772456e-08,
+      "loss": 0.1096,
+      "step": 4029
+    },
+    {
+      "epoch": 2.9686924493554327,
+      "grad_norm": 0.4626006782054901,
+      "learning_rate": 5.0795237121969984e-08,
+      "loss": 0.145,
+      "step": 4030
+    },
+    {
+      "epoch": 2.969429097605893,
+      "grad_norm": 0.45213082432746887,
+      "learning_rate": 4.8347835197137814e-08,
+      "loss": 0.1208,
+      "step": 4031
+    },
+    {
+      "epoch": 2.9701657458563537,
+      "grad_norm": 0.43094053864479065,
+      "learning_rate": 4.59608437124337e-08,
+      "loss": 0.1237,
+      "step": 4032
+    },
+    {
+      "epoch": 2.970902394106814,
+      "grad_norm": 0.42492741346359253,
+      "learning_rate": 4.363426411055915e-08,
+      "loss": 0.1358,
+      "step": 4033
+    },
+    {
+      "epoch": 2.9716390423572743,
+      "grad_norm": 0.4052026569843292,
+      "learning_rate": 4.1368097797678255e-08,
+      "loss": 0.1083,
+      "step": 4034
+    },
+    {
+      "epoch": 2.972375690607735,
+      "grad_norm": 0.42593371868133545,
+      "learning_rate": 3.916234614346204e-08,
+      "loss": 0.1469,
+      "step": 4035
+    },
+    {
+      "epoch": 2.9731123388581953,
+      "grad_norm": 0.40533506870269775,
+      "learning_rate": 3.701701048105521e-08,
+      "loss": 0.1215,
+      "step": 4036
+    },
+    {
+      "epoch": 2.9738489871086555,
+      "grad_norm": 0.5333660244941711,
+      "learning_rate": 3.493209210708725e-08,
+      "loss": 0.1454,
+      "step": 4037
+    },
+    {
+      "epoch": 2.974585635359116,
+      "grad_norm": 0.4893753230571747,
+      "learning_rate": 3.29075922816946e-08,
+      "loss": 0.1435,
+      "step": 4038
+    },
+    {
+      "epoch": 2.9753222836095765,
+      "grad_norm": 0.39299649000167847,
+      "learning_rate": 3.094351222844294e-08,
+      "loss": 0.1066,
+      "step": 4039
+    },
+    {
+      "epoch": 2.976058931860037,
+      "grad_norm": 0.4104492962360382,
+      "learning_rate": 2.9039853134449348e-08,
+      "loss": 0.0963,
+      "step": 4040
+    },
+    {
+      "epoch": 2.9767955801104975,
+      "grad_norm": 0.4806900918483734,
+      "learning_rate": 2.7196616150271247e-08,
+      "loss": 0.1255,
+      "step": 4041
+    },
+    {
+      "epoch": 2.9775322283609578,
+      "grad_norm": 0.4183269143104553,
+      "learning_rate": 2.5413802389939735e-08,
+      "loss": 0.1456,
+      "step": 4042
+    },
+    {
+      "epoch": 2.978268876611418,
+      "grad_norm": 0.4006012976169586,
+      "learning_rate": 2.3691412930992864e-08,
+      "loss": 0.1109,
+      "step": 4043
+    },
+    {
+      "epoch": 2.9790055248618783,
+      "grad_norm": 0.4611269235610962,
+      "learning_rate": 2.2029448814431253e-08,
+      "loss": 0.1349,
+      "step": 4044
+    },
+    {
+      "epoch": 2.979742173112339,
+      "grad_norm": 0.4572463631629944,
+      "learning_rate": 2.0427911044751392e-08,
+      "loss": 0.1125,
+      "step": 4045
+    },
+    {
+      "epoch": 2.9804788213627993,
+      "grad_norm": 0.3652282953262329,
+      "learning_rate": 1.8886800589912322e-08,
+      "loss": 0.0912,
+      "step": 4046
+    },
+    {
+      "epoch": 2.9812154696132596,
+      "grad_norm": 0.4529966413974762,
+      "learning_rate": 1.7406118381346758e-08,
+      "loss": 0.0887,
+      "step": 4047
+    },
+    {
+      "epoch": 2.9819521178637203,
+      "grad_norm": 0.41920584440231323,
+      "learning_rate": 1.5985865313994374e-08,
+      "loss": 0.1024,
+      "step": 4048
+    },
+    {
+      "epoch": 2.9826887661141805,
+      "grad_norm": 0.38177281618118286,
+      "learning_rate": 1.4626042246224103e-08,
+      "loss": 0.1005,
+      "step": 4049
+    },
+    {
+      "epoch": 2.983425414364641,
+      "grad_norm": 0.46106603741645813,
+      "learning_rate": 1.3326649999934049e-08,
+      "loss": 0.1222,
+      "step": 4050
+    },
+    {
+      "epoch": 2.984162062615101,
+      "grad_norm": 0.44589948654174805,
+      "learning_rate": 1.208768936045157e-08,
+      "loss": 0.1088,
+      "step": 4051
+    },
+    {
+      "epoch": 2.984898710865562,
+      "grad_norm": 0.4326612651348114,
+      "learning_rate": 1.09091610766221e-08,
+      "loss": 0.1544,
+      "step": 4052
+    },
+    {
+      "epoch": 2.985635359116022,
+      "grad_norm": 0.40109142661094666,
+      "learning_rate": 9.791065860720317e-09,
+      "loss": 0.1251,
+      "step": 4053
+    },
+    {
+      "epoch": 2.9863720073664823,
+      "grad_norm": 0.418179452419281,
+      "learning_rate": 8.733404388538979e-09,
+      "loss": 0.091,
+      "step": 4054
+    },
+    {
+      "epoch": 2.987108655616943,
+      "grad_norm": 0.41939765214920044,
+      "learning_rate": 7.736177299311198e-09,
+      "loss": 0.1116,
+      "step": 4055
+    },
+    {
+      "epoch": 2.9878453038674033,
+      "grad_norm": 0.4822397232055664,
+      "learning_rate": 6.79938519576595e-09,
+      "loss": 0.1445,
+      "step": 4056
+    },
+    {
+      "epoch": 2.9885819521178636,
+      "grad_norm": 0.33148708939552307,
+      "learning_rate": 5.9230286440947745e-09,
+      "loss": 0.0995,
+      "step": 4057
+    },
+    {
+      "epoch": 2.989318600368324,
+      "grad_norm": 0.394879549741745,
+      "learning_rate": 5.107108173962871e-09,
+      "loss": 0.0885,
+      "step": 4058
+    },
+    {
+      "epoch": 2.9900552486187846,
+      "grad_norm": 0.535071849822998,
+      "learning_rate": 4.351624278509103e-09,
+      "loss": 0.1855,
+      "step": 4059
+    },
+    {
+      "epoch": 2.990791896869245,
+      "grad_norm": 0.39724433422088623,
+      "learning_rate": 3.6565774143459962e-09,
+      "loss": 0.1311,
+      "step": 4060
+    },
+    {
+      "epoch": 2.9915285451197056,
+      "grad_norm": 0.4425889551639557,
+      "learning_rate": 3.0219680015708406e-09,
+      "loss": 0.1081,
+      "step": 4061
+    },
+    {
+      "epoch": 2.992265193370166,
+      "grad_norm": 0.35188987851142883,
+      "learning_rate": 2.4477964237212824e-09,
+      "loss": 0.1005,
+      "step": 4062
+    },
+    {
+      "epoch": 2.993001841620626,
+      "grad_norm": 0.42041268944740295,
+      "learning_rate": 1.9340630278308345e-09,
+      "loss": 0.1107,
+      "step": 4063
+    },
+    {
+      "epoch": 2.9937384898710864,
+      "grad_norm": 0.43599700927734375,
+      "learning_rate": 1.4807681243955707e-09,
+      "loss": 0.102,
+      "step": 4064
+    },
+    {
+      "epoch": 2.994475138121547,
+      "grad_norm": 0.4020419716835022,
+      "learning_rate": 1.0879119873852262e-09,
+      "loss": 0.1328,
+      "step": 4065
+    },
+    {
+      "epoch": 2.9952117863720074,
+      "grad_norm": 0.5001260042190552,
+      "learning_rate": 7.554948542543017e-10,
+      "loss": 0.1634,
+      "step": 4066
+    },
+    {
+      "epoch": 2.9959484346224676,
+      "grad_norm": 0.48046594858169556,
+      "learning_rate": 4.835169258976536e-10,
+      "loss": 0.1276,
+      "step": 4067
+    },
+    {
+      "epoch": 2.9966850828729283,
+      "grad_norm": 0.3800838589668274,
+      "learning_rate": 2.71978366706005e-10,
+      "loss": 0.0995,
+      "step": 4068
+    },
+    {
+      "epoch": 2.9974217311233886,
+      "grad_norm": 0.38035425543785095,
+      "learning_rate": 1.2087930453263952e-10,
+      "loss": 0.1164,
+      "step": 4069
+    },
+    {
+      "epoch": 2.998158379373849,
+      "grad_norm": 0.48082515597343445,
+      "learning_rate": 3.021983070450318e-11,
+      "loss": 0.1213,
+      "step": 4070
+    },
+    {
+      "epoch": 2.998895027624309,
+      "grad_norm": 0.3878474831581116,
+      "learning_rate": 0.0,
+      "loss": 0.103,
+      "step": 4071
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.089665390861353e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null