Training in progress, step 1250, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1d76b1ba4f1499da4a8ed5a8300e6dc366438ac57bf8279ee969fd0b2ca4728b
 size 1279323952
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fb84ed3f14cc528de2ab5492455762837c66a3b6250725b8ebe7d7239de5d41b
 size 2558803194
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c2e7a6cc60a56e9e41bc4d01760bda59a0858d99962b8232c0c24c34810dcfef
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0aaab3ece7f21f6134c7946ccfd6c2682813ad47096098a1e3a156edc7ad945a
 size 1064
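All four files above are stored with Git LFS, so each diff touches only the three-line pointer file: the LFS spec version, the sha256 object ID (oid) of the blob, and its byte size (identical on both sides here; the old oids are truncated in this view). As a minimal sketch of what a pointer encodes, not part of this repo, the following assumes the pointer text and a locally fetched blob:

import hashlib

def parse_lfs_pointer(text: str) -> dict:
    # Each pointer line is "key value", e.g. "oid sha256:<hex>".
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer: dict, blob_path: str) -> bool:
    # Compare the blob's sha256 digest and byte size against the pointer.
    expected_oid = pointer["oid"].removeprefix("sha256:")
    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == int(pointer["size"])

# Example with the scheduler.pt pointer from this commit:
pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:0aaab3ece7f21f6134c7946ccfd6c2682813ad47096098a1e3a156edc7ad945a\n"
    "size 1064"
)
# verify_blob(pointer, "last-checkpoint/scheduler.pt")  # requires the resolved blob locally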
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.9303643703460693,
   "best_model_checkpoint": "miner_id_24/checkpoint-1200",
-  "epoch": 0.
+  "epoch": 0.6558237145855194,
   "eval_steps": 50,
-  "global_step":
+  "global_step": 1250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8607,6 +8607,364 @@
       "eval_samples_per_second": 5.662,
       "eval_steps_per_second": 1.891,
       "step": 1200
+    },
+    {
+      "epoch": 0.630115424973767,
+      "grad_norm": 0.6785674095153809,
+      "learning_rate": 0.0001792738673354654,
+      "loss": 8.3901,
+      "step": 1201
+    },
+    {
+      "epoch": 0.6306400839454355,
+      "grad_norm": 0.6648474931716919,
+      "learning_rate": 0.00017924030602439216,
+      "loss": 7.8052,
+      "step": 1202
+    },
+    {
+      "epoch": 0.6311647429171039,
+      "grad_norm": 0.6322881579399109,
+      "learning_rate": 0.00017920672070963228,
+      "loss": 8.0898,
+      "step": 1203
+    },
+    {
+      "epoch": 0.6316894018887723,
+      "grad_norm": 0.6528663039207458,
+      "learning_rate": 0.0001791731114013594,
+      "loss": 8.0178,
+      "step": 1204
+    },
+    {
+      "epoch": 0.6322140608604407,
+      "grad_norm": 0.7760594487190247,
+      "learning_rate": 0.00017913947810975462,
+      "loss": 7.8516,
+      "step": 1205
+    },
+    {
+      "epoch": 0.6327387198321092,
+      "grad_norm": 0.7572141289710999,
+      "learning_rate": 0.0001791058208450062,
+      "loss": 7.5367,
+      "step": 1206
+    },
+    {
+      "epoch": 0.6332633788037776,
+      "grad_norm": 0.952572762966156,
+      "learning_rate": 0.00017907213961730972,
+      "loss": 8.8389,
+      "step": 1207
+    },
+    {
+      "epoch": 0.633788037775446,
+      "grad_norm": 0.8288649320602417,
+      "learning_rate": 0.00017903843443686795,
+      "loss": 8.2295,
+      "step": 1208
+    },
+    {
+      "epoch": 0.6343126967471143,
+      "grad_norm": 0.9002505540847778,
+      "learning_rate": 0.00017900470531389095,
+      "loss": 8.2813,
+      "step": 1209
+    },
+    {
+      "epoch": 0.6348373557187827,
+      "grad_norm": 0.8269932866096497,
+      "learning_rate": 0.00017897095225859606,
+      "loss": 8.9073,
+      "step": 1210
+    },
+    {
+      "epoch": 0.6353620146904512,
+      "grad_norm": 0.8590414524078369,
+      "learning_rate": 0.00017893717528120784,
+      "loss": 7.6909,
+      "step": 1211
+    },
+    {
+      "epoch": 0.6358866736621196,
+      "grad_norm": 1.1503770351409912,
+      "learning_rate": 0.00017890337439195811,
+      "loss": 8.3473,
+      "step": 1212
+    },
+    {
+      "epoch": 0.636411332633788,
+      "grad_norm": 0.7925499081611633,
+      "learning_rate": 0.00017886954960108587,
+      "loss": 7.5837,
+      "step": 1213
+    },
+    {
+      "epoch": 0.6369359916054564,
+      "grad_norm": 1.0551660060882568,
+      "learning_rate": 0.0001788357009188375,
+      "loss": 8.6293,
+      "step": 1214
+    },
+    {
+      "epoch": 0.6374606505771249,
+      "grad_norm": 1.1928114891052246,
+      "learning_rate": 0.0001788018283554665,
+      "loss": 8.7884,
+      "step": 1215
+    },
+    {
+      "epoch": 0.6379853095487933,
+      "grad_norm": 0.8815275430679321,
+      "learning_rate": 0.00017876793192123365,
+      "loss": 7.4191,
+      "step": 1216
+    },
+    {
+      "epoch": 0.6385099685204617,
+      "grad_norm": 1.006833553314209,
+      "learning_rate": 0.0001787340116264069,
+      "loss": 7.6687,
+      "step": 1217
+    },
+    {
+      "epoch": 0.6390346274921301,
+      "grad_norm": 0.9964754581451416,
+      "learning_rate": 0.00017870006748126153,
+      "loss": 7.8293,
+      "step": 1218
+    },
+    {
+      "epoch": 0.6395592864637986,
+      "grad_norm": 1.082804799079895,
+      "learning_rate": 0.00017866609949607995,
+      "loss": 8.009,
+      "step": 1219
+    },
+    {
+      "epoch": 0.640083945435467,
+      "grad_norm": 1.3665459156036377,
+      "learning_rate": 0.0001786321076811519,
+      "loss": 8.2669,
+      "step": 1220
+    },
+    {
+      "epoch": 0.6406086044071354,
+      "grad_norm": 1.0634828805923462,
+      "learning_rate": 0.0001785980920467742,
+      "loss": 8.3186,
+      "step": 1221
+    },
+    {
+      "epoch": 0.6411332633788038,
+      "grad_norm": 1.1793396472930908,
+      "learning_rate": 0.000178564052603251,
+      "loss": 8.2618,
+      "step": 1222
+    },
+    {
+      "epoch": 0.6416579223504721,
+      "grad_norm": 1.1798900365829468,
+      "learning_rate": 0.0001785299893608936,
+      "loss": 8.0329,
+      "step": 1223
+    },
+    {
+      "epoch": 0.6421825813221406,
+      "grad_norm": 1.1078875064849854,
+      "learning_rate": 0.00017849590233002052,
+      "loss": 7.5323,
+      "step": 1224
+    },
+    {
+      "epoch": 0.642707240293809,
+      "grad_norm": 1.2947778701782227,
+      "learning_rate": 0.0001784617915209575,
+      "loss": 8.3191,
+      "step": 1225
+    },
+    {
+      "epoch": 0.6432318992654774,
+      "grad_norm": 1.0576255321502686,
+      "learning_rate": 0.00017842765694403752,
+      "loss": 8.1193,
+      "step": 1226
+    },
+    {
+      "epoch": 0.6437565582371458,
+      "grad_norm": 1.1417510509490967,
+      "learning_rate": 0.00017839349860960068,
+      "loss": 8.0367,
+      "step": 1227
+    },
+    {
+      "epoch": 0.6442812172088143,
+      "grad_norm": 1.2275340557098389,
+      "learning_rate": 0.0001783593165279943,
+      "loss": 8.2202,
+      "step": 1228
+    },
+    {
+      "epoch": 0.6448058761804827,
+      "grad_norm": 1.2881832122802734,
+      "learning_rate": 0.00017832511070957295,
+      "loss": 8.2294,
+      "step": 1229
+    },
+    {
+      "epoch": 0.6453305351521511,
+      "grad_norm": 1.375793695449829,
+      "learning_rate": 0.00017829088116469834,
+      "loss": 7.1402,
+      "step": 1230
+    },
+    {
+      "epoch": 0.6458551941238195,
+      "grad_norm": 1.2434371709823608,
+      "learning_rate": 0.00017825662790373935,
+      "loss": 8.8262,
+      "step": 1231
+    },
+    {
+      "epoch": 0.646379853095488,
+      "grad_norm": 1.3222373723983765,
+      "learning_rate": 0.00017822235093707206,
+      "loss": 7.7723,
+      "step": 1232
+    },
+    {
+      "epoch": 0.6469045120671564,
+      "grad_norm": 1.4988670349121094,
+      "learning_rate": 0.0001781880502750798,
+      "loss": 7.4396,
+      "step": 1233
+    },
+    {
+      "epoch": 0.6474291710388248,
+      "grad_norm": 1.1858347654342651,
+      "learning_rate": 0.00017815372592815297,
+      "loss": 8.38,
+      "step": 1234
+    },
+    {
+      "epoch": 0.6479538300104932,
+      "grad_norm": 1.3001600503921509,
+      "learning_rate": 0.0001781193779066892,
+      "loss": 7.4005,
+      "step": 1235
+    },
+    {
+      "epoch": 0.6484784889821616,
+      "grad_norm": 1.302140712738037,
+      "learning_rate": 0.0001780850062210933,
+      "loss": 7.0935,
+      "step": 1236
+    },
+    {
+      "epoch": 0.64900314795383,
+      "grad_norm": 1.4957777261734009,
+      "learning_rate": 0.0001780506108817772,
+      "loss": 8.0204,
+      "step": 1237
+    },
+    {
+      "epoch": 0.6495278069254984,
+      "grad_norm": 1.4060813188552856,
+      "learning_rate": 0.00017801619189916008,
+      "loss": 7.5112,
+      "step": 1238
+    },
+    {
+      "epoch": 0.6500524658971668,
+      "grad_norm": 1.3509002923965454,
+      "learning_rate": 0.0001779817492836682,
+      "loss": 7.5706,
+      "step": 1239
+    },
+    {
+      "epoch": 0.6505771248688352,
+      "grad_norm": 1.6165887117385864,
+      "learning_rate": 0.000177947283045735,
+      "loss": 7.5412,
+      "step": 1240
+    },
+    {
+      "epoch": 0.6511017838405037,
+      "grad_norm": 1.7650121450424194,
+      "learning_rate": 0.00017791279319580113,
+      "loss": 7.8366,
+      "step": 1241
+    },
+    {
+      "epoch": 0.6516264428121721,
+      "grad_norm": 1.5572558641433716,
+      "learning_rate": 0.00017787827974431427,
+      "loss": 6.7198,
+      "step": 1242
+    },
+    {
+      "epoch": 0.6521511017838405,
+      "grad_norm": 1.6787631511688232,
+      "learning_rate": 0.00017784374270172942,
+      "loss": 8.5223,
+      "step": 1243
+    },
+    {
+      "epoch": 0.6526757607555089,
+      "grad_norm": 1.975051999092102,
+      "learning_rate": 0.00017780918207850857,
+      "loss": 8.3379,
+      "step": 1244
+    },
+    {
+      "epoch": 0.6532004197271774,
+      "grad_norm": 1.9721925258636475,
+      "learning_rate": 0.00017777459788512095,
+      "loss": 8.3842,
+      "step": 1245
+    },
+    {
+      "epoch": 0.6537250786988458,
+      "grad_norm": 2.151292324066162,
+      "learning_rate": 0.00017773999013204284,
+      "loss": 6.9369,
+      "step": 1246
+    },
+    {
+      "epoch": 0.6542497376705142,
+      "grad_norm": 1.9202648401260376,
+      "learning_rate": 0.00017770535882975783,
+      "loss": 7.0776,
+      "step": 1247
+    },
+    {
+      "epoch": 0.6547743966421826,
+      "grad_norm": 2.297982931137085,
+      "learning_rate": 0.0001776707039887564,
+      "loss": 7.2104,
+      "step": 1248
+    },
+    {
+      "epoch": 0.655299055613851,
+      "grad_norm": 2.4272403717041016,
+      "learning_rate": 0.00017763602561953636,
+      "loss": 7.0545,
+      "step": 1249
+    },
+    {
+      "epoch": 0.6558237145855194,
+      "grad_norm": 5.506582736968994,
+      "learning_rate": 0.00017760132373260254,
+      "loss": 7.7152,
+      "step": 1250
+    },
+    {
+      "epoch": 0.6558237145855194,
+      "eval_loss": 1.9324959516525269,
+      "eval_runtime": 82.4867,
+      "eval_samples_per_second": 5.662,
+      "eval_steps_per_second": 1.891,
+      "step": 1250
     }
   ],
   "logging_steps": 1,
@@ -8621,7 +8979,7 @@
       "early_stopping_threshold": 0.0
     },
     "attributes": {
-      "early_stopping_patience_counter":
+      "early_stopping_patience_counter": 1
     }
   },
   "TrainerControl": {
@@ -8635,7 +8993,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.336984962782986e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null
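The trainer_state.json update appends the per-step training logs for steps 1201-1250 plus the step-1250 eval record. Note that the new eval_loss (1.9324959516525269) did not improve on best_metric (1.9303643703460693, set at checkpoint-1200), which is why early_stopping_patience_counter ticks up to 1. A minimal sketch for reading the appended metrics back out of the checkpoint, assuming the standard transformers trainer_state.json schema shown above:

import json

# Load the trainer state saved alongside the checkpoint files.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# log_history mixes per-step training entries (with "loss")
# and periodic eval entries (with "eval_loss").
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"global_step={state['global_step']}  best_metric={state['best_metric']}")
print(f"last train loss {train_logs[-1]['loss']} at step {train_logs[-1]['step']}")
print(f"last eval loss  {eval_logs[-1]['eval_loss']} at step {eval_logs[-1]['step']}")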