Training in progress, step 600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ad721f183047deb9f9941632532d95dd5c6f94a75258458114ad606e6c79588
 size 590925768

 version https://git-lfs.github.com/spec/v1
+oid sha256:d148803dec3242bacfa6090b788cc33df772e93b875035cf7e3a3d8c47dc0693
 size 590925768

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba070a7b4f0eda9b4e553807c2914aafea5dfe03e7fed45faf09722646313126
 size 301533378

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2a243d4609f2848bbcd91d030f48333676cc98c340050529214e8a4c5cbc451
 size 301533378

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7ddc28fcb0d59d0c03a7ff796860f75248a656670e1261d481e7debe016a6dc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5871d01553a694652bee01b474f7aa31386bfbdbca60584c13070b444a88c461
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ef5f6fa421e52c5f015f38dbc8da890b8b41a4a78c203dae039e2fccfcbcb95
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0541d0446a58575a8e1bf0f51453829134e69b4b4f483226a260165a57705a1c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5511385202407837,
-  "best_model_checkpoint": "miner_id_24/checkpoint-550",
-  "epoch": 0.06160739288714646,
   "eval_steps": 50,
-  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3953,6 +3953,364 @@
       "eval_samples_per_second": 9.607,
       "eval_steps_per_second": 9.607,
       "step": 550
     }
   ],
   "logging_steps": 1,
@@ -3976,12 +4334,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.065030154531635e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5509313941001892,
+  "best_model_checkpoint": "miner_id_24/checkpoint-600",
+  "epoch": 0.06720806496779613,
   "eval_steps": 50,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.607,
       "eval_steps_per_second": 9.607,
       "step": 550
+    },
+    {
+      "epoch": 0.06171940632875945,
+      "grad_norm": 0.12340470403432846,
+      "learning_rate": 5.07672955698109e-06,
+      "loss": 0.529,
+      "step": 551
+    },
+    {
+      "epoch": 0.061831419770372445,
+      "grad_norm": 0.16562983393669128,
+      "learning_rate": 4.872748404735644e-06,
+      "loss": 0.4595,
+      "step": 552
+    },
+    {
+      "epoch": 0.061943433211985435,
+      "grad_norm": 0.15129025280475616,
+      "learning_rate": 4.6728820045062954e-06,
+      "loss": 0.6089,
+      "step": 553
+    },
+    {
+      "epoch": 0.06205544665359843,
+      "grad_norm": 0.1321287453174591,
+      "learning_rate": 4.477136023048727e-06,
+      "loss": 0.5442,
+      "step": 554
+    },
+    {
+      "epoch": 0.06216746009521142,
+      "grad_norm": 0.1615147441625595,
+      "learning_rate": 4.285516010293522e-06,
+      "loss": 0.5495,
+      "step": 555
+    },
+    {
+      "epoch": 0.06227947353682442,
+      "grad_norm": 0.1276487559080124,
+      "learning_rate": 4.098027399188802e-06,
+      "loss": 0.4491,
+      "step": 556
+    },
+    {
+      "epoch": 0.06239148697843741,
+      "grad_norm": 0.1305229514837265,
+      "learning_rate": 3.914675505546277e-06,
+      "loss": 0.63,
+      "step": 557
+    },
+    {
+      "epoch": 0.0625035004200504,
+      "grad_norm": 0.13546405732631683,
+      "learning_rate": 3.735465527890458e-06,
+      "loss": 0.6798,
+      "step": 558
+    },
+    {
+      "epoch": 0.0626155138616634,
+      "grad_norm": 0.13185566663742065,
+      "learning_rate": 3.560402547311275e-06,
+      "loss": 0.575,
+      "step": 559
+    },
+    {
+      "epoch": 0.0627275273032764,
+      "grad_norm": 0.13408592343330383,
+      "learning_rate": 3.3894915273199987e-06,
+      "loss": 0.723,
+      "step": 560
+    },
+    {
+      "epoch": 0.06283954074488939,
+      "grad_norm": 0.11883525550365448,
+      "learning_rate": 3.2227373137085954e-06,
+      "loss": 0.4747,
+      "step": 561
+    },
+    {
+      "epoch": 0.06295155418650238,
+      "grad_norm": 0.14109228551387787,
+      "learning_rate": 3.0601446344122095e-06,
+      "loss": 0.4895,
+      "step": 562
+    },
+    {
+      "epoch": 0.06306356762811538,
+      "grad_norm": 0.14065487682819366,
+      "learning_rate": 2.9017180993752e-06,
+      "loss": 0.7272,
+      "step": 563
+    },
+    {
+      "epoch": 0.06317558106972837,
+      "grad_norm": 0.13580210506916046,
+      "learning_rate": 2.7474622004204304e-06,
+      "loss": 0.5744,
+      "step": 564
+    },
+    {
+      "epoch": 0.06328759451134136,
+      "grad_norm": 0.13952845335006714,
+      "learning_rate": 2.5973813111218546e-06,
+      "loss": 0.3957,
+      "step": 565
+    },
+    {
+      "epoch": 0.06339960795295435,
+      "grad_norm": 0.1433325558900833,
+      "learning_rate": 2.4514796866805964e-06,
+      "loss": 0.4962,
+      "step": 566
+    },
+    {
+      "epoch": 0.06351162139456734,
+      "grad_norm": 0.14881394803524017,
+      "learning_rate": 2.3097614638042493e-06,
+      "loss": 0.5508,
+      "step": 567
+    },
+    {
+      "epoch": 0.06362363483618035,
+      "grad_norm": 0.13152483105659485,
+      "learning_rate": 2.1722306605896022e-06,
+      "loss": 0.5332,
+      "step": 568
+    },
+    {
+      "epoch": 0.06373564827779334,
+      "grad_norm": 0.15379270911216736,
+      "learning_rate": 2.0388911764086966e-06,
+      "loss": 0.5214,
+      "step": 569
+    },
+    {
+      "epoch": 0.06384766171940633,
+      "grad_norm": 0.14026005566120148,
+      "learning_rate": 1.909746791798317e-06,
+      "loss": 0.473,
+      "step": 570
+    },
+    {
+      "epoch": 0.06395967516101932,
+      "grad_norm": 0.14658816158771515,
+      "learning_rate": 1.7848011683527562e-06,
+      "loss": 0.467,
+      "step": 571
+    },
+    {
+      "epoch": 0.06407168860263232,
+      "grad_norm": 0.14597094058990479,
+      "learning_rate": 1.6640578486200373e-06,
+      "loss": 0.5426,
+      "step": 572
+    },
+    {
+      "epoch": 0.06418370204424531,
+      "grad_norm": 0.14672952890396118,
+      "learning_rate": 1.5475202560014054e-06,
+      "loss": 0.623,
+      "step": 573
+    },
+    {
+      "epoch": 0.0642957154858583,
+      "grad_norm": 0.1516023725271225,
+      "learning_rate": 1.435191694654375e-06,
+      "loss": 0.6358,
+      "step": 574
+    },
+    {
+      "epoch": 0.06440772892747129,
+      "grad_norm": 0.13252241909503937,
+      "learning_rate": 1.3270753493989373e-06,
+      "loss": 0.4679,
+      "step": 575
+    },
+    {
+      "epoch": 0.0645197423690843,
+      "grad_norm": 0.14411140978336334,
+      "learning_rate": 1.2231742856273151e-06,
+      "loss": 0.5655,
+      "step": 576
+    },
+    {
+      "epoch": 0.06463175581069729,
+      "grad_norm": 0.15002818405628204,
+      "learning_rate": 1.1234914492170678e-06,
+      "loss": 0.5183,
+      "step": 577
+    },
+    {
+      "epoch": 0.06474376925231028,
+      "grad_norm": 0.15125377476215363,
+      "learning_rate": 1.0280296664475218e-06,
+      "loss": 0.5556,
+      "step": 578
+    },
+    {
+      "epoch": 0.06485578269392327,
+      "grad_norm": 0.153385192155838,
+      "learning_rate": 9.367916439196709e-07,
+      "loss": 0.4701,
+      "step": 579
+    },
+    {
+      "epoch": 0.06496779613553627,
+      "grad_norm": 0.1459246724843979,
+      "learning_rate": 8.49779968479436e-07,
+      "loss": 0.5861,
+      "step": 580
+    },
+    {
+      "epoch": 0.06507980957714926,
+      "grad_norm": 0.14465294778347015,
+      "learning_rate": 7.669971071442738e-07,
+      "loss": 0.581,
+      "step": 581
+    },
+    {
+      "epoch": 0.06519182301876225,
+      "grad_norm": 0.14619335532188416,
+      "learning_rate": 6.884454070333168e-07,
+      "loss": 0.5257,
+      "step": 582
+    },
+    {
+      "epoch": 0.06530383646037524,
+      "grad_norm": 0.13438697159290314,
+      "learning_rate": 6.141270953007593e-07,
+      "loss": 0.4356,
+      "step": 583
+    },
+    {
+      "epoch": 0.06541584990198823,
+      "grad_norm": 0.1581222116947174,
+      "learning_rate": 5.440442790727085e-07,
+      "loss": 0.5663,
+      "step": 584
+    },
+    {
+      "epoch": 0.06552786334360124,
+      "grad_norm": 0.15837247669696808,
+      "learning_rate": 4.781989453874813e-07,
+      "loss": 0.5957,
+      "step": 585
+    },
+    {
+      "epoch": 0.06563987678521423,
+      "grad_norm": 0.14343412220478058,
+      "learning_rate": 4.1659296113925046e-07,
+      "loss": 0.4383,
+      "step": 586
+    },
+    {
+      "epoch": 0.06575189022682722,
+      "grad_norm": 0.15165679156780243,
+      "learning_rate": 3.592280730250863e-07,
+      "loss": 0.5884,
+      "step": 587
+    },
+    {
+      "epoch": 0.06586390366844021,
+      "grad_norm": 0.1490369737148285,
+      "learning_rate": 3.06105907495513e-07,
+      "loss": 0.4845,
+      "step": 588
+    },
+    {
+      "epoch": 0.06597591711005321,
+      "grad_norm": 0.15211187303066254,
+      "learning_rate": 2.572279707082625e-07,
+      "loss": 0.5914,
+      "step": 589
+    },
+    {
+      "epoch": 0.0660879305516662,
+      "grad_norm": 0.1439492255449295,
+      "learning_rate": 2.125956484857083e-07,
+      "loss": 0.506,
+      "step": 590
+    },
+    {
+      "epoch": 0.06619994399327919,
+      "grad_norm": 0.15834854543209076,
+      "learning_rate": 1.7221020627548043e-07,
+      "loss": 0.6013,
+      "step": 591
+    },
+    {
+      "epoch": 0.06631195743489218,
+      "grad_norm": 0.15449443459510803,
+      "learning_rate": 1.3607278911462739e-07,
+      "loss": 0.4803,
+      "step": 592
+    },
+    {
+      "epoch": 0.06642397087650519,
+      "grad_norm": 0.16052158176898956,
+      "learning_rate": 1.0418442159715879e-07,
+      "loss": 0.4994,
+      "step": 593
+    },
+    {
+      "epoch": 0.06653598431811818,
+      "grad_norm": 0.15226219594478607,
+      "learning_rate": 7.654600784493536e-08,
+      "loss": 0.4476,
+      "step": 594
+    },
+    {
+      "epoch": 0.06664799775973117,
+      "grad_norm": 0.17426903545856476,
+      "learning_rate": 5.315833148210602e-08,
+      "loss": 0.5283,
+      "step": 595
+    },
+    {
+      "epoch": 0.06676001120134416,
+      "grad_norm": 0.16931919753551483,
+      "learning_rate": 3.402205561289229e-08,
+      "loss": 0.5116,
+      "step": 596
+    },
+    {
+      "epoch": 0.06687202464295715,
+      "grad_norm": 0.1650620847940445,
+      "learning_rate": 1.9137722802686772e-08,
+      "loss": 0.4657,
+      "step": 597
+    },
+    {
+      "epoch": 0.06698403808457015,
+      "grad_norm": 0.1828121691942215,
+      "learning_rate": 8.50575506278206e-09,
+      "loss": 0.4424,
+      "step": 598
+    },
+    {
+      "epoch": 0.06709605152618314,
+      "grad_norm": 0.19233034551143646,
+      "learning_rate": 2.126453838380282e-09,
+      "loss": 0.5529,
+      "step": 599
+    },
+    {
+      "epoch": 0.06720806496779613,
+      "grad_norm": 0.24840380251407623,
+      "learning_rate": 0.0,
+      "loss": 0.5998,
+      "step": 600
+    },
+    {
+      "epoch": 0.06720806496779613,
+      "eval_loss": 0.5509313941001892,
+      "eval_runtime": 30.0076,
+      "eval_samples_per_second": 9.564,
+      "eval_steps_per_second": 9.564,
+      "step": 600
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.535341585576755e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null