error577 committed on
Commit
f4d519b
·
verified ·
1 Parent(s): 2c63544

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86b3b63be4448755c0009b69660d5b9414363b669c5d657fda85d4386ba0f2dc
3
  size 957942768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efc1163b23eb593a8578929e2d130f96856b97846e4d1b51a959c1dabb4bf922
3
  size 957942768
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:972ec02ed85fb0b1e514b3e282beecbfbea88f1cf7554bba444c742a4f7b220f
3
  size 487013236
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ca1a8806e2d359746c96a8505c0db43cba130ceb07ec58151102bc711638ccf
3
  size 487013236
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:460992d1fc328a7355a0075c0ee699b21b745584f7f8824117265d427680a5f8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f2232a20060f4e34ec2de4e5c7dd055420041fd4b32536fbe812cf646a366e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0652429c23558e0e0a102ee7a03bf6b258b24fad4104041d1c89929993cab61a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ded3d032047d899801b61ad8c24ff5f0dcb962a1322be38b449c3ef1152b14be
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.050189256668091,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.22475066722854334,
5
  "eval_steps": 50,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2879,6 +2879,364 @@
2879
  "eval_samples_per_second": 8.484,
2880
  "eval_steps_per_second": 8.484,
2881
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2882
  }
2883
  ],
2884
  "logging_steps": 1,
@@ -2893,7 +3251,7 @@
2893
  "early_stopping_threshold": 0.0
2894
  },
2895
  "attributes": {
2896
- "early_stopping_patience_counter": 2
2897
  }
2898
  },
2899
  "TrainerControl": {
@@ -2902,12 +3260,12 @@
2902
  "should_evaluate": false,
2903
  "should_log": false,
2904
  "should_save": true,
2905
- "should_training_stop": false
2906
  },
2907
  "attributes": {}
2908
  }
2909
  },
2910
- "total_flos": 5.92624150904832e+16,
2911
  "train_batch_size": 1,
2912
  "trial_name": null,
2913
  "trial_params": null
 
1
  {
2
  "best_metric": 2.050189256668091,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.2528445006321112,
5
  "eval_steps": 50,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2879
  "eval_samples_per_second": 8.484,
2880
  "eval_steps_per_second": 8.484,
2881
  "step": 400
2882
+ },
2883
+ {
2884
+ "epoch": 0.2253125438966147,
2885
+ "grad_norm": 1.295182704925537,
2886
+ "learning_rate": 0.0002960296947671209,
2887
+ "loss": 2.1389,
2888
+ "step": 401
2889
+ },
2890
+ {
2891
+ "epoch": 0.22587442056468604,
2892
+ "grad_norm": 1.8076077699661255,
2893
+ "learning_rate": 0.00029600945097446043,
2894
+ "loss": 2.3872,
2895
+ "step": 402
2896
+ },
2897
+ {
2898
+ "epoch": 0.2264362972327574,
2899
+ "grad_norm": 1.4435436725616455,
2900
+ "learning_rate": 0.0002959891563991561,
2901
+ "loss": 1.7958,
2902
+ "step": 403
2903
+ },
2904
+ {
2905
+ "epoch": 0.22699817390082877,
2906
+ "grad_norm": 1.2692290544509888,
2907
+ "learning_rate": 0.0002959688110482665,
2908
+ "loss": 2.1162,
2909
+ "step": 404
2910
+ },
2911
+ {
2912
+ "epoch": 0.22756005056890014,
2913
+ "grad_norm": 1.4647787809371948,
2914
+ "learning_rate": 0.0002959484149288678,
2915
+ "loss": 2.0282,
2916
+ "step": 405
2917
+ },
2918
+ {
2919
+ "epoch": 0.22812192723697147,
2920
+ "grad_norm": 2.5118775367736816,
2921
+ "learning_rate": 0.0002959279680480538,
2922
+ "loss": 1.9518,
2923
+ "step": 406
2924
+ },
2925
+ {
2926
+ "epoch": 0.22868380390504284,
2927
+ "grad_norm": 1.2008159160614014,
2928
+ "learning_rate": 0.0002959074704129361,
2929
+ "loss": 1.9314,
2930
+ "step": 407
2931
+ },
2932
+ {
2933
+ "epoch": 0.2292456805731142,
2934
+ "grad_norm": 1.215867519378662,
2935
+ "learning_rate": 0.00029588692203064376,
2936
+ "loss": 1.6709,
2937
+ "step": 408
2938
+ },
2939
+ {
2940
+ "epoch": 0.22980755724118557,
2941
+ "grad_norm": 1.3331108093261719,
2942
+ "learning_rate": 0.00029586632290832366,
2943
+ "loss": 1.9617,
2944
+ "step": 409
2945
+ },
2946
+ {
2947
+ "epoch": 0.2303694339092569,
2948
+ "grad_norm": 1.653433918952942,
2949
+ "learning_rate": 0.0002958456730531403,
2950
+ "loss": 1.6985,
2951
+ "step": 410
2952
+ },
2953
+ {
2954
+ "epoch": 0.23093131057732827,
2955
+ "grad_norm": 1.3687360286712646,
2956
+ "learning_rate": 0.0002958249724722756,
2957
+ "loss": 2.1661,
2958
+ "step": 411
2959
+ },
2960
+ {
2961
+ "epoch": 0.23149318724539963,
2962
+ "grad_norm": 4.9779205322265625,
2963
+ "learning_rate": 0.00029580422117292946,
2964
+ "loss": 1.8247,
2965
+ "step": 412
2966
+ },
2967
+ {
2968
+ "epoch": 0.232055063913471,
2969
+ "grad_norm": 1.7227343320846558,
2970
+ "learning_rate": 0.0002957834191623191,
2971
+ "loss": 2.2165,
2972
+ "step": 413
2973
+ },
2974
+ {
2975
+ "epoch": 0.23261694058154236,
2976
+ "grad_norm": 1.2426742315292358,
2977
+ "learning_rate": 0.00029576256644767976,
2978
+ "loss": 1.8202,
2979
+ "step": 414
2980
+ },
2981
+ {
2982
+ "epoch": 0.2331788172496137,
2983
+ "grad_norm": 1.4988555908203125,
2984
+ "learning_rate": 0.00029574166303626394,
2985
+ "loss": 1.9089,
2986
+ "step": 415
2987
+ },
2988
+ {
2989
+ "epoch": 0.23374069391768507,
2990
+ "grad_norm": 1.558443546295166,
2991
+ "learning_rate": 0.00029572070893534193,
2992
+ "loss": 1.7309,
2993
+ "step": 416
2994
+ },
2995
+ {
2996
+ "epoch": 0.23430257058575643,
2997
+ "grad_norm": 1.3139750957489014,
2998
+ "learning_rate": 0.00029569970415220173,
2999
+ "loss": 1.8066,
3000
+ "step": 417
3001
+ },
3002
+ {
3003
+ "epoch": 0.2348644472538278,
3004
+ "grad_norm": 1.653947114944458,
3005
+ "learning_rate": 0.0002956786486941488,
3006
+ "loss": 2.1868,
3007
+ "step": 418
3008
+ },
3009
+ {
3010
+ "epoch": 0.23542632392189913,
3011
+ "grad_norm": 1.4625102281570435,
3012
+ "learning_rate": 0.0002956575425685064,
3013
+ "loss": 2.0744,
3014
+ "step": 419
3015
+ },
3016
+ {
3017
+ "epoch": 0.2359882005899705,
3018
+ "grad_norm": 1.7257875204086304,
3019
+ "learning_rate": 0.0002956363857826152,
3020
+ "loss": 1.8357,
3021
+ "step": 420
3022
+ },
3023
+ {
3024
+ "epoch": 0.23655007725804186,
3025
+ "grad_norm": 1.5024346113204956,
3026
+ "learning_rate": 0.00029561517834383373,
3027
+ "loss": 2.0237,
3028
+ "step": 421
3029
+ },
3030
+ {
3031
+ "epoch": 0.23711195392611323,
3032
+ "grad_norm": 1.6755564212799072,
3033
+ "learning_rate": 0.0002955939202595379,
3034
+ "loss": 1.8129,
3035
+ "step": 422
3036
+ },
3037
+ {
3038
+ "epoch": 0.23767383059418457,
3039
+ "grad_norm": 1.4014532566070557,
3040
+ "learning_rate": 0.0002955726115371215,
3041
+ "loss": 2.0515,
3042
+ "step": 423
3043
+ },
3044
+ {
3045
+ "epoch": 0.23823570726225593,
3046
+ "grad_norm": 1.712101936340332,
3047
+ "learning_rate": 0.0002955512521839956,
3048
+ "loss": 2.1661,
3049
+ "step": 424
3050
+ },
3051
+ {
3052
+ "epoch": 0.2387975839303273,
3053
+ "grad_norm": 1.6710323095321655,
3054
+ "learning_rate": 0.00029552984220758925,
3055
+ "loss": 2.1425,
3056
+ "step": 425
3057
+ },
3058
+ {
3059
+ "epoch": 0.23935946059839866,
3060
+ "grad_norm": 2.7601799964904785,
3061
+ "learning_rate": 0.0002955083816153488,
3062
+ "loss": 2.3031,
3063
+ "step": 426
3064
+ },
3065
+ {
3066
+ "epoch": 0.23992133726647,
3067
+ "grad_norm": 2.1862637996673584,
3068
+ "learning_rate": 0.00029548687041473836,
3069
+ "loss": 1.8812,
3070
+ "step": 427
3071
+ },
3072
+ {
3073
+ "epoch": 0.24048321393454136,
3074
+ "grad_norm": 1.7939826250076294,
3075
+ "learning_rate": 0.0002954653086132396,
3076
+ "loss": 1.802,
3077
+ "step": 428
3078
+ },
3079
+ {
3080
+ "epoch": 0.24104509060261273,
3081
+ "grad_norm": 1.6978318691253662,
3082
+ "learning_rate": 0.0002954436962183518,
3083
+ "loss": 1.9085,
3084
+ "step": 429
3085
+ },
3086
+ {
3087
+ "epoch": 0.2416069672706841,
3088
+ "grad_norm": 1.7141228914260864,
3089
+ "learning_rate": 0.00029542203323759187,
3090
+ "loss": 1.8558,
3091
+ "step": 430
3092
+ },
3093
+ {
3094
+ "epoch": 0.24216884393875546,
3095
+ "grad_norm": 1.9624735116958618,
3096
+ "learning_rate": 0.0002954003196784942,
3097
+ "loss": 1.7955,
3098
+ "step": 431
3099
+ },
3100
+ {
3101
+ "epoch": 0.2427307206068268,
3102
+ "grad_norm": 1.9632998704910278,
3103
+ "learning_rate": 0.00029537855554861097,
3104
+ "loss": 2.0489,
3105
+ "step": 432
3106
+ },
3107
+ {
3108
+ "epoch": 0.24329259727489816,
3109
+ "grad_norm": 1.7335871458053589,
3110
+ "learning_rate": 0.0002953567408555117,
3111
+ "loss": 1.9226,
3112
+ "step": 433
3113
+ },
3114
+ {
3115
+ "epoch": 0.24385447394296952,
3116
+ "grad_norm": 1.5185022354125977,
3117
+ "learning_rate": 0.00029533487560678365,
3118
+ "loss": 2.1437,
3119
+ "step": 434
3120
+ },
3121
+ {
3122
+ "epoch": 0.2444163506110409,
3123
+ "grad_norm": 1.7918310165405273,
3124
+ "learning_rate": 0.00029531295981003174,
3125
+ "loss": 1.6909,
3126
+ "step": 435
3127
+ },
3128
+ {
3129
+ "epoch": 0.24497822727911223,
3130
+ "grad_norm": 1.853018879890442,
3131
+ "learning_rate": 0.00029529099347287826,
3132
+ "loss": 2.161,
3133
+ "step": 436
3134
+ },
3135
+ {
3136
+ "epoch": 0.2455401039471836,
3137
+ "grad_norm": 1.6407794952392578,
3138
+ "learning_rate": 0.00029526897660296316,
3139
+ "loss": 2.2005,
3140
+ "step": 437
3141
+ },
3142
+ {
3143
+ "epoch": 0.24610198061525496,
3144
+ "grad_norm": 1.9181418418884277,
3145
+ "learning_rate": 0.00029524690920794416,
3146
+ "loss": 2.0464,
3147
+ "step": 438
3148
+ },
3149
+ {
3150
+ "epoch": 0.24666385728332632,
3151
+ "grad_norm": 1.7185394763946533,
3152
+ "learning_rate": 0.0002952247912954962,
3153
+ "loss": 1.8386,
3154
+ "step": 439
3155
+ },
3156
+ {
3157
+ "epoch": 0.24722573395139766,
3158
+ "grad_norm": 2.1002471446990967,
3159
+ "learning_rate": 0.000295202622873312,
3160
+ "loss": 2.0351,
3161
+ "step": 440
3162
+ },
3163
+ {
3164
+ "epoch": 0.24778761061946902,
3165
+ "grad_norm": 2.016331672668457,
3166
+ "learning_rate": 0.00029518040394910195,
3167
+ "loss": 2.0552,
3168
+ "step": 441
3169
+ },
3170
+ {
3171
+ "epoch": 0.2483494872875404,
3172
+ "grad_norm": 1.7284388542175293,
3173
+ "learning_rate": 0.00029515813453059376,
3174
+ "loss": 1.8612,
3175
+ "step": 442
3176
+ },
3177
+ {
3178
+ "epoch": 0.24891136395561175,
3179
+ "grad_norm": 2.032498359680176,
3180
+ "learning_rate": 0.00029513581462553285,
3181
+ "loss": 2.1246,
3182
+ "step": 443
3183
+ },
3184
+ {
3185
+ "epoch": 0.2494732406236831,
3186
+ "grad_norm": 2.0629770755767822,
3187
+ "learning_rate": 0.0002951134442416822,
3188
+ "loss": 2.165,
3189
+ "step": 444
3190
+ },
3191
+ {
3192
+ "epoch": 0.2500351172917545,
3193
+ "grad_norm": 3.081624746322632,
3194
+ "learning_rate": 0.00029509102338682225,
3195
+ "loss": 2.3962,
3196
+ "step": 445
3197
+ },
3198
+ {
3199
+ "epoch": 0.2505969939598258,
3200
+ "grad_norm": 1.9752740859985352,
3201
+ "learning_rate": 0.0002950685520687511,
3202
+ "loss": 2.5579,
3203
+ "step": 446
3204
+ },
3205
+ {
3206
+ "epoch": 0.25115887062789716,
3207
+ "grad_norm": 2.6555721759796143,
3208
+ "learning_rate": 0.0002950460302952844,
3209
+ "loss": 2.335,
3210
+ "step": 447
3211
+ },
3212
+ {
3213
+ "epoch": 0.25172074729596855,
3214
+ "grad_norm": 2.179882526397705,
3215
+ "learning_rate": 0.00029502345807425523,
3216
+ "loss": 2.1026,
3217
+ "step": 448
3218
+ },
3219
+ {
3220
+ "epoch": 0.2522826239640399,
3221
+ "grad_norm": 2.176938533782959,
3222
+ "learning_rate": 0.0002950008354135143,
3223
+ "loss": 1.9971,
3224
+ "step": 449
3225
+ },
3226
+ {
3227
+ "epoch": 0.2528445006321112,
3228
+ "grad_norm": 4.19490385055542,
3229
+ "learning_rate": 0.00029497816232092997,
3230
+ "loss": 2.6225,
3231
+ "step": 450
3232
+ },
3233
+ {
3234
+ "epoch": 0.2528445006321112,
3235
+ "eval_loss": 2.0559065341949463,
3236
+ "eval_runtime": 68.6035,
3237
+ "eval_samples_per_second": 8.484,
3238
+ "eval_steps_per_second": 8.484,
3239
+ "step": 450
3240
  }
3241
  ],
3242
  "logging_steps": 1,
 
3251
  "early_stopping_threshold": 0.0
3252
  },
3253
  "attributes": {
3254
+ "early_stopping_patience_counter": 3
3255
  }
3256
  },
3257
  "TrainerControl": {
 
3260
  "should_evaluate": false,
3261
  "should_log": false,
3262
  "should_save": true,
3263
+ "should_training_stop": true
3264
  },
3265
  "attributes": {}
3266
  }
3267
  },
3268
+ "total_flos": 6.66702169767936e+16,
3269
  "train_batch_size": 1,
3270
  "trial_name": null,
3271
  "trial_params": null