Training in progress, step 650, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af38ef62375ec891cbfce4fd28bd22d7e34c157587cc450f78b67c24cf274d79
 size 590925768

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9d2cdbc64c8aed93e3493383438d70c436fda2ba3d5f97e2c7265c7dc57d98a
 size 590925768

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9eecd260ac7545716c34b890ae0f24c36dadbc6be9c3347db814a46163e37fd
 size 301533378

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b530320adc4b6131fc012b9e702b151b24095c8446eb428ef27157c2d8238cb
 size 301533378

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de5671ef90315d5673225dc1ab0e98e92309a6a79f8b3269920ade8d1ca939a6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5bcc8702776502dcccb377886280ad3a2283e2886a22f0c6d17f658d0d4a4262
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15f9642019ca32b3c422641145349dede0a960d5ced3931f098afc618e340971
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2a95abcdacb257d1005778993fa8106f84c53601a08ca596204bb7536dd504e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.12324624508619308,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 0.08760722759627669,
   "eval_steps": 50,
-  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4311,6 +4311,364 @@
       "eval_samples_per_second": 8.378,
       "eval_steps_per_second": 8.378,
       "step": 600
     }
   ],
   "logging_steps": 1,
@@ -4325,7 +4683,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -4334,12 +4692,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.887151013095014e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.12324624508619308,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.09490782989596641,
   "eval_steps": 50,
+  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.378,
       "eval_steps_per_second": 8.378,
       "step": 600
+    },
+    {
+      "epoch": 0.08775323964227048,
+      "grad_norm": 0.7561835050582886,
+      "learning_rate": 0.00010500763630844842,
+      "loss": 0.4772,
+      "step": 601
+    },
+    {
+      "epoch": 0.08789925168826428,
+      "grad_norm": 0.5763047933578491,
+      "learning_rate": 0.00010455378198874092,
+      "loss": 0.2431,
+      "step": 602
+    },
+    {
+      "epoch": 0.08804526373425807,
+      "grad_norm": 0.41480308771133423,
+      "learning_rate": 0.00010410038531194427,
+      "loss": 0.1497,
+      "step": 603
+    },
+    {
+      "epoch": 0.08819127578025188,
+      "grad_norm": 0.5208658576011658,
+      "learning_rate": 0.0001036474508437579,
+      "loss": 0.297,
+      "step": 604
+    },
+    {
+      "epoch": 0.08833728782624567,
+      "grad_norm": 0.5673862099647522,
+      "learning_rate": 0.00010319498314522693,
+      "loss": 0.3096,
+      "step": 605
+    },
+    {
+      "epoch": 0.08848329987223946,
+      "grad_norm": 1.0537465810775757,
+      "learning_rate": 0.0001027429867726961,
+      "loss": 0.4135,
+      "step": 606
+    },
+    {
+      "epoch": 0.08862931191823326,
+      "grad_norm": 0.7120312452316284,
+      "learning_rate": 0.00010229146627776376,
+      "loss": 0.4118,
+      "step": 607
+    },
+    {
+      "epoch": 0.08877532396422705,
+      "grad_norm": 0.6144299507141113,
+      "learning_rate": 0.00010184042620723637,
+      "loss": 0.2349,
+      "step": 608
+    },
+    {
+      "epoch": 0.08892133601022084,
+      "grad_norm": 0.5841251611709595,
+      "learning_rate": 0.00010138987110308241,
+      "loss": 0.23,
+      "step": 609
+    },
+    {
+      "epoch": 0.08906734805621463,
+      "grad_norm": 0.6115531921386719,
+      "learning_rate": 0.00010093980550238675,
+      "loss": 0.2537,
+      "step": 610
+    },
+    {
+      "epoch": 0.08921336010220843,
+      "grad_norm": 0.521041214466095,
+      "learning_rate": 0.00010049023393730502,
+      "loss": 0.17,
+      "step": 611
+    },
+    {
+      "epoch": 0.08935937214820222,
+      "grad_norm": 0.6351935267448425,
+      "learning_rate": 0.00010004116093501789,
+      "loss": 0.1524,
+      "step": 612
+    },
+    {
+      "epoch": 0.08950538419419603,
+      "grad_norm": 0.43919649720191956,
+      "learning_rate": 9.959259101768545e-05,
+      "loss": 0.1156,
+      "step": 613
+    },
+    {
+      "epoch": 0.08965139624018982,
+      "grad_norm": 0.34156444668769836,
+      "learning_rate": 9.914452870240188e-05,
+      "loss": 0.0821,
+      "step": 614
+    },
+    {
+      "epoch": 0.08979740828618361,
+      "grad_norm": 0.3891245424747467,
+      "learning_rate": 9.869697850114969e-05,
+      "loss": 0.122,
+      "step": 615
+    },
+    {
+      "epoch": 0.0899434203321774,
+      "grad_norm": 0.44263043999671936,
+      "learning_rate": 9.824994492075444e-05,
+      "loss": 0.1545,
+      "step": 616
+    },
+    {
+      "epoch": 0.0900894323781712,
+      "grad_norm": 0.2580360770225525,
+      "learning_rate": 9.780343246283923e-05,
+      "loss": 0.0343,
+      "step": 617
+    },
+    {
+      "epoch": 0.09023544442416499,
+      "grad_norm": 0.5391973257064819,
+      "learning_rate": 9.735744562377968e-05,
+      "loss": 0.2013,
+      "step": 618
+    },
+    {
+      "epoch": 0.09038145647015879,
+      "grad_norm": 0.5272008180618286,
+      "learning_rate": 9.691198889465824e-05,
+      "loss": 0.1217,
+      "step": 619
+    },
+    {
+      "epoch": 0.09052746851615258,
+      "grad_norm": 0.3311897814273834,
+      "learning_rate": 9.646706676121923e-05,
+      "loss": 0.0898,
+      "step": 620
+    },
+    {
+      "epoch": 0.09067348056214637,
+      "grad_norm": 0.8139061331748962,
+      "learning_rate": 9.602268370382363e-05,
+      "loss": 0.2168,
+      "step": 621
+    },
+    {
+      "epoch": 0.09081949260814016,
+      "grad_norm": 0.8335362672805786,
+      "learning_rate": 9.557884419740386e-05,
+      "loss": 0.1057,
+      "step": 622
+    },
+    {
+      "epoch": 0.09096550465413397,
+      "grad_norm": 0.4633618891239166,
+      "learning_rate": 9.513555271141882e-05,
+      "loss": 0.1036,
+      "step": 623
+    },
+    {
+      "epoch": 0.09111151670012776,
+      "grad_norm": 0.5218425393104553,
+      "learning_rate": 9.46928137098089e-05,
+      "loss": 0.1092,
+      "step": 624
+    },
+    {
+      "epoch": 0.09125752874612156,
+      "grad_norm": 0.3713681995868683,
+      "learning_rate": 9.425063165095088e-05,
+      "loss": 0.064,
+      "step": 625
+    },
+    {
+      "epoch": 0.09140354079211535,
+      "grad_norm": 0.5872324705123901,
+      "learning_rate": 9.380901098761319e-05,
+      "loss": 0.1207,
+      "step": 626
+    },
+    {
+      "epoch": 0.09154955283810914,
+      "grad_norm": 0.786533772945404,
+      "learning_rate": 9.336795616691103e-05,
+      "loss": 0.1839,
+      "step": 627
+    },
+    {
+      "epoch": 0.09169556488410294,
+      "grad_norm": 1.0154459476470947,
+      "learning_rate": 9.292747163026154e-05,
+      "loss": 0.2062,
+      "step": 628
+    },
+    {
+      "epoch": 0.09184157693009673,
+      "grad_norm": 0.30658984184265137,
+      "learning_rate": 9.24875618133391e-05,
+      "loss": 0.0219,
+      "step": 629
+    },
+    {
+      "epoch": 0.09198758897609052,
+      "grad_norm": 0.9130704402923584,
+      "learning_rate": 9.204823114603068e-05,
+      "loss": 0.1325,
+      "step": 630
+    },
+    {
+      "epoch": 0.09213360102208432,
+      "grad_norm": 0.4111880958080292,
+      "learning_rate": 9.160948405239128e-05,
+      "loss": 0.0612,
+      "step": 631
+    },
+    {
+      "epoch": 0.09227961306807812,
+      "grad_norm": 0.014520260505378246,
+      "learning_rate": 9.117132495059916e-05,
+      "loss": 0.0002,
+      "step": 632
+    },
+    {
+      "epoch": 0.09242562511407192,
+      "grad_norm": 0.004326899070292711,
+      "learning_rate": 9.07337582529117e-05,
+      "loss": 0.0002,
+      "step": 633
+    },
+    {
+      "epoch": 0.09257163716006571,
+      "grad_norm": 0.008311674930155277,
+      "learning_rate": 9.02967883656207e-05,
+      "loss": 0.0001,
+      "step": 634
+    },
+    {
+      "epoch": 0.0927176492060595,
+      "grad_norm": 0.0011568053159862757,
+      "learning_rate": 8.986041968900796e-05,
+      "loss": 0.0001,
+      "step": 635
+    },
+    {
+      "epoch": 0.0928636612520533,
+      "grad_norm": 0.03044409491121769,
+      "learning_rate": 8.942465661730129e-05,
+      "loss": 0.0006,
+      "step": 636
+    },
+    {
+      "epoch": 0.09300967329804709,
+      "grad_norm": 0.0062841372564435005,
+      "learning_rate": 8.898950353862998e-05,
+      "loss": 0.0001,
+      "step": 637
+    },
+    {
+      "epoch": 0.09315568534404088,
+      "grad_norm": 0.004739905241876841,
+      "learning_rate": 8.85549648349807e-05,
+      "loss": 0.0001,
+      "step": 638
+    },
+    {
+      "epoch": 0.09330169739003467,
+      "grad_norm": 0.0005953651270829141,
+      "learning_rate": 8.812104488215332e-05,
+      "loss": 0.0,
+      "step": 639
+    },
+    {
+      "epoch": 0.09344770943602847,
+      "grad_norm": 0.001960960915312171,
+      "learning_rate": 8.768774804971705e-05,
+      "loss": 0.0001,
+      "step": 640
+    },
+    {
+      "epoch": 0.09359372148202226,
+      "grad_norm": 0.005731022451072931,
+      "learning_rate": 8.725507870096609e-05,
+      "loss": 0.0001,
+      "step": 641
+    },
+    {
+      "epoch": 0.09373973352801607,
+      "grad_norm": 0.002941427519544959,
+      "learning_rate": 8.6823041192876e-05,
+      "loss": 0.0001,
+      "step": 642
+    },
+    {
+      "epoch": 0.09388574557400986,
+      "grad_norm": 0.005661524832248688,
+      "learning_rate": 8.639163987605976e-05,
+      "loss": 0.0001,
+      "step": 643
+    },
+    {
+      "epoch": 0.09403175762000365,
+      "grad_norm": 0.0021083110477775335,
+      "learning_rate": 8.596087909472373e-05,
+      "loss": 0.0001,
+      "step": 644
+    },
+    {
+      "epoch": 0.09417776966599745,
+      "grad_norm": 0.0027868840843439102,
+      "learning_rate": 8.553076318662425e-05,
+      "loss": 0.0001,
+      "step": 645
+    },
+    {
+      "epoch": 0.09432378171199124,
+      "grad_norm": 0.001279468764550984,
+      "learning_rate": 8.510129648302372e-05,
+      "loss": 0.0001,
+      "step": 646
+    },
+    {
+      "epoch": 0.09446979375798503,
+      "grad_norm": 0.007063029333949089,
+      "learning_rate": 8.467248330864718e-05,
+      "loss": 0.0001,
+      "step": 647
+    },
+    {
+      "epoch": 0.09461580580397883,
+      "grad_norm": 0.0014115847880020738,
+      "learning_rate": 8.424432798163836e-05,
+      "loss": 0.0001,
+      "step": 648
+    },
+    {
+      "epoch": 0.09476181784997262,
+      "grad_norm": 0.911738395690918,
+      "learning_rate": 8.381683481351676e-05,
+      "loss": 0.0305,
+      "step": 649
+    },
+    {
+      "epoch": 0.09490782989596641,
+      "grad_norm": 0.0007988119614310563,
+      "learning_rate": 8.339000810913386e-05,
+      "loss": 0.0,
+      "step": 650
+    },
+    {
+      "epoch": 0.09490782989596641,
+      "eval_loss": 0.13424192368984222,
+      "eval_runtime": 26.2396,
+      "eval_samples_per_second": 8.384,
+      "eval_steps_per_second": 8.384,
+      "step": 650
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.467201778050662e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null