Training in progress, step 350, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a380c2863c79af247d96d6850191d235a6db52061a5724a076c0dd239c9f504c
 size 1279323952

 version https://git-lfs.github.com/spec/v1
+oid sha256:552e477a98c0768adcd4647d633d95db645032bd3f24e1fbfae67c6bab7019be
 size 1279323952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bf5c79e2d4caba6d6b955ef8f298c069c385488cd86f05df41634db3f2abc48
 size 650153044

 version https://git-lfs.github.com/spec/v1
+oid sha256:398600ed212b3e574407dad7720c69a44d8b3f066be917bd7839a180244a2e8b
 size 650153044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91556d821de6d4edd57a622971805ecea7b0d8d7ba486bacda72e26999bdf356
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c974a18173c5b253003de5cec0751303a41dda5f56e281ac6e5b3f9dfe4eec8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a29bcee20d4efb76c3fc52ec9e4c8bd30e6af1b1acd2ceb28a084d03e0ab190
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef796d1416c59d81a908d0d533ec75f7cbfbe54b55c3fcdeb18a69501c216c1e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.4393945932388306,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.06552990484511734,
   "eval_steps": 50,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2163,6 +2163,364 @@
       "eval_samples_per_second": 4.645,
       "eval_steps_per_second": 4.645,
       "step": 300
     }
   ],
   "logging_steps": 1,
@@ -2177,7 +2535,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -2186,12 +2544,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.0216640034845491e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.4393945932388306,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.0764515556526369,
   "eval_steps": 50,
+  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.645,
       "eval_steps_per_second": 4.645,
       "step": 300
+    },
+    {
+      "epoch": 0.06574833786126773,
+      "grad_norm": 1.63740873336792,
+      "learning_rate": 0.0002996673213206589,
+      "loss": 1.9815,
+      "step": 301
+    },
+    {
+      "epoch": 0.06596677087741812,
+      "grad_norm": 1.046769380569458,
+      "learning_rate": 0.000299665031793473,
+      "loss": 1.6878,
+      "step": 302
+    },
+    {
+      "epoch": 0.06618520389356851,
+      "grad_norm": 0.513561487197876,
+      "learning_rate": 0.0002996627344237122,
+      "loss": 1.5141,
+      "step": 303
+    },
+    {
+      "epoch": 0.0664036369097189,
+      "grad_norm": 0.38992562890052795,
+      "learning_rate": 0.0002996604292114971,
+      "loss": 1.1818,
+      "step": 304
+    },
+    {
+      "epoch": 0.06662206992586929,
+      "grad_norm": 0.4074482023715973,
+      "learning_rate": 0.00029965811615694844,
+      "loss": 1.4627,
+      "step": 305
+    },
+    {
+      "epoch": 0.06684050294201968,
+      "grad_norm": 0.41403788328170776,
+      "learning_rate": 0.0002996557952601874,
+      "loss": 1.5682,
+      "step": 306
+    },
+    {
+      "epoch": 0.06705893595817007,
+      "grad_norm": 0.4047700762748718,
+      "learning_rate": 0.00029965346652133565,
+      "loss": 1.0902,
+      "step": 307
+    },
+    {
+      "epoch": 0.06727736897432046,
+      "grad_norm": 0.4367327094078064,
+      "learning_rate": 0.0002996511299405151,
+      "loss": 1.3241,
+      "step": 308
+    },
+    {
+      "epoch": 0.06749580199047087,
+      "grad_norm": 0.40244677662849426,
+      "learning_rate": 0.0002996487855178483,
+      "loss": 1.2218,
+      "step": 309
+    },
+    {
+      "epoch": 0.06771423500662126,
+      "grad_norm": 0.42241427302360535,
+      "learning_rate": 0.0002996464332534581,
+      "loss": 1.0742,
+      "step": 310
+    },
+    {
+      "epoch": 0.06793266802277165,
+      "grad_norm": 0.4748789966106415,
+      "learning_rate": 0.00029964407314746764,
+      "loss": 1.3064,
+      "step": 311
+    },
+    {
+      "epoch": 0.06815110103892204,
+      "grad_norm": 0.4514020085334778,
+      "learning_rate": 0.00029964170520000073,
+      "loss": 1.1954,
+      "step": 312
+    },
+    {
+      "epoch": 0.06836953405507243,
+      "grad_norm": 0.5442508459091187,
+      "learning_rate": 0.0002996393294111814,
+      "loss": 1.2674,
+      "step": 313
+    },
+    {
+      "epoch": 0.06858796707122282,
+      "grad_norm": 0.4211152195930481,
+      "learning_rate": 0.00029963694578113417,
+      "loss": 0.9404,
+      "step": 314
+    },
+    {
+      "epoch": 0.06880640008737321,
+      "grad_norm": 0.44201675057411194,
+      "learning_rate": 0.0002996345543099839,
+      "loss": 1.363,
+      "step": 315
+    },
+    {
+      "epoch": 0.0690248331035236,
+      "grad_norm": 0.4818100333213806,
+      "learning_rate": 0.0002996321549978559,
+      "loss": 1.1752,
+      "step": 316
+    },
+    {
+      "epoch": 0.06924326611967399,
+      "grad_norm": 0.5382769107818604,
+      "learning_rate": 0.0002996297478448759,
+      "loss": 1.3113,
+      "step": 317
+    },
+    {
+      "epoch": 0.06946169913582438,
+      "grad_norm": 0.44174817204475403,
+      "learning_rate": 0.0002996273328511701,
+      "loss": 1.4191,
+      "step": 318
+    },
+    {
+      "epoch": 0.06968013215197477,
+      "grad_norm": 0.5009336471557617,
+      "learning_rate": 0.000299624910016865,
+      "loss": 1.1101,
+      "step": 319
+    },
+    {
+      "epoch": 0.06989856516812516,
+      "grad_norm": 0.4469683766365051,
+      "learning_rate": 0.0002996224793420875,
+      "loss": 1.2823,
+      "step": 320
+    },
+    {
+      "epoch": 0.07011699818427555,
+      "grad_norm": 0.4948316216468811,
+      "learning_rate": 0.0002996200408269651,
+      "loss": 1.3302,
+      "step": 321
+    },
+    {
+      "epoch": 0.07033543120042594,
+      "grad_norm": 0.5020711421966553,
+      "learning_rate": 0.0002996175944716255,
+      "loss": 1.0552,
+      "step": 322
+    },
+    {
+      "epoch": 0.07055386421657633,
+      "grad_norm": 0.528529703617096,
+      "learning_rate": 0.0002996151402761969,
+      "loss": 1.2313,
+      "step": 323
+    },
+    {
+      "epoch": 0.07077229723272672,
+      "grad_norm": 0.48190081119537354,
+      "learning_rate": 0.00029961267824080785,
+      "loss": 1.2774,
+      "step": 324
+    },
+    {
+      "epoch": 0.07099073024887712,
+      "grad_norm": 0.43585675954818726,
+      "learning_rate": 0.0002996102083655875,
+      "loss": 0.8383,
+      "step": 325
+    },
+    {
+      "epoch": 0.0712091632650275,
+      "grad_norm": 0.4349531829357147,
+      "learning_rate": 0.00029960773065066515,
+      "loss": 1.0135,
+      "step": 326
+    },
+    {
+      "epoch": 0.0714275962811779,
+      "grad_norm": 0.45253872871398926,
+      "learning_rate": 0.00029960524509617067,
+      "loss": 1.0758,
+      "step": 327
+    },
+    {
+      "epoch": 0.07164602929732829,
+      "grad_norm": 0.48265528678894043,
+      "learning_rate": 0.0002996027517022343,
+      "loss": 1.0895,
+      "step": 328
+    },
+    {
+      "epoch": 0.07186446231347868,
+      "grad_norm": 0.4599095582962036,
+      "learning_rate": 0.0002996002504689867,
+      "loss": 0.9751,
+      "step": 329
+    },
+    {
+      "epoch": 0.07208289532962907,
+      "grad_norm": 0.4035511016845703,
+      "learning_rate": 0.000299597741396559,
+      "loss": 0.9046,
+      "step": 330
+    },
+    {
+      "epoch": 0.07230132834577946,
+      "grad_norm": 0.44477182626724243,
+      "learning_rate": 0.0002995952244850826,
+      "loss": 0.8351,
+      "step": 331
+    },
+    {
+      "epoch": 0.07251976136192985,
+      "grad_norm": 0.49605095386505127,
+      "learning_rate": 0.00029959269973468935,
+      "loss": 1.1617,
+      "step": 332
+    },
+    {
+      "epoch": 0.07273819437808025,
+      "grad_norm": 0.4683946967124939,
+      "learning_rate": 0.00029959016714551165,
+      "loss": 0.815,
+      "step": 333
+    },
+    {
+      "epoch": 0.07295662739423064,
+      "grad_norm": 0.5074111223220825,
+      "learning_rate": 0.00029958762671768223,
+      "loss": 1.08,
+      "step": 334
+    },
+    {
+      "epoch": 0.07317506041038103,
+      "grad_norm": 0.4522974491119385,
+      "learning_rate": 0.000299585078451334,
+      "loss": 1.0445,
+      "step": 335
+    },
+    {
+      "epoch": 0.07339349342653143,
+      "grad_norm": 0.49050214886665344,
+      "learning_rate": 0.00029958252234660077,
+      "loss": 1.023,
+      "step": 336
+    },
+    {
+      "epoch": 0.07361192644268182,
+      "grad_norm": 0.5193130373954773,
+      "learning_rate": 0.0002995799584036163,
+      "loss": 0.9875,
+      "step": 337
+    },
+    {
+      "epoch": 0.0738303594588322,
+      "grad_norm": 0.4777398109436035,
+      "learning_rate": 0.000299577386622515,
+      "loss": 1.0041,
+      "step": 338
+    },
+    {
+      "epoch": 0.0740487924749826,
+      "grad_norm": 0.5704786777496338,
+      "learning_rate": 0.0002995748070034317,
+      "loss": 1.2806,
+      "step": 339
+    },
+    {
+      "epoch": 0.07426722549113299,
+      "grad_norm": 0.5965543985366821,
+      "learning_rate": 0.0002995722195465013,
+      "loss": 1.2449,
+      "step": 340
+    },
+    {
+      "epoch": 0.07448565850728338,
+      "grad_norm": 0.5655717849731445,
+      "learning_rate": 0.0002995696242518598,
+      "loss": 1.492,
+      "step": 341
+    },
+    {
+      "epoch": 0.07470409152343377,
+      "grad_norm": 0.8123418092727661,
+      "learning_rate": 0.0002995670211196429,
+      "loss": 1.2705,
+      "step": 342
+    },
+    {
+      "epoch": 0.07492252453958416,
+      "grad_norm": 0.6008387804031372,
+      "learning_rate": 0.0002995644101499871,
+      "loss": 1.2037,
+      "step": 343
+    },
+    {
+      "epoch": 0.07514095755573455,
+      "grad_norm": 0.601810872554779,
+      "learning_rate": 0.0002995617913430292,
+      "loss": 1.2099,
+      "step": 344
+    },
+    {
+      "epoch": 0.07535939057188494,
+      "grad_norm": 0.589198887348175,
+      "learning_rate": 0.0002995591646989064,
+      "loss": 1.4846,
+      "step": 345
+    },
+    {
+      "epoch": 0.07557782358803533,
+      "grad_norm": 0.6595737338066101,
+      "learning_rate": 0.00029955653021775634,
+      "loss": 1.356,
+      "step": 346
+    },
+    {
+      "epoch": 0.07579625660418572,
+      "grad_norm": 0.5962746739387512,
+      "learning_rate": 0.00029955388789971717,
+      "loss": 1.1548,
+      "step": 347
+    },
+    {
+      "epoch": 0.07601468962033611,
+      "grad_norm": 0.6426790952682495,
+      "learning_rate": 0.0002995512377449273,
+      "loss": 1.16,
+      "step": 348
+    },
+    {
+      "epoch": 0.0762331226364865,
+      "grad_norm": 0.6003267765045166,
+      "learning_rate": 0.00029954857975352553,
+      "loss": 1.1259,
+      "step": 349
+    },
+    {
+      "epoch": 0.0764515556526369,
+      "grad_norm": 0.8509035110473633,
+      "learning_rate": 0.0002995459139256512,
+      "loss": 1.0266,
+      "step": 350
+    },
+    {
+      "epoch": 0.0764515556526369,
+      "eval_loss": 1.4707661867141724,
+      "eval_runtime": 79.3549,
+      "eval_samples_per_second": 4.65,
+      "eval_steps_per_second": 4.65,
+      "step": 350
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1919058703430451e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null