Training in progress, step 440, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +153 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c0b202fc0c96405179f88a6c2eaa4f6dc3272d743fbddfecad7984da6b3126f
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:3adc18392d2f21276e1f3a5e51a07ca17b1146c2ef6b6e597fa70cbd35cb3bdb
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40a4057e0b09a78b12e194d176176ab3bdf1dba15e9b3d7cc076fde7fedd72a1
 size 1279647314

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a2cfd564c67e428d0d90d8802e1970d22aafbab1ec924f4f6c956cccc8140e5
 size 1279647314

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8989144fab65b9563db39df1e14d31e2d7e7e0d841ffc9302a3d61f93bd4035c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:32e08d0a211b4d4a818fb65e4db7285f388cba9154617f34f859f6540a9421f1
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e62bbb669ef85272e1fb4d893e6c7bdc8670a8e92466433aef00a4eb78b394c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:003818438c5b86338e0642cffcf6bb6b8eec0d2c9f4884b9d0b63566f2711618
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.3341846466064453,
-  "best_model_checkpoint": "miner_id_24/checkpoint-420",
-  "epoch": 0.1330034636318654,
   "eval_steps": 20,
-  "global_step": 420,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3123,6 +3123,154 @@
       "eval_samples_per_second": 4.259,
       "eval_steps_per_second": 4.259,
       "step": 420
     }
   ],
   "logging_steps": 1,
@@ -3151,7 +3299,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.3441179726197555e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.3223698139190674,
+  "best_model_checkpoint": "miner_id_24/checkpoint-440",
+  "epoch": 0.1393369619000495,
   "eval_steps": 20,
+  "global_step": 440,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.259,
       "eval_steps_per_second": 4.259,
       "step": 420
+    },
+    {
+      "epoch": 0.1333201385452746,
+      "grad_norm": 0.5786765813827515,
+      "learning_rate": 0.00019942151943087426,
+      "loss": 2.2436,
+      "step": 421
+    },
+    {
+      "epoch": 0.1336368134586838,
+      "grad_norm": 0.5472723841667175,
+      "learning_rate": 0.00019941791308263955,
+      "loss": 2.211,
+      "step": 422
+    },
+    {
+      "epoch": 0.13395348837209303,
+      "grad_norm": 0.5549196600914001,
+      "learning_rate": 0.00019941429556082055,
+      "loss": 2.3284,
+      "step": 423
+    },
+    {
+      "epoch": 0.13427016328550223,
+      "grad_norm": 0.5404260754585266,
+      "learning_rate": 0.00019941066686582394,
+      "loss": 2.3089,
+      "step": 424
+    },
+    {
+      "epoch": 0.13458683819891143,
+      "grad_norm": 0.622839093208313,
+      "learning_rate": 0.00019940702699805743,
+      "loss": 2.204,
+      "step": 425
+    },
+    {
+      "epoch": 0.13490351311232063,
+      "grad_norm": 0.6209724545478821,
+      "learning_rate": 0.00019940337595793017,
+      "loss": 2.264,
+      "step": 426
+    },
+    {
+      "epoch": 0.13522018802572983,
+      "grad_norm": 0.6113777756690979,
+      "learning_rate": 0.00019939971374585252,
+      "loss": 2.3617,
+      "step": 427
+    },
+    {
+      "epoch": 0.13553686293913905,
+      "grad_norm": 0.591740608215332,
+      "learning_rate": 0.000199396040362236,
+      "loss": 2.2724,
+      "step": 428
+    },
+    {
+      "epoch": 0.13585353785254825,
+      "grad_norm": 0.7021921277046204,
+      "learning_rate": 0.00019939235580749353,
+      "loss": 2.396,
+      "step": 429
+    },
+    {
+      "epoch": 0.13617021276595745,
+      "grad_norm": 0.6321803331375122,
+      "learning_rate": 0.00019938866008203918,
+      "loss": 2.082,
+      "step": 430
+    },
+    {
+      "epoch": 0.13648688767936665,
+      "grad_norm": 0.6896611452102661,
+      "learning_rate": 0.00019938495318628832,
+      "loss": 2.2072,
+      "step": 431
+    },
+    {
+      "epoch": 0.13680356259277585,
+      "grad_norm": 0.670881986618042,
+      "learning_rate": 0.00019938123512065758,
+      "loss": 2.207,
+      "step": 432
+    },
+    {
+      "epoch": 0.13712023750618504,
+      "grad_norm": 0.6715372204780579,
+      "learning_rate": 0.00019937750588556484,
+      "loss": 2.1882,
+      "step": 433
+    },
+    {
+      "epoch": 0.13743691241959427,
+      "grad_norm": 0.7520514130592346,
+      "learning_rate": 0.0001993737654814292,
+      "loss": 2.3707,
+      "step": 434
+    },
+    {
+      "epoch": 0.13775358733300347,
+      "grad_norm": 0.6770562529563904,
+      "learning_rate": 0.00019937001390867105,
+      "loss": 2.2745,
+      "step": 435
+    },
+    {
+      "epoch": 0.13807026224641267,
+      "grad_norm": 0.7494112849235535,
+      "learning_rate": 0.00019936625116771204,
+      "loss": 2.2834,
+      "step": 436
+    },
+    {
+      "epoch": 0.13838693715982187,
+      "grad_norm": 0.7565605044364929,
+      "learning_rate": 0.0001993624772589751,
+      "loss": 2.3396,
+      "step": 437
+    },
+    {
+      "epoch": 0.13870361207323106,
+      "grad_norm": 0.8649305105209351,
+      "learning_rate": 0.0001993586921828843,
+      "loss": 2.3929,
+      "step": 438
+    },
+    {
+      "epoch": 0.1390202869866403,
+      "grad_norm": 0.7562718391418457,
+      "learning_rate": 0.0001993548959398651,
+      "loss": 2.2906,
+      "step": 439
+    },
+    {
+      "epoch": 0.1393369619000495,
+      "grad_norm": 0.8264215588569641,
+      "learning_rate": 0.00019935108853034414,
+      "loss": 2.4555,
+      "step": 440
+    },
+    {
+      "epoch": 0.1393369619000495,
+      "eval_loss": 2.3223698139190674,
+      "eval_runtime": 119.2317,
+      "eval_samples_per_second": 4.261,
+      "eval_steps_per_second": 4.261,
+      "step": 440
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.47716755004588e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null