Training in progress, step 4140, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +284 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab8ff796d5a0aa9544b801ac8d000c5cf5a70ba6cb8b4bfa68d15836f013ec94
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5cadbe51d559db96e86eef9f534a0bee2e0ea99aba510e0a28855ed1d0bc70a
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57231fda362f945f0352fb4a397d80ba901f8ed5b3852b72e464229f69083c39
 size 102864868

 version https://git-lfs.github.com/spec/v1
+oid sha256:006b347290e03dad49fc36373c65ac91bbd8fd020705f94522a4e04e3bff7b95
 size 102864868

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b664f6b5d5ee8fdd05af1164b71dfb90e4707b01450078266786504ef12d0e3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b68d5a1b9faff73cbbe06449508758f11cf04de1e43e5b3a13b45cefeff99b5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7b212df8169f0614cd9da7e49bb8587466d43a3f9ec4a30c90bea47b05a20c1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6dd0b5405eeec1dc6f96e40b1a34ef593661c41170d56934a252e76e7889804
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.6455243229866028,
   "best_model_checkpoint": "miner_id_24/checkpoint-4100",
-  "epoch": 0.44519850695622665,
   "eval_steps": 100,
-  "global_step": 4100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -29043,6 +29043,286 @@
       "eval_samples_per_second": 25.811,
       "eval_steps_per_second": 6.453,
       "step": 4100
     }
   ],
   "logging_steps": 1,
@@ -29066,12 +29346,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.805389684939948e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.6455243229866028,
   "best_model_checkpoint": "miner_id_24/checkpoint-4100",
+  "epoch": 0.4495419070240923,
   "eval_steps": 100,
+  "global_step": 4140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 25.811,
       "eval_steps_per_second": 6.453,
       "step": 4100
+    },
+    {
+      "epoch": 0.4453070919579233,
+      "grad_norm": 0.3129846751689911,
+      "learning_rate": 4.4001452062236715e-08,
+      "loss": 0.6301,
+      "step": 4101
+    },
+    {
+      "epoch": 0.44541567695962,
+      "grad_norm": 0.3126542270183563,
+      "learning_rate": 4.1774051735721244e-08,
+      "loss": 0.6597,
+      "step": 4102
+    },
+    {
+      "epoch": 0.4455242619613166,
+      "grad_norm": 0.3281303942203522,
+      "learning_rate": 3.9604490060962674e-08,
+      "loss": 0.7234,
+      "step": 4103
+    },
+    {
+      "epoch": 0.44563284696301325,
+      "grad_norm": 0.28728532791137695,
+      "learning_rate": 3.749276829333459e-08,
+      "loss": 0.5878,
+      "step": 4104
+    },
+    {
+      "epoch": 0.4457414319647099,
+      "grad_norm": 0.3254416882991791,
+      "learning_rate": 3.5438887654737355e-08,
+      "loss": 0.7285,
+      "step": 4105
+    },
+    {
+      "epoch": 0.4458500169664065,
+      "grad_norm": 0.33132821321487427,
+      "learning_rate": 3.344284933360919e-08,
+      "loss": 0.7421,
+      "step": 4106
+    },
+    {
+      "epoch": 0.44595860196810316,
+      "grad_norm": 0.32671990990638733,
+      "learning_rate": 3.150465448490403e-08,
+      "loss": 0.6265,
+      "step": 4107
+    },
+    {
+      "epoch": 0.4460671869697998,
+      "grad_norm": 0.325366348028183,
+      "learning_rate": 2.962430423011364e-08,
+      "loss": 0.7648,
+      "step": 4108
+    },
+    {
+      "epoch": 0.44617577197149644,
+      "grad_norm": 0.30909645557403564,
+      "learning_rate": 2.7801799657278805e-08,
+      "loss": 0.6806,
+      "step": 4109
+    },
+    {
+      "epoch": 0.4462843569731931,
+      "grad_norm": 0.3081817924976349,
+      "learning_rate": 2.6037141820933753e-08,
+      "loss": 0.6483,
+      "step": 4110
+    },
+    {
+      "epoch": 0.4463929419748897,
+      "grad_norm": 0.28678232431411743,
+      "learning_rate": 2.4330331742172806e-08,
+      "loss": 0.53,
+      "step": 4111
+    },
+    {
+      "epoch": 0.44650152697658635,
+      "grad_norm": 0.29590025544166565,
+      "learning_rate": 2.268137040859486e-08,
+      "loss": 0.6223,
+      "step": 4112
+    },
+    {
+      "epoch": 0.446610111978283,
+      "grad_norm": 0.3081272542476654,
+      "learning_rate": 2.109025877433668e-08,
+      "loss": 0.6641,
+      "step": 4113
+    },
+    {
+      "epoch": 0.4467186969799796,
+      "grad_norm": 0.3248719573020935,
+      "learning_rate": 1.955699776006181e-08,
+      "loss": 0.7767,
+      "step": 4114
+    },
+    {
+      "epoch": 0.44682728198167626,
+      "grad_norm": 0.312339186668396,
+      "learning_rate": 1.808158825297168e-08,
+      "loss": 0.6629,
+      "step": 4115
+    },
+    {
+      "epoch": 0.4469358669833729,
+      "grad_norm": 0.3206634819507599,
+      "learning_rate": 1.666403110676118e-08,
+      "loss": 0.649,
+      "step": 4116
+    },
+    {
+      "epoch": 0.44704445198506954,
+      "grad_norm": 0.30358952283859253,
+      "learning_rate": 1.530432714167418e-08,
+      "loss": 0.6306,
+      "step": 4117
+    },
+    {
+      "epoch": 0.44715303698676623,
+      "grad_norm": 0.2956608831882477,
+      "learning_rate": 1.4002477144470227e-08,
+      "loss": 0.5967,
+      "step": 4118
+    },
+    {
+      "epoch": 0.44726162198846287,
+      "grad_norm": 0.31782612204551697,
+      "learning_rate": 1.275848186845785e-08,
+      "loss": 0.6721,
+      "step": 4119
+    },
+    {
+      "epoch": 0.4473702069901595,
+      "grad_norm": 0.28622129559516907,
+      "learning_rate": 1.1572342033416839e-08,
+      "loss": 0.6106,
+      "step": 4120
+    },
+    {
+      "epoch": 0.44747879199185614,
+      "grad_norm": 0.3039482533931732,
+      "learning_rate": 1.044405832569817e-08,
+      "loss": 0.7213,
+      "step": 4121
+    },
+    {
+      "epoch": 0.4475873769935528,
+      "grad_norm": 0.3242916464805603,
+      "learning_rate": 9.373631398157389e-09,
+      "loss": 0.7128,
+      "step": 4122
+    },
+    {
+      "epoch": 0.4476959619952494,
+      "grad_norm": 0.30805718898773193,
+      "learning_rate": 8.361061870176823e-09,
+      "loss": 0.5978,
+      "step": 4123
+    },
+    {
+      "epoch": 0.44780454699694605,
+      "grad_norm": 0.3264826536178589,
+      "learning_rate": 7.4063503276544655e-09,
+      "loss": 0.709,
+      "step": 4124
+    },
+    {
+      "epoch": 0.4479131319986427,
+      "grad_norm": 0.3391072452068329,
+      "learning_rate": 6.50949732301509e-09,
+      "loss": 0.712,
+      "step": 4125
+    },
+    {
+      "epoch": 0.4480217170003393,
+      "grad_norm": 0.3170183598995209,
+      "learning_rate": 5.670503375188041e-09,
+      "loss": 0.6444,
+      "step": 4126
+    },
+    {
+      "epoch": 0.44813030200203596,
+      "grad_norm": 0.32671859860420227,
+      "learning_rate": 4.889368969662744e-09,
+      "loss": 0.7363,
+      "step": 4127
+    },
+    {
+      "epoch": 0.4482388870037326,
+      "grad_norm": 0.34119758009910583,
+      "learning_rate": 4.1660945584109936e-09,
+      "loss": 0.6435,
+      "step": 4128
+    },
+    {
+      "epoch": 0.44834747200542924,
+      "grad_norm": 0.3383527100086212,
+      "learning_rate": 3.5006805599424596e-09,
+      "loss": 0.7669,
+      "step": 4129
+    },
+    {
+      "epoch": 0.4484560570071259,
+      "grad_norm": 0.2880455553531647,
+      "learning_rate": 2.893127359282488e-09,
+      "loss": 0.6149,
+      "step": 4130
+    },
+    {
+      "epoch": 0.4485646420088225,
+      "grad_norm": 0.3154943585395813,
+      "learning_rate": 2.3434353079831996e-09,
+      "loss": 0.701,
+      "step": 4131
+    },
+    {
+      "epoch": 0.44867322701051915,
+      "grad_norm": 0.3077964782714844,
+      "learning_rate": 1.851604724112388e-09,
+      "loss": 0.6714,
+      "step": 4132
+    },
+    {
+      "epoch": 0.4487818120122158,
+      "grad_norm": 0.3311520218849182,
+      "learning_rate": 1.4176358922535216e-09,
+      "loss": 0.7025,
+      "step": 4133
+    },
+    {
+      "epoch": 0.4488903970139125,
+      "grad_norm": 0.3134852945804596,
+      "learning_rate": 1.041529063516844e-09,
+      "loss": 0.6822,
+      "step": 4134
+    },
+    {
+      "epoch": 0.4489989820156091,
+      "grad_norm": 0.2992844581604004,
+      "learning_rate": 7.232844555282725e-10,
+      "loss": 0.6657,
+      "step": 4135
+    },
+    {
+      "epoch": 0.44910756701730575,
+      "grad_norm": 0.2949857711791992,
+      "learning_rate": 4.629022524182958e-10,
+      "loss": 0.6671,
+      "step": 4136
+    },
+    {
+      "epoch": 0.4492161520190024,
+      "grad_norm": 0.3150671422481537,
+      "learning_rate": 2.603826048774849e-10,
+      "loss": 0.657,
+      "step": 4137
+    },
+    {
+      "epoch": 0.449324737020699,
+      "grad_norm": 0.3099953234195709,
+      "learning_rate": 1.1572563006767567e-10,
+      "loss": 0.6787,
+      "step": 4138
+    },
+    {
+      "epoch": 0.44943332202239566,
+      "grad_norm": 0.28630250692367554,
+      "learning_rate": 2.8931411699684164e-11,
+      "loss": 0.5811,
+      "step": 4139
+    },
+    {
+      "epoch": 0.4495419070240923,
+      "grad_norm": 0.31658029556274414,
+      "learning_rate": 0.0,
+      "loss": 0.6864,
+      "step": 4140
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.822881497330221e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null