Fanucci
commited on
Training in progress, step 4140, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 201892112
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5cadbe51d559db96e86eef9f534a0bee2e0ea99aba510e0a28855ed1d0bc70a
|
| 3 |
size 201892112
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102864868
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:006b347290e03dad49fc36373c65ac91bbd8fd020705f94522a4e04e3bff7b95
|
| 3 |
size 102864868
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b68d5a1b9faff73cbbe06449508758f11cf04de1e43e5b3a13b45cefeff99b5
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6dd0b5405eeec1dc6f96e40b1a34ef593661c41170d56934a252e76e7889804
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.6455243229866028,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-4100",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -29043,6 +29043,286 @@
|
|
| 29043 |
"eval_samples_per_second": 25.811,
|
| 29044 |
"eval_steps_per_second": 6.453,
|
| 29045 |
"step": 4100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29046 |
}
|
| 29047 |
],
|
| 29048 |
"logging_steps": 1,
|
|
@@ -29066,12 +29346,12 @@
|
|
| 29066 |
"should_evaluate": false,
|
| 29067 |
"should_log": false,
|
| 29068 |
"should_save": true,
|
| 29069 |
-
"should_training_stop":
|
| 29070 |
},
|
| 29071 |
"attributes": {}
|
| 29072 |
}
|
| 29073 |
},
|
| 29074 |
-
"total_flos": 1.
|
| 29075 |
"train_batch_size": 4,
|
| 29076 |
"trial_name": null,
|
| 29077 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.6455243229866028,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-4100",
|
| 4 |
+
"epoch": 0.4495419070240923,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 4140,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 29043 |
"eval_samples_per_second": 25.811,
|
| 29044 |
"eval_steps_per_second": 6.453,
|
| 29045 |
"step": 4100
|
| 29046 |
+
},
|
| 29047 |
+
{
|
| 29048 |
+
"epoch": 0.4453070919579233,
|
| 29049 |
+
"grad_norm": 0.3129846751689911,
|
| 29050 |
+
"learning_rate": 4.4001452062236715e-08,
|
| 29051 |
+
"loss": 0.6301,
|
| 29052 |
+
"step": 4101
|
| 29053 |
+
},
|
| 29054 |
+
{
|
| 29055 |
+
"epoch": 0.44541567695962,
|
| 29056 |
+
"grad_norm": 0.3126542270183563,
|
| 29057 |
+
"learning_rate": 4.1774051735721244e-08,
|
| 29058 |
+
"loss": 0.6597,
|
| 29059 |
+
"step": 4102
|
| 29060 |
+
},
|
| 29061 |
+
{
|
| 29062 |
+
"epoch": 0.4455242619613166,
|
| 29063 |
+
"grad_norm": 0.3281303942203522,
|
| 29064 |
+
"learning_rate": 3.9604490060962674e-08,
|
| 29065 |
+
"loss": 0.7234,
|
| 29066 |
+
"step": 4103
|
| 29067 |
+
},
|
| 29068 |
+
{
|
| 29069 |
+
"epoch": 0.44563284696301325,
|
| 29070 |
+
"grad_norm": 0.28728532791137695,
|
| 29071 |
+
"learning_rate": 3.749276829333459e-08,
|
| 29072 |
+
"loss": 0.5878,
|
| 29073 |
+
"step": 4104
|
| 29074 |
+
},
|
| 29075 |
+
{
|
| 29076 |
+
"epoch": 0.4457414319647099,
|
| 29077 |
+
"grad_norm": 0.3254416882991791,
|
| 29078 |
+
"learning_rate": 3.5438887654737355e-08,
|
| 29079 |
+
"loss": 0.7285,
|
| 29080 |
+
"step": 4105
|
| 29081 |
+
},
|
| 29082 |
+
{
|
| 29083 |
+
"epoch": 0.4458500169664065,
|
| 29084 |
+
"grad_norm": 0.33132821321487427,
|
| 29085 |
+
"learning_rate": 3.344284933360919e-08,
|
| 29086 |
+
"loss": 0.7421,
|
| 29087 |
+
"step": 4106
|
| 29088 |
+
},
|
| 29089 |
+
{
|
| 29090 |
+
"epoch": 0.44595860196810316,
|
| 29091 |
+
"grad_norm": 0.32671990990638733,
|
| 29092 |
+
"learning_rate": 3.150465448490403e-08,
|
| 29093 |
+
"loss": 0.6265,
|
| 29094 |
+
"step": 4107
|
| 29095 |
+
},
|
| 29096 |
+
{
|
| 29097 |
+
"epoch": 0.4460671869697998,
|
| 29098 |
+
"grad_norm": 0.325366348028183,
|
| 29099 |
+
"learning_rate": 2.962430423011364e-08,
|
| 29100 |
+
"loss": 0.7648,
|
| 29101 |
+
"step": 4108
|
| 29102 |
+
},
|
| 29103 |
+
{
|
| 29104 |
+
"epoch": 0.44617577197149644,
|
| 29105 |
+
"grad_norm": 0.30909645557403564,
|
| 29106 |
+
"learning_rate": 2.7801799657278805e-08,
|
| 29107 |
+
"loss": 0.6806,
|
| 29108 |
+
"step": 4109
|
| 29109 |
+
},
|
| 29110 |
+
{
|
| 29111 |
+
"epoch": 0.4462843569731931,
|
| 29112 |
+
"grad_norm": 0.3081817924976349,
|
| 29113 |
+
"learning_rate": 2.6037141820933753e-08,
|
| 29114 |
+
"loss": 0.6483,
|
| 29115 |
+
"step": 4110
|
| 29116 |
+
},
|
| 29117 |
+
{
|
| 29118 |
+
"epoch": 0.4463929419748897,
|
| 29119 |
+
"grad_norm": 0.28678232431411743,
|
| 29120 |
+
"learning_rate": 2.4330331742172806e-08,
|
| 29121 |
+
"loss": 0.53,
|
| 29122 |
+
"step": 4111
|
| 29123 |
+
},
|
| 29124 |
+
{
|
| 29125 |
+
"epoch": 0.44650152697658635,
|
| 29126 |
+
"grad_norm": 0.29590025544166565,
|
| 29127 |
+
"learning_rate": 2.268137040859486e-08,
|
| 29128 |
+
"loss": 0.6223,
|
| 29129 |
+
"step": 4112
|
| 29130 |
+
},
|
| 29131 |
+
{
|
| 29132 |
+
"epoch": 0.446610111978283,
|
| 29133 |
+
"grad_norm": 0.3081272542476654,
|
| 29134 |
+
"learning_rate": 2.109025877433668e-08,
|
| 29135 |
+
"loss": 0.6641,
|
| 29136 |
+
"step": 4113
|
| 29137 |
+
},
|
| 29138 |
+
{
|
| 29139 |
+
"epoch": 0.4467186969799796,
|
| 29140 |
+
"grad_norm": 0.3248719573020935,
|
| 29141 |
+
"learning_rate": 1.955699776006181e-08,
|
| 29142 |
+
"loss": 0.7767,
|
| 29143 |
+
"step": 4114
|
| 29144 |
+
},
|
| 29145 |
+
{
|
| 29146 |
+
"epoch": 0.44682728198167626,
|
| 29147 |
+
"grad_norm": 0.312339186668396,
|
| 29148 |
+
"learning_rate": 1.808158825297168e-08,
|
| 29149 |
+
"loss": 0.6629,
|
| 29150 |
+
"step": 4115
|
| 29151 |
+
},
|
| 29152 |
+
{
|
| 29153 |
+
"epoch": 0.4469358669833729,
|
| 29154 |
+
"grad_norm": 0.3206634819507599,
|
| 29155 |
+
"learning_rate": 1.666403110676118e-08,
|
| 29156 |
+
"loss": 0.649,
|
| 29157 |
+
"step": 4116
|
| 29158 |
+
},
|
| 29159 |
+
{
|
| 29160 |
+
"epoch": 0.44704445198506954,
|
| 29161 |
+
"grad_norm": 0.30358952283859253,
|
| 29162 |
+
"learning_rate": 1.530432714167418e-08,
|
| 29163 |
+
"loss": 0.6306,
|
| 29164 |
+
"step": 4117
|
| 29165 |
+
},
|
| 29166 |
+
{
|
| 29167 |
+
"epoch": 0.44715303698676623,
|
| 29168 |
+
"grad_norm": 0.2956608831882477,
|
| 29169 |
+
"learning_rate": 1.4002477144470227e-08,
|
| 29170 |
+
"loss": 0.5967,
|
| 29171 |
+
"step": 4118
|
| 29172 |
+
},
|
| 29173 |
+
{
|
| 29174 |
+
"epoch": 0.44726162198846287,
|
| 29175 |
+
"grad_norm": 0.31782612204551697,
|
| 29176 |
+
"learning_rate": 1.275848186845785e-08,
|
| 29177 |
+
"loss": 0.6721,
|
| 29178 |
+
"step": 4119
|
| 29179 |
+
},
|
| 29180 |
+
{
|
| 29181 |
+
"epoch": 0.4473702069901595,
|
| 29182 |
+
"grad_norm": 0.28622129559516907,
|
| 29183 |
+
"learning_rate": 1.1572342033416839e-08,
|
| 29184 |
+
"loss": 0.6106,
|
| 29185 |
+
"step": 4120
|
| 29186 |
+
},
|
| 29187 |
+
{
|
| 29188 |
+
"epoch": 0.44747879199185614,
|
| 29189 |
+
"grad_norm": 0.3039482533931732,
|
| 29190 |
+
"learning_rate": 1.044405832569817e-08,
|
| 29191 |
+
"loss": 0.7213,
|
| 29192 |
+
"step": 4121
|
| 29193 |
+
},
|
| 29194 |
+
{
|
| 29195 |
+
"epoch": 0.4475873769935528,
|
| 29196 |
+
"grad_norm": 0.3242916464805603,
|
| 29197 |
+
"learning_rate": 9.373631398157389e-09,
|
| 29198 |
+
"loss": 0.7128,
|
| 29199 |
+
"step": 4122
|
| 29200 |
+
},
|
| 29201 |
+
{
|
| 29202 |
+
"epoch": 0.4476959619952494,
|
| 29203 |
+
"grad_norm": 0.30805718898773193,
|
| 29204 |
+
"learning_rate": 8.361061870176823e-09,
|
| 29205 |
+
"loss": 0.5978,
|
| 29206 |
+
"step": 4123
|
| 29207 |
+
},
|
| 29208 |
+
{
|
| 29209 |
+
"epoch": 0.44780454699694605,
|
| 29210 |
+
"grad_norm": 0.3264826536178589,
|
| 29211 |
+
"learning_rate": 7.4063503276544655e-09,
|
| 29212 |
+
"loss": 0.709,
|
| 29213 |
+
"step": 4124
|
| 29214 |
+
},
|
| 29215 |
+
{
|
| 29216 |
+
"epoch": 0.4479131319986427,
|
| 29217 |
+
"grad_norm": 0.3391072452068329,
|
| 29218 |
+
"learning_rate": 6.50949732301509e-09,
|
| 29219 |
+
"loss": 0.712,
|
| 29220 |
+
"step": 4125
|
| 29221 |
+
},
|
| 29222 |
+
{
|
| 29223 |
+
"epoch": 0.4480217170003393,
|
| 29224 |
+
"grad_norm": 0.3170183598995209,
|
| 29225 |
+
"learning_rate": 5.670503375188041e-09,
|
| 29226 |
+
"loss": 0.6444,
|
| 29227 |
+
"step": 4126
|
| 29228 |
+
},
|
| 29229 |
+
{
|
| 29230 |
+
"epoch": 0.44813030200203596,
|
| 29231 |
+
"grad_norm": 0.32671859860420227,
|
| 29232 |
+
"learning_rate": 4.889368969662744e-09,
|
| 29233 |
+
"loss": 0.7363,
|
| 29234 |
+
"step": 4127
|
| 29235 |
+
},
|
| 29236 |
+
{
|
| 29237 |
+
"epoch": 0.4482388870037326,
|
| 29238 |
+
"grad_norm": 0.34119758009910583,
|
| 29239 |
+
"learning_rate": 4.1660945584109936e-09,
|
| 29240 |
+
"loss": 0.6435,
|
| 29241 |
+
"step": 4128
|
| 29242 |
+
},
|
| 29243 |
+
{
|
| 29244 |
+
"epoch": 0.44834747200542924,
|
| 29245 |
+
"grad_norm": 0.3383527100086212,
|
| 29246 |
+
"learning_rate": 3.5006805599424596e-09,
|
| 29247 |
+
"loss": 0.7669,
|
| 29248 |
+
"step": 4129
|
| 29249 |
+
},
|
| 29250 |
+
{
|
| 29251 |
+
"epoch": 0.4484560570071259,
|
| 29252 |
+
"grad_norm": 0.2880455553531647,
|
| 29253 |
+
"learning_rate": 2.893127359282488e-09,
|
| 29254 |
+
"loss": 0.6149,
|
| 29255 |
+
"step": 4130
|
| 29256 |
+
},
|
| 29257 |
+
{
|
| 29258 |
+
"epoch": 0.4485646420088225,
|
| 29259 |
+
"grad_norm": 0.3154943585395813,
|
| 29260 |
+
"learning_rate": 2.3434353079831996e-09,
|
| 29261 |
+
"loss": 0.701,
|
| 29262 |
+
"step": 4131
|
| 29263 |
+
},
|
| 29264 |
+
{
|
| 29265 |
+
"epoch": 0.44867322701051915,
|
| 29266 |
+
"grad_norm": 0.3077964782714844,
|
| 29267 |
+
"learning_rate": 1.851604724112388e-09,
|
| 29268 |
+
"loss": 0.6714,
|
| 29269 |
+
"step": 4132
|
| 29270 |
+
},
|
| 29271 |
+
{
|
| 29272 |
+
"epoch": 0.4487818120122158,
|
| 29273 |
+
"grad_norm": 0.3311520218849182,
|
| 29274 |
+
"learning_rate": 1.4176358922535216e-09,
|
| 29275 |
+
"loss": 0.7025,
|
| 29276 |
+
"step": 4133
|
| 29277 |
+
},
|
| 29278 |
+
{
|
| 29279 |
+
"epoch": 0.4488903970139125,
|
| 29280 |
+
"grad_norm": 0.3134852945804596,
|
| 29281 |
+
"learning_rate": 1.041529063516844e-09,
|
| 29282 |
+
"loss": 0.6822,
|
| 29283 |
+
"step": 4134
|
| 29284 |
+
},
|
| 29285 |
+
{
|
| 29286 |
+
"epoch": 0.4489989820156091,
|
| 29287 |
+
"grad_norm": 0.2992844581604004,
|
| 29288 |
+
"learning_rate": 7.232844555282725e-10,
|
| 29289 |
+
"loss": 0.6657,
|
| 29290 |
+
"step": 4135
|
| 29291 |
+
},
|
| 29292 |
+
{
|
| 29293 |
+
"epoch": 0.44910756701730575,
|
| 29294 |
+
"grad_norm": 0.2949857711791992,
|
| 29295 |
+
"learning_rate": 4.629022524182958e-10,
|
| 29296 |
+
"loss": 0.6671,
|
| 29297 |
+
"step": 4136
|
| 29298 |
+
},
|
| 29299 |
+
{
|
| 29300 |
+
"epoch": 0.4492161520190024,
|
| 29301 |
+
"grad_norm": 0.3150671422481537,
|
| 29302 |
+
"learning_rate": 2.603826048774849e-10,
|
| 29303 |
+
"loss": 0.657,
|
| 29304 |
+
"step": 4137
|
| 29305 |
+
},
|
| 29306 |
+
{
|
| 29307 |
+
"epoch": 0.449324737020699,
|
| 29308 |
+
"grad_norm": 0.3099953234195709,
|
| 29309 |
+
"learning_rate": 1.1572563006767567e-10,
|
| 29310 |
+
"loss": 0.6787,
|
| 29311 |
+
"step": 4138
|
| 29312 |
+
},
|
| 29313 |
+
{
|
| 29314 |
+
"epoch": 0.44943332202239566,
|
| 29315 |
+
"grad_norm": 0.28630250692367554,
|
| 29316 |
+
"learning_rate": 2.8931411699684164e-11,
|
| 29317 |
+
"loss": 0.5811,
|
| 29318 |
+
"step": 4139
|
| 29319 |
+
},
|
| 29320 |
+
{
|
| 29321 |
+
"epoch": 0.4495419070240923,
|
| 29322 |
+
"grad_norm": 0.31658029556274414,
|
| 29323 |
+
"learning_rate": 0.0,
|
| 29324 |
+
"loss": 0.6864,
|
| 29325 |
+
"step": 4140
|
| 29326 |
}
|
| 29327 |
],
|
| 29328 |
"logging_steps": 1,
|
|
|
|
| 29346 |
"should_evaluate": false,
|
| 29347 |
"should_log": false,
|
| 29348 |
"should_save": true,
|
| 29349 |
+
"should_training_stop": true
|
| 29350 |
},
|
| 29351 |
"attributes": {}
|
| 29352 |
}
|
| 29353 |
},
|
| 29354 |
+
"total_flos": 1.822881497330221e+18,
|
| 29355 |
"train_batch_size": 4,
|
| 29356 |
"trial_name": null,
|
| 29357 |
"trial_params": null
|