Training in progress, step 21340, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f542db4dc4bdfef70ff8a6ef414b9efe0e9422b96365de4fab56a2eb00cf0528
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d39973e68bd2ab323de4ccae017a93bc78950b3db9a2f7db5ee5b9e19e93cb2
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94eef74bedfbfbaf29c17e811e443f9ec61d689a2aeb015aed498e7127930e57
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:237512d340213f3c8e92a41e3f1546a67ec9ffb4ffbb161bf53b27ba33f0fe09
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -16051,6 +16051,260 @@
|
|
| 16051 |
"eval_spearman_manhattan": 0.7463752489757238,
|
| 16052 |
"eval_steps_per_second": 35.33,
|
| 16053 |
"step": 21000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16054 |
}
|
| 16055 |
],
|
| 16056 |
"logging_steps": 10,
|
|
@@ -16065,7 +16319,7 @@
|
|
| 16065 |
"should_evaluate": false,
|
| 16066 |
"should_log": false,
|
| 16067 |
"should_save": true,
|
| 16068 |
-
"should_training_stop":
|
| 16069 |
},
|
| 16070 |
"attributes": {}
|
| 16071 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.0,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 21340,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 16051 |
"eval_spearman_manhattan": 0.7463752489757238,
|
| 16052 |
"eval_steps_per_second": 35.33,
|
| 16053 |
"step": 21000
|
| 16054 |
+
},
|
| 16055 |
+
{
|
| 16056 |
+
"epoch": 9.845360824742269,
|
| 16057 |
+
"grad_norm": 1.0928393602371216,
|
| 16058 |
+
"learning_rate": 8.769329896907216e-06,
|
| 16059 |
+
"loss": 0.0467,
|
| 16060 |
+
"step": 21010
|
| 16061 |
+
},
|
| 16062 |
+
{
|
| 16063 |
+
"epoch": 9.850046860356139,
|
| 16064 |
+
"grad_norm": 1.111713171005249,
|
| 16065 |
+
"learning_rate": 8.768744142455483e-06,
|
| 16066 |
+
"loss": 0.0621,
|
| 16067 |
+
"step": 21020
|
| 16068 |
+
},
|
| 16069 |
+
{
|
| 16070 |
+
"epoch": 9.85473289597001,
|
| 16071 |
+
"grad_norm": 1.0577993392944336,
|
| 16072 |
+
"learning_rate": 8.768158388003749e-06,
|
| 16073 |
+
"loss": 0.0547,
|
| 16074 |
+
"step": 21030
|
| 16075 |
+
},
|
| 16076 |
+
{
|
| 16077 |
+
"epoch": 9.85941893158388,
|
| 16078 |
+
"grad_norm": 1.453430414199829,
|
| 16079 |
+
"learning_rate": 8.767572633552015e-06,
|
| 16080 |
+
"loss": 0.0476,
|
| 16081 |
+
"step": 21040
|
| 16082 |
+
},
|
| 16083 |
+
{
|
| 16084 |
+
"epoch": 9.86410496719775,
|
| 16085 |
+
"grad_norm": 1.527876377105713,
|
| 16086 |
+
"learning_rate": 8.766986879100282e-06,
|
| 16087 |
+
"loss": 0.0476,
|
| 16088 |
+
"step": 21050
|
| 16089 |
+
},
|
| 16090 |
+
{
|
| 16091 |
+
"epoch": 9.868791002811621,
|
| 16092 |
+
"grad_norm": 1.6117839813232422,
|
| 16093 |
+
"learning_rate": 8.766401124648548e-06,
|
| 16094 |
+
"loss": 0.06,
|
| 16095 |
+
"step": 21060
|
| 16096 |
+
},
|
| 16097 |
+
{
|
| 16098 |
+
"epoch": 9.873477038425492,
|
| 16099 |
+
"grad_norm": 1.4520413875579834,
|
| 16100 |
+
"learning_rate": 8.765815370196815e-06,
|
| 16101 |
+
"loss": 0.049,
|
| 16102 |
+
"step": 21070
|
| 16103 |
+
},
|
| 16104 |
+
{
|
| 16105 |
+
"epoch": 9.878163074039362,
|
| 16106 |
+
"grad_norm": 0.7348192930221558,
|
| 16107 |
+
"learning_rate": 8.76522961574508e-06,
|
| 16108 |
+
"loss": 0.0531,
|
| 16109 |
+
"step": 21080
|
| 16110 |
+
},
|
| 16111 |
+
{
|
| 16112 |
+
"epoch": 9.882849109653232,
|
| 16113 |
+
"grad_norm": 2.0447332859039307,
|
| 16114 |
+
"learning_rate": 8.764643861293347e-06,
|
| 16115 |
+
"loss": 0.0499,
|
| 16116 |
+
"step": 21090
|
| 16117 |
+
},
|
| 16118 |
+
{
|
| 16119 |
+
"epoch": 9.887535145267105,
|
| 16120 |
+
"grad_norm": 1.4770324230194092,
|
| 16121 |
+
"learning_rate": 8.764058106841614e-06,
|
| 16122 |
+
"loss": 0.0467,
|
| 16123 |
+
"step": 21100
|
| 16124 |
+
},
|
| 16125 |
+
{
|
| 16126 |
+
"epoch": 9.892221180880975,
|
| 16127 |
+
"grad_norm": 2.313798189163208,
|
| 16128 |
+
"learning_rate": 8.763472352389879e-06,
|
| 16129 |
+
"loss": 0.0553,
|
| 16130 |
+
"step": 21110
|
| 16131 |
+
},
|
| 16132 |
+
{
|
| 16133 |
+
"epoch": 9.896907216494846,
|
| 16134 |
+
"grad_norm": 1.1835334300994873,
|
| 16135 |
+
"learning_rate": 8.762886597938146e-06,
|
| 16136 |
+
"loss": 0.0554,
|
| 16137 |
+
"step": 21120
|
| 16138 |
+
},
|
| 16139 |
+
{
|
| 16140 |
+
"epoch": 9.901593252108716,
|
| 16141 |
+
"grad_norm": 1.580556035041809,
|
| 16142 |
+
"learning_rate": 8.762300843486411e-06,
|
| 16143 |
+
"loss": 0.0507,
|
| 16144 |
+
"step": 21130
|
| 16145 |
+
},
|
| 16146 |
+
{
|
| 16147 |
+
"epoch": 9.906279287722587,
|
| 16148 |
+
"grad_norm": 0.9451290965080261,
|
| 16149 |
+
"learning_rate": 8.761715089034676e-06,
|
| 16150 |
+
"loss": 0.0451,
|
| 16151 |
+
"step": 21140
|
| 16152 |
+
},
|
| 16153 |
+
{
|
| 16154 |
+
"epoch": 9.910965323336457,
|
| 16155 |
+
"grad_norm": 1.0342776775360107,
|
| 16156 |
+
"learning_rate": 8.761129334582945e-06,
|
| 16157 |
+
"loss": 0.0497,
|
| 16158 |
+
"step": 21150
|
| 16159 |
+
},
|
| 16160 |
+
{
|
| 16161 |
+
"epoch": 9.915651358950328,
|
| 16162 |
+
"grad_norm": 1.1484373807907104,
|
| 16163 |
+
"learning_rate": 8.76054358013121e-06,
|
| 16164 |
+
"loss": 0.0466,
|
| 16165 |
+
"step": 21160
|
| 16166 |
+
},
|
| 16167 |
+
{
|
| 16168 |
+
"epoch": 9.920337394564198,
|
| 16169 |
+
"grad_norm": 2.1361167430877686,
|
| 16170 |
+
"learning_rate": 8.759957825679475e-06,
|
| 16171 |
+
"loss": 0.0631,
|
| 16172 |
+
"step": 21170
|
| 16173 |
+
},
|
| 16174 |
+
{
|
| 16175 |
+
"epoch": 9.925023430178069,
|
| 16176 |
+
"grad_norm": 1.0633893013000488,
|
| 16177 |
+
"learning_rate": 8.759372071227742e-06,
|
| 16178 |
+
"loss": 0.0468,
|
| 16179 |
+
"step": 21180
|
| 16180 |
+
},
|
| 16181 |
+
{
|
| 16182 |
+
"epoch": 9.929709465791941,
|
| 16183 |
+
"grad_norm": 2.109058380126953,
|
| 16184 |
+
"learning_rate": 8.758786316776007e-06,
|
| 16185 |
+
"loss": 0.0572,
|
| 16186 |
+
"step": 21190
|
| 16187 |
+
},
|
| 16188 |
+
{
|
| 16189 |
+
"epoch": 9.934395501405811,
|
| 16190 |
+
"grad_norm": 1.6682454347610474,
|
| 16191 |
+
"learning_rate": 8.758200562324274e-06,
|
| 16192 |
+
"loss": 0.0542,
|
| 16193 |
+
"step": 21200
|
| 16194 |
+
},
|
| 16195 |
+
{
|
| 16196 |
+
"epoch": 9.939081537019682,
|
| 16197 |
+
"grad_norm": 1.6142311096191406,
|
| 16198 |
+
"learning_rate": 8.757614807872541e-06,
|
| 16199 |
+
"loss": 0.0508,
|
| 16200 |
+
"step": 21210
|
| 16201 |
+
},
|
| 16202 |
+
{
|
| 16203 |
+
"epoch": 9.943767572633552,
|
| 16204 |
+
"grad_norm": 1.564691424369812,
|
| 16205 |
+
"learning_rate": 8.757029053420806e-06,
|
| 16206 |
+
"loss": 0.0518,
|
| 16207 |
+
"step": 21220
|
| 16208 |
+
},
|
| 16209 |
+
{
|
| 16210 |
+
"epoch": 9.948453608247423,
|
| 16211 |
+
"grad_norm": 1.526147723197937,
|
| 16212 |
+
"learning_rate": 8.756443298969073e-06,
|
| 16213 |
+
"loss": 0.0541,
|
| 16214 |
+
"step": 21230
|
| 16215 |
+
},
|
| 16216 |
+
{
|
| 16217 |
+
"epoch": 9.953139643861293,
|
| 16218 |
+
"grad_norm": 1.049249291419983,
|
| 16219 |
+
"learning_rate": 8.755857544517339e-06,
|
| 16220 |
+
"loss": 0.0564,
|
| 16221 |
+
"step": 21240
|
| 16222 |
+
},
|
| 16223 |
+
{
|
| 16224 |
+
"epoch": 9.957825679475164,
|
| 16225 |
+
"grad_norm": 2.233095407485962,
|
| 16226 |
+
"learning_rate": 8.755271790065605e-06,
|
| 16227 |
+
"loss": 0.0406,
|
| 16228 |
+
"step": 21250
|
| 16229 |
+
},
|
| 16230 |
+
{
|
| 16231 |
+
"epoch": 9.957825679475164,
|
| 16232 |
+
"eval_loss": 0.03874640911817551,
|
| 16233 |
+
"eval_pearson_cosine": 0.7823759183217831,
|
| 16234 |
+
"eval_pearson_dot": 0.6362060503787177,
|
| 16235 |
+
"eval_pearson_euclidean": 0.7337201116603183,
|
| 16236 |
+
"eval_pearson_manhattan": 0.7335170834683709,
|
| 16237 |
+
"eval_runtime": 42.9733,
|
| 16238 |
+
"eval_samples_per_second": 34.905,
|
| 16239 |
+
"eval_spearman_cosine": 0.7845387476571652,
|
| 16240 |
+
"eval_spearman_dot": 0.6531911497158578,
|
| 16241 |
+
"eval_spearman_euclidean": 0.7463049410965222,
|
| 16242 |
+
"eval_spearman_manhattan": 0.7460162771900863,
|
| 16243 |
+
"eval_steps_per_second": 34.905,
|
| 16244 |
+
"step": 21250
|
| 16245 |
+
},
|
| 16246 |
+
{
|
| 16247 |
+
"epoch": 9.962511715089034,
|
| 16248 |
+
"grad_norm": 1.1485975980758667,
|
| 16249 |
+
"learning_rate": 8.754686035613872e-06,
|
| 16250 |
+
"loss": 0.0483,
|
| 16251 |
+
"step": 21260
|
| 16252 |
+
},
|
| 16253 |
+
{
|
| 16254 |
+
"epoch": 9.967197750702905,
|
| 16255 |
+
"grad_norm": 1.0608566999435425,
|
| 16256 |
+
"learning_rate": 8.754100281162138e-06,
|
| 16257 |
+
"loss": 0.0488,
|
| 16258 |
+
"step": 21270
|
| 16259 |
+
},
|
| 16260 |
+
{
|
| 16261 |
+
"epoch": 9.971883786316775,
|
| 16262 |
+
"grad_norm": 2.202392339706421,
|
| 16263 |
+
"learning_rate": 8.753514526710405e-06,
|
| 16264 |
+
"loss": 0.0559,
|
| 16265 |
+
"step": 21280
|
| 16266 |
+
},
|
| 16267 |
+
{
|
| 16268 |
+
"epoch": 9.976569821930646,
|
| 16269 |
+
"grad_norm": 1.6414839029312134,
|
| 16270 |
+
"learning_rate": 8.75292877225867e-06,
|
| 16271 |
+
"loss": 0.064,
|
| 16272 |
+
"step": 21290
|
| 16273 |
+
},
|
| 16274 |
+
{
|
| 16275 |
+
"epoch": 9.981255857544518,
|
| 16276 |
+
"grad_norm": 1.7389216423034668,
|
| 16277 |
+
"learning_rate": 8.752343017806935e-06,
|
| 16278 |
+
"loss": 0.0464,
|
| 16279 |
+
"step": 21300
|
| 16280 |
+
},
|
| 16281 |
+
{
|
| 16282 |
+
"epoch": 9.985941893158389,
|
| 16283 |
+
"grad_norm": 1.7043269872665405,
|
| 16284 |
+
"learning_rate": 8.751757263355202e-06,
|
| 16285 |
+
"loss": 0.0473,
|
| 16286 |
+
"step": 21310
|
| 16287 |
+
},
|
| 16288 |
+
{
|
| 16289 |
+
"epoch": 9.990627928772259,
|
| 16290 |
+
"grad_norm": 1.5085012912750244,
|
| 16291 |
+
"learning_rate": 8.751171508903469e-06,
|
| 16292 |
+
"loss": 0.0496,
|
| 16293 |
+
"step": 21320
|
| 16294 |
+
},
|
| 16295 |
+
{
|
| 16296 |
+
"epoch": 9.99531396438613,
|
| 16297 |
+
"grad_norm": 1.686661720275879,
|
| 16298 |
+
"learning_rate": 8.750585754451734e-06,
|
| 16299 |
+
"loss": 0.051,
|
| 16300 |
+
"step": 21330
|
| 16301 |
+
},
|
| 16302 |
+
{
|
| 16303 |
+
"epoch": 10.0,
|
| 16304 |
+
"grad_norm": 1.0168319940567017,
|
| 16305 |
+
"learning_rate": 8.750000000000001e-06,
|
| 16306 |
+
"loss": 0.0477,
|
| 16307 |
+
"step": 21340
|
| 16308 |
}
|
| 16309 |
],
|
| 16310 |
"logging_steps": 10,
|
|
|
|
| 16319 |
"should_evaluate": false,
|
| 16320 |
"should_log": false,
|
| 16321 |
"should_save": true,
|
| 16322 |
+
"should_training_stop": true
|
| 16323 |
},
|
| 16324 |
"attributes": {}
|
| 16325 |
}
|