Training in progress, step 1196, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:26c57f537c6e8cb1f250dc86e919d048561f018dbb08c57403f8908ff3476437
 size 310152752
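Each binary file in this commit is stored as a Git LFS pointer: the repository itself tracks only the version line, a SHA-256 oid, and the byte size, while the payload lives in LFS storage. A minimal sketch for checking a downloaded copy of the adapter against the pointer above; the local path is an assumption for illustration:

import hashlib

def verify_lfs_file(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file's SHA-256 digest and byte size match an LFS pointer."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        # Stream in 1 MiB chunks so a ~300 MB checkpoint never sits in memory at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

print(verify_lfs_file(
    "last-checkpoint/adapter_model.safetensors",  # assumed local download path
    "26c57f537c6e8cb1f250dc86e919d048561f018dbb08c57403f8908ff3476437",
    310152752,
))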
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:5efc5ad94f15e851bd1bbc752176792db889009682cb9c361d1d3290ee91066f
 size 133909059
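optimizer.pt holds the optimizer state for the run; Trainer saves it as a torch-serialized state dict, which is why it dwarfs the scheduler file. A hedged sketch of inspecting it, assuming the file has actually been pulled from LFS rather than left as a pointer:

import torch

# Load on CPU; an optimizer state dict is plain data, no model code required.
opt_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")

# Optimizer state dicts have two keys: "state" (per-parameter buffers such as
# moment estimates) and "param_groups" (hyperparameters like the learning rate).
print(sorted(opt_state.keys()))
print(opt_state["param_groups"][0].get("lr"))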
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:304e4f2a6e2248bf62fc7d1ebac820e06d4e575b12b458ce60adbbc6486b8711
 size 1465
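scheduler.pt is only 1465 bytes because a learning-rate scheduler's state is bookkeeping (step counters and base rates), not tensors. Together with optimizer.pt and the adapter weights, it is what lets Trainer resume this run mid-schedule via train(resume_from_checkpoint=...). A hedged sketch of peeking inside, under the same pulled-from-LFS assumption:

import torch

# Exact keys depend on the scheduler class, but fields like "last_epoch" and
# "_step_count" are typical for torch.optim.lr_scheduler state dicts.
sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")
print(sched_state)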
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": 1150,
   "best_metric": 1.943885087966919,
   "best_model_checkpoint": "outputs/checkpoint-1150",
-  "epoch": …,
+  "epoch": 2.0,
   "eval_steps": 50,
-  "global_step": …,
+  "global_step": 1196,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8242,6 +8242,328 @@
       "eval_samples_per_second": 22.611,
       "eval_steps_per_second": 2.885,
       "step": 1150
-    }
+    },
+    {
+      "epoch": 1.9247491638795986,
+      "grad_norm": 0.22156818211078644,
+      "learning_rate": 1.9850918440857958e-07,
+      "loss": 1.7031984329223633,
+      "step": 1151
+    },
+    {
+      "epoch": 1.9264214046822743,
+      "grad_norm": 0.24198277294635773,
+      "learning_rate": 1.899829931495012e-07,
+      "loss": 1.9431957006454468,
+      "step": 1152
+    },
+    {
+      "epoch": 1.92809364548495,
+      "grad_norm": 0.23016783595085144,
+      "learning_rate": 1.8164324970625645e-07,
+      "loss": 1.9043176174163818,
+      "step": 1153
+    },
+    {
+      "epoch": 1.9297658862876255,
+      "grad_norm": 0.23631806671619415,
+      "learning_rate": 1.7349001675219245e-07,
+      "loss": 2.0027687549591064,
+      "step": 1154
+    },
+    {
+      "epoch": 1.931438127090301,
+      "grad_norm": 0.24670401215553284,
+      "learning_rate": 1.6552335555903297e-07,
+      "loss": 2.1321308612823486,
+      "step": 1155
+    },
+    {
+      "epoch": 1.9331103678929766,
+      "grad_norm": 0.2432417869567871,
+      "learning_rate": 1.577433259964123e-07,
+      "loss": 1.8444653749465942,
+      "step": 1156
+    },
+    {
+      "epoch": 1.9347826086956523,
+      "grad_norm": 0.2338031530380249,
+      "learning_rate": 1.501499865314171e-07,
+      "loss": 1.697690486907959,
+      "step": 1157
+    },
+    {
+      "epoch": 1.9364548494983278,
+      "grad_norm": 0.23973986506462097,
+      "learning_rate": 1.4274339422816195e-07,
+      "loss": 1.9107983112335205,
+      "step": 1158
+    },
+    {
+      "epoch": 1.9381270903010033,
+      "grad_norm": 0.2242889702320099,
+      "learning_rate": 1.3552360474734793e-07,
+      "loss": 1.8276660442352295,
+      "step": 1159
+    },
+    {
+      "epoch": 1.939799331103679,
+      "grad_norm": 0.2711448669433594,
+      "learning_rate": 1.284906723458462e-07,
+      "loss": 1.7825736999511719,
+      "step": 1160
+    },
+    {
+      "epoch": 1.9414715719063547,
+      "grad_norm": 0.2267025262117386,
+      "learning_rate": 1.216446498763013e-07,
+      "loss": 1.7734841108322144,
+      "step": 1161
+    },
+    {
+      "epoch": 1.9431438127090301,
+      "grad_norm": 0.22980396449565887,
+      "learning_rate": 1.1498558878672016e-07,
+      "loss": 1.7618581056594849,
+      "step": 1162
+    },
+    {
+      "epoch": 1.9448160535117056,
+      "grad_norm": 0.23873302340507507,
+      "learning_rate": 1.0851353912008644e-07,
+      "loss": 1.9185343980789185,
+      "step": 1163
+    },
+    {
+      "epoch": 1.9464882943143813,
+      "grad_norm": 0.24374531209468842,
+      "learning_rate": 1.0222854951399407e-07,
+      "loss": 1.9559900760650635,
+      "step": 1164
+    },
+    {
+      "epoch": 1.948160535117057,
+      "grad_norm": 0.23170500993728638,
+      "learning_rate": 9.613066720028097e-08,
+      "loss": 2.0244576930999756,
+      "step": 1165
+    },
+    {
+      "epoch": 1.9498327759197325,
+      "grad_norm": 0.2283419668674469,
+      "learning_rate": 9.021993800466256e-08,
+      "loss": 1.7836267948150635,
+      "step": 1166
+    },
+    {
+      "epoch": 1.951505016722408,
+      "grad_norm": 0.25570887327194214,
+      "learning_rate": 8.449640634639878e-08,
+      "loss": 2.202239513397217,
+      "step": 1167
+    },
+    {
+      "epoch": 1.9531772575250836,
+      "grad_norm": 0.23577114939689636,
+      "learning_rate": 7.896011523794988e-08,
+      "loss": 1.9459967613220215,
+      "step": 1168
+    },
+    {
+      "epoch": 1.9548494983277593,
+      "grad_norm": 0.23402273654937744,
+      "learning_rate": 7.361110628466838e-08,
+      "loss": 1.822799801826477,
+      "step": 1169
+    },
+    {
+      "epoch": 1.9565217391304348,
+      "grad_norm": 0.23017290234565735,
+      "learning_rate": 6.84494196844715e-08,
+      "loss": 1.8911821842193604,
+      "step": 1170
+    },
+    {
+      "epoch": 1.9581939799331103,
+      "grad_norm": 0.23453032970428467,
+      "learning_rate": 6.347509422754139e-08,
+      "loss": 1.8932383060455322,
+      "step": 1171
+    },
+    {
+      "epoch": 1.959866220735786,
+      "grad_norm": 0.2325022965669632,
+      "learning_rate": 5.868816729604765e-08,
+      "loss": 1.9721827507019043,
+      "step": 1172
+    },
+    {
+      "epoch": 1.9615384615384617,
+      "grad_norm": 0.2408953160047531,
+      "learning_rate": 5.408867486384472e-08,
+      "loss": 2.094602584838867,
+      "step": 1173
+    },
+    {
+      "epoch": 1.9632107023411371,
+      "grad_norm": 0.23328392207622528,
+      "learning_rate": 4.9676651496222136e-08,
+      "loss": 1.8785374164581299,
+      "step": 1174
+    },
+    {
+      "epoch": 1.9648829431438126,
+      "grad_norm": 0.2375405728816986,
+      "learning_rate": 4.5452130349629694e-08,
+      "loss": 2.090651273727417,
+      "step": 1175
+    },
+    {
+      "epoch": 1.9665551839464883,
+      "grad_norm": 0.2384941726922989,
+      "learning_rate": 4.141514317143602e-08,
+      "loss": 1.932543396949768,
+      "step": 1176
+    },
+    {
+      "epoch": 1.968227424749164,
+      "grad_norm": 0.22377879917621613,
+      "learning_rate": 3.7565720299687076e-08,
+      "loss": 1.8287705183029175,
+      "step": 1177
+    },
+    {
+      "epoch": 1.9698996655518395,
+      "grad_norm": 0.22661490738391876,
+      "learning_rate": 3.3903890662878576e-08,
+      "loss": 1.8487858772277832,
+      "step": 1178
+    },
+    {
+      "epoch": 1.971571906354515,
+      "grad_norm": 0.23505854606628418,
+      "learning_rate": 3.0429681779739484e-08,
+      "loss": 1.9943294525146484,
+      "step": 1179
+    },
+    {
+      "epoch": 1.9732441471571907,
+      "grad_norm": 0.23116961121559143,
+      "learning_rate": 2.7143119759026613e-08,
+      "loss": 1.818049669265747,
+      "step": 1180
+    },
+    {
+      "epoch": 1.9749163879598663,
+      "grad_norm": 0.22798657417297363,
+      "learning_rate": 2.404422929932204e-08,
+      "loss": 1.8310678005218506,
+      "step": 1181
+    },
+    {
+      "epoch": 1.9765886287625418,
+      "grad_norm": 0.24219320714473724,
+      "learning_rate": 2.113303368885822e-08,
+      "loss": 1.975824236869812,
+      "step": 1182
+    },
+    {
+      "epoch": 1.9782608695652173,
+      "grad_norm": 0.244304358959198,
+      "learning_rate": 1.8409554805329245e-08,
+      "loss": 2.0979132652282715,
+      "step": 1183
+    },
+    {
+      "epoch": 1.979933110367893,
+      "grad_norm": 0.22320342063903809,
+      "learning_rate": 1.5873813115740988e-08,
+      "loss": 1.6685010194778442,
+      "step": 1184
+    },
+    {
+      "epoch": 1.9816053511705687,
+      "grad_norm": 0.24459514021873474,
+      "learning_rate": 1.3525827676247326e-08,
+      "loss": 1.8652524948120117,
+      "step": 1185
+    },
+    {
+      "epoch": 1.9832775919732442,
+      "grad_norm": 0.23350538313388824,
+      "learning_rate": 1.1365616132008593e-08,
+      "loss": 1.7653487920761108,
+      "step": 1186
+    },
+    {
+      "epoch": 1.9849498327759196,
+      "grad_norm": 0.23290188610553741,
+      "learning_rate": 9.393194717061127e-09,
+      "loss": 1.9263311624526978,
+      "step": 1187
+    },
+    {
+      "epoch": 1.9866220735785953,
+      "grad_norm": 0.23780138790607452,
+      "learning_rate": 7.608578254195142e-09,
+      "loss": 2.1235318183898926,
+      "step": 1188
+    },
+    {
+      "epoch": 1.988294314381271,
+      "grad_norm": 0.221963033080101,
+      "learning_rate": 6.0117801548437155e-09,
+      "loss": 1.9430090188980103,
+      "step": 1189
+    },
+    {
+      "epoch": 1.9899665551839465,
+      "grad_norm": 0.24497728049755096,
+      "learning_rate": 4.602812418974534e-09,
+      "loss": 1.9605302810668945,
+      "step": 1190
+    },
+    {
+      "epoch": 1.991638795986622,
+      "grad_norm": 0.24507929384708405,
+      "learning_rate": 3.3816856350177284e-09,
+      "loss": 1.8447235822677612,
+      "step": 1191
+    },
+    {
+      "epoch": 1.9933110367892977,
+      "grad_norm": 0.2350272387266159,
+      "learning_rate": 2.348408979760408e-09,
+      "loss": 1.8975446224212646,
+      "step": 1192
+    },
+    {
+      "epoch": 1.9949832775919734,
+      "grad_norm": 0.24266520142555237,
+      "learning_rate": 1.502990218302247e-09,
+      "loss": 1.9446773529052734,
+      "step": 1193
+    },
+    {
+      "epoch": 1.9966555183946488,
+      "grad_norm": 0.23780354857444763,
+      "learning_rate": 8.454357039860971e-10,
+      "loss": 1.9795520305633545,
+      "step": 1194
+    },
+    {
+      "epoch": 1.9983277591973243,
+      "grad_norm": 0.2187688648700714,
+      "learning_rate": 3.757503783424765e-10,
+      "loss": 1.903891682624817,
+      "step": 1195
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2732059061527252,
+      "learning_rate": 9.393777107291613e-11,
+      "loss": 2.0153965950012207,
+      "step": 1196
+    }
   ],
   "logging_steps": 1,
@@ -8256,12 +8578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": …
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.…,
+  "total_flos": 1.4028145166609603e+18,
   "train_batch_size": 12,
   "trial_name": null,
   "trial_params": null
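The 46 log_history entries appended here (steps 1151 through 1196) show the learning rate annealing from roughly 2e-07 down to 9.4e-11, consistent with a cosine-style schedule decaying to zero as the run reaches epoch 2.0 and should_training_stop flips to true. A minimal sketch for pulling summary statistics out of the file; the path mirrors this commit:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a "loss" key; evaluation entries carry "eval_loss"
# instead, so this filter keeps only the training steps added in this commit.
new_steps = [e for e in state["log_history"] if "loss" in e and e["step"] > 1150]
mean_loss = sum(e["loss"] for e in new_steps) / len(new_steps)

print(f"new entries: {len(new_steps)} (steps {new_steps[0]['step']}-{new_steps[-1]['step']})")
print(f"mean train loss over new steps: {mean_loss:.4f}")
print(f"best eval metric: {state['best_metric']} at step {state['best_global_step']}")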