Training in progress, step 16000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42301bc164cb007a8e9ffaaebd3b674826efaacc96f02799ea8c54ebdf5beff1
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e166c3997353d811bb7375dab7e17cf88064b52029e8056c729ba4ae8d2e8f22
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8653c4f16bb3c4531444bd438e2a397c259c928e9f5a96f450fc3aa43ef0f5c
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91185d0e7a47d1f7979000c680b3a146a800c2ff31f983b75b24ceb331884072
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be0be34d9684d804e2f3030fceca4c7b93603e6596a44aaf270c97cb1740b1da
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e539799e7e99b66c33c364546118319f901c9765aa17eaf7cf8b17906c00c95a
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccc2a52ae0327def30cc40f7f273a4a1537961b9b580753fe57ec7ecdab69b35
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5258,6 +5258,356 @@
|
|
| 5258 |
"learning_rate": 0.0004952863526922635,
|
| 5259 |
"loss": 20.6348,
|
| 5260 |
"step": 15000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5261 |
}
|
| 5262 |
],
|
| 5263 |
"logging_steps": 20,
|
|
@@ -5277,7 +5627,7 @@
|
|
| 5277 |
"attributes": {}
|
| 5278 |
}
|
| 5279 |
},
|
| 5280 |
-
"total_flos": 1.
|
| 5281 |
"train_batch_size": 48,
|
| 5282 |
"trial_name": null,
|
| 5283 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.031199574905791908,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 16000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5258 |
"learning_rate": 0.0004952863526922635,
|
| 5259 |
"loss": 20.6348,
|
| 5260 |
"step": 15000
|
| 5261 |
+
},
|
| 5262 |
+
{
|
| 5263 |
+
"epoch": 0.029288600942812153,
|
| 5264 |
+
"grad_norm": 11.0,
|
| 5265 |
+
"learning_rate": 0.000495279850661354,
|
| 5266 |
+
"loss": 20.6052,
|
| 5267 |
+
"step": 15020
|
| 5268 |
+
},
|
| 5269 |
+
{
|
| 5270 |
+
"epoch": 0.029327600411444395,
|
| 5271 |
+
"grad_norm": 11.125,
|
| 5272 |
+
"learning_rate": 0.0004952733486304447,
|
| 5273 |
+
"loss": 20.6334,
|
| 5274 |
+
"step": 15040
|
| 5275 |
+
},
|
| 5276 |
+
{
|
| 5277 |
+
"epoch": 0.029366599880076633,
|
| 5278 |
+
"grad_norm": 11.3125,
|
| 5279 |
+
"learning_rate": 0.0004952668465995353,
|
| 5280 |
+
"loss": 20.6751,
|
| 5281 |
+
"step": 15060
|
| 5282 |
+
},
|
| 5283 |
+
{
|
| 5284 |
+
"epoch": 0.029405599348708875,
|
| 5285 |
+
"grad_norm": 14.75,
|
| 5286 |
+
"learning_rate": 0.000495260344568626,
|
| 5287 |
+
"loss": 20.5701,
|
| 5288 |
+
"step": 15080
|
| 5289 |
+
},
|
| 5290 |
+
{
|
| 5291 |
+
"epoch": 0.029444598817341114,
|
| 5292 |
+
"grad_norm": 12.0,
|
| 5293 |
+
"learning_rate": 0.0004952538425377166,
|
| 5294 |
+
"loss": 20.5664,
|
| 5295 |
+
"step": 15100
|
| 5296 |
+
},
|
| 5297 |
+
{
|
| 5298 |
+
"epoch": 0.029483598285973352,
|
| 5299 |
+
"grad_norm": 12.625,
|
| 5300 |
+
"learning_rate": 0.0004952473405068073,
|
| 5301 |
+
"loss": 20.5255,
|
| 5302 |
+
"step": 15120
|
| 5303 |
+
},
|
| 5304 |
+
{
|
| 5305 |
+
"epoch": 0.029522597754605594,
|
| 5306 |
+
"grad_norm": 11.0,
|
| 5307 |
+
"learning_rate": 0.000495240838475898,
|
| 5308 |
+
"loss": 20.5584,
|
| 5309 |
+
"step": 15140
|
| 5310 |
+
},
|
| 5311 |
+
{
|
| 5312 |
+
"epoch": 0.029561597223237833,
|
| 5313 |
+
"grad_norm": 13.0625,
|
| 5314 |
+
"learning_rate": 0.0004952343364449886,
|
| 5315 |
+
"loss": 20.6137,
|
| 5316 |
+
"step": 15160
|
| 5317 |
+
},
|
| 5318 |
+
{
|
| 5319 |
+
"epoch": 0.029600596691870075,
|
| 5320 |
+
"grad_norm": 11.125,
|
| 5321 |
+
"learning_rate": 0.0004952278344140793,
|
| 5322 |
+
"loss": 20.5632,
|
| 5323 |
+
"step": 15180
|
| 5324 |
+
},
|
| 5325 |
+
{
|
| 5326 |
+
"epoch": 0.029639596160502314,
|
| 5327 |
+
"grad_norm": 10.8125,
|
| 5328 |
+
"learning_rate": 0.0004952213323831699,
|
| 5329 |
+
"loss": 20.5356,
|
| 5330 |
+
"step": 15200
|
| 5331 |
+
},
|
| 5332 |
+
{
|
| 5333 |
+
"epoch": 0.029678595629134552,
|
| 5334 |
+
"grad_norm": 12.25,
|
| 5335 |
+
"learning_rate": 0.0004952148303522606,
|
| 5336 |
+
"loss": 20.5935,
|
| 5337 |
+
"step": 15220
|
| 5338 |
+
},
|
| 5339 |
+
{
|
| 5340 |
+
"epoch": 0.029717595097766794,
|
| 5341 |
+
"grad_norm": 9.875,
|
| 5342 |
+
"learning_rate": 0.0004952083283213512,
|
| 5343 |
+
"loss": 20.6133,
|
| 5344 |
+
"step": 15240
|
| 5345 |
+
},
|
| 5346 |
+
{
|
| 5347 |
+
"epoch": 0.029756594566399033,
|
| 5348 |
+
"grad_norm": 12.75,
|
| 5349 |
+
"learning_rate": 0.0004952018262904418,
|
| 5350 |
+
"loss": 20.6443,
|
| 5351 |
+
"step": 15260
|
| 5352 |
+
},
|
| 5353 |
+
{
|
| 5354 |
+
"epoch": 0.02979559403503127,
|
| 5355 |
+
"grad_norm": 13.4375,
|
| 5356 |
+
"learning_rate": 0.0004951953242595325,
|
| 5357 |
+
"loss": 20.6207,
|
| 5358 |
+
"step": 15280
|
| 5359 |
+
},
|
| 5360 |
+
{
|
| 5361 |
+
"epoch": 0.029834593503663513,
|
| 5362 |
+
"grad_norm": 10.0625,
|
| 5363 |
+
"learning_rate": 0.0004951888222286231,
|
| 5364 |
+
"loss": 20.5027,
|
| 5365 |
+
"step": 15300
|
| 5366 |
+
},
|
| 5367 |
+
{
|
| 5368 |
+
"epoch": 0.029873592972295752,
|
| 5369 |
+
"grad_norm": 11.8125,
|
| 5370 |
+
"learning_rate": 0.0004951823201977138,
|
| 5371 |
+
"loss": 20.534,
|
| 5372 |
+
"step": 15320
|
| 5373 |
+
},
|
| 5374 |
+
{
|
| 5375 |
+
"epoch": 0.029912592440927994,
|
| 5376 |
+
"grad_norm": 11.0625,
|
| 5377 |
+
"learning_rate": 0.0004951758181668044,
|
| 5378 |
+
"loss": 20.5885,
|
| 5379 |
+
"step": 15340
|
| 5380 |
+
},
|
| 5381 |
+
{
|
| 5382 |
+
"epoch": 0.029951591909560232,
|
| 5383 |
+
"grad_norm": 11.1875,
|
| 5384 |
+
"learning_rate": 0.0004951693161358951,
|
| 5385 |
+
"loss": 20.6046,
|
| 5386 |
+
"step": 15360
|
| 5387 |
+
},
|
| 5388 |
+
{
|
| 5389 |
+
"epoch": 0.02999059137819247,
|
| 5390 |
+
"grad_norm": 11.6875,
|
| 5391 |
+
"learning_rate": 0.0004951628141049857,
|
| 5392 |
+
"loss": 20.4537,
|
| 5393 |
+
"step": 15380
|
| 5394 |
+
},
|
| 5395 |
+
{
|
| 5396 |
+
"epoch": 0.030029590846824713,
|
| 5397 |
+
"grad_norm": 11.1875,
|
| 5398 |
+
"learning_rate": 0.0004951563120740764,
|
| 5399 |
+
"loss": 20.4366,
|
| 5400 |
+
"step": 15400
|
| 5401 |
+
},
|
| 5402 |
+
{
|
| 5403 |
+
"epoch": 0.03006859031545695,
|
| 5404 |
+
"grad_norm": 12.3125,
|
| 5405 |
+
"learning_rate": 0.000495149810043167,
|
| 5406 |
+
"loss": 20.5313,
|
| 5407 |
+
"step": 15420
|
| 5408 |
+
},
|
| 5409 |
+
{
|
| 5410 |
+
"epoch": 0.030107589784089193,
|
| 5411 |
+
"grad_norm": 12.5,
|
| 5412 |
+
"learning_rate": 0.0004951433080122577,
|
| 5413 |
+
"loss": 20.5595,
|
| 5414 |
+
"step": 15440
|
| 5415 |
+
},
|
| 5416 |
+
{
|
| 5417 |
+
"epoch": 0.030146589252721432,
|
| 5418 |
+
"grad_norm": 10.75,
|
| 5419 |
+
"learning_rate": 0.0004951368059813483,
|
| 5420 |
+
"loss": 20.4318,
|
| 5421 |
+
"step": 15460
|
| 5422 |
+
},
|
| 5423 |
+
{
|
| 5424 |
+
"epoch": 0.03018558872135367,
|
| 5425 |
+
"grad_norm": 12.8125,
|
| 5426 |
+
"learning_rate": 0.0004951303039504389,
|
| 5427 |
+
"loss": 20.5356,
|
| 5428 |
+
"step": 15480
|
| 5429 |
+
},
|
| 5430 |
+
{
|
| 5431 |
+
"epoch": 0.030224588189985913,
|
| 5432 |
+
"grad_norm": 11.75,
|
| 5433 |
+
"learning_rate": 0.0004951238019195296,
|
| 5434 |
+
"loss": 20.594,
|
| 5435 |
+
"step": 15500
|
| 5436 |
+
},
|
| 5437 |
+
{
|
| 5438 |
+
"epoch": 0.03026358765861815,
|
| 5439 |
+
"grad_norm": 11.125,
|
| 5440 |
+
"learning_rate": 0.0004951172998886202,
|
| 5441 |
+
"loss": 20.5289,
|
| 5442 |
+
"step": 15520
|
| 5443 |
+
},
|
| 5444 |
+
{
|
| 5445 |
+
"epoch": 0.03030258712725039,
|
| 5446 |
+
"grad_norm": 12.4375,
|
| 5447 |
+
"learning_rate": 0.0004951107978577109,
|
| 5448 |
+
"loss": 20.4482,
|
| 5449 |
+
"step": 15540
|
| 5450 |
+
},
|
| 5451 |
+
{
|
| 5452 |
+
"epoch": 0.03034158659588263,
|
| 5453 |
+
"grad_norm": 11.1875,
|
| 5454 |
+
"learning_rate": 0.0004951042958268015,
|
| 5455 |
+
"loss": 20.4001,
|
| 5456 |
+
"step": 15560
|
| 5457 |
+
},
|
| 5458 |
+
{
|
| 5459 |
+
"epoch": 0.03038058606451487,
|
| 5460 |
+
"grad_norm": 13.0625,
|
| 5461 |
+
"learning_rate": 0.0004950977937958922,
|
| 5462 |
+
"loss": 20.3405,
|
| 5463 |
+
"step": 15580
|
| 5464 |
+
},
|
| 5465 |
+
{
|
| 5466 |
+
"epoch": 0.030419585533147112,
|
| 5467 |
+
"grad_norm": 13.5625,
|
| 5468 |
+
"learning_rate": 0.0004950912917649827,
|
| 5469 |
+
"loss": 20.435,
|
| 5470 |
+
"step": 15600
|
| 5471 |
+
},
|
| 5472 |
+
{
|
| 5473 |
+
"epoch": 0.03045858500177935,
|
| 5474 |
+
"grad_norm": 11.25,
|
| 5475 |
+
"learning_rate": 0.0004950847897340734,
|
| 5476 |
+
"loss": 20.4817,
|
| 5477 |
+
"step": 15620
|
| 5478 |
+
},
|
| 5479 |
+
{
|
| 5480 |
+
"epoch": 0.03049758447041159,
|
| 5481 |
+
"grad_norm": 10.75,
|
| 5482 |
+
"learning_rate": 0.0004950782877031641,
|
| 5483 |
+
"loss": 20.4889,
|
| 5484 |
+
"step": 15640
|
| 5485 |
+
},
|
| 5486 |
+
{
|
| 5487 |
+
"epoch": 0.03053658393904383,
|
| 5488 |
+
"grad_norm": 12.25,
|
| 5489 |
+
"learning_rate": 0.0004950717856722547,
|
| 5490 |
+
"loss": 20.4209,
|
| 5491 |
+
"step": 15660
|
| 5492 |
+
},
|
| 5493 |
+
{
|
| 5494 |
+
"epoch": 0.03057558340767607,
|
| 5495 |
+
"grad_norm": 11.125,
|
| 5496 |
+
"learning_rate": 0.0004950652836413454,
|
| 5497 |
+
"loss": 20.401,
|
| 5498 |
+
"step": 15680
|
| 5499 |
+
},
|
| 5500 |
+
{
|
| 5501 |
+
"epoch": 0.030614582876308312,
|
| 5502 |
+
"grad_norm": 12.0,
|
| 5503 |
+
"learning_rate": 0.000495058781610436,
|
| 5504 |
+
"loss": 20.4579,
|
| 5505 |
+
"step": 15700
|
| 5506 |
+
},
|
| 5507 |
+
{
|
| 5508 |
+
"epoch": 0.03065358234494055,
|
| 5509 |
+
"grad_norm": 10.9375,
|
| 5510 |
+
"learning_rate": 0.0004950522795795267,
|
| 5511 |
+
"loss": 20.4935,
|
| 5512 |
+
"step": 15720
|
| 5513 |
+
},
|
| 5514 |
+
{
|
| 5515 |
+
"epoch": 0.03069258181357279,
|
| 5516 |
+
"grad_norm": 11.75,
|
| 5517 |
+
"learning_rate": 0.0004950457775486173,
|
| 5518 |
+
"loss": 20.4301,
|
| 5519 |
+
"step": 15740
|
| 5520 |
+
},
|
| 5521 |
+
{
|
| 5522 |
+
"epoch": 0.03073158128220503,
|
| 5523 |
+
"grad_norm": 11.6875,
|
| 5524 |
+
"learning_rate": 0.0004950392755177079,
|
| 5525 |
+
"loss": 20.3754,
|
| 5526 |
+
"step": 15760
|
| 5527 |
+
},
|
| 5528 |
+
{
|
| 5529 |
+
"epoch": 0.03077058075083727,
|
| 5530 |
+
"grad_norm": 10.625,
|
| 5531 |
+
"learning_rate": 0.0004950327734867985,
|
| 5532 |
+
"loss": 20.4608,
|
| 5533 |
+
"step": 15780
|
| 5534 |
+
},
|
| 5535 |
+
{
|
| 5536 |
+
"epoch": 0.030809580219469508,
|
| 5537 |
+
"grad_norm": 11.875,
|
| 5538 |
+
"learning_rate": 0.0004950262714558892,
|
| 5539 |
+
"loss": 20.5408,
|
| 5540 |
+
"step": 15800
|
| 5541 |
+
},
|
| 5542 |
+
{
|
| 5543 |
+
"epoch": 0.03084857968810175,
|
| 5544 |
+
"grad_norm": 11.125,
|
| 5545 |
+
"learning_rate": 0.0004950197694249799,
|
| 5546 |
+
"loss": 20.3624,
|
| 5547 |
+
"step": 15820
|
| 5548 |
+
},
|
| 5549 |
+
{
|
| 5550 |
+
"epoch": 0.03088757915673399,
|
| 5551 |
+
"grad_norm": 10.75,
|
| 5552 |
+
"learning_rate": 0.0004950132673940705,
|
| 5553 |
+
"loss": 20.3549,
|
| 5554 |
+
"step": 15840
|
| 5555 |
+
},
|
| 5556 |
+
{
|
| 5557 |
+
"epoch": 0.03092657862536623,
|
| 5558 |
+
"grad_norm": 13.6875,
|
| 5559 |
+
"learning_rate": 0.0004950067653631612,
|
| 5560 |
+
"loss": 20.3933,
|
| 5561 |
+
"step": 15860
|
| 5562 |
+
},
|
| 5563 |
+
{
|
| 5564 |
+
"epoch": 0.03096557809399847,
|
| 5565 |
+
"grad_norm": 12.6875,
|
| 5566 |
+
"learning_rate": 0.0004950002633322518,
|
| 5567 |
+
"loss": 20.3452,
|
| 5568 |
+
"step": 15880
|
| 5569 |
+
},
|
| 5570 |
+
{
|
| 5571 |
+
"epoch": 0.031004577562630708,
|
| 5572 |
+
"grad_norm": 11.0625,
|
| 5573 |
+
"learning_rate": 0.0004949937613013425,
|
| 5574 |
+
"loss": 20.4437,
|
| 5575 |
+
"step": 15900
|
| 5576 |
+
},
|
| 5577 |
+
{
|
| 5578 |
+
"epoch": 0.03104357703126295,
|
| 5579 |
+
"grad_norm": 9.6875,
|
| 5580 |
+
"learning_rate": 0.0004949872592704331,
|
| 5581 |
+
"loss": 20.3318,
|
| 5582 |
+
"step": 15920
|
| 5583 |
+
},
|
| 5584 |
+
{
|
| 5585 |
+
"epoch": 0.03108257649989519,
|
| 5586 |
+
"grad_norm": 10.375,
|
| 5587 |
+
"learning_rate": 0.0004949807572395238,
|
| 5588 |
+
"loss": 20.3704,
|
| 5589 |
+
"step": 15940
|
| 5590 |
+
},
|
| 5591 |
+
{
|
| 5592 |
+
"epoch": 0.03112157596852743,
|
| 5593 |
+
"grad_norm": 11.625,
|
| 5594 |
+
"learning_rate": 0.0004949742552086145,
|
| 5595 |
+
"loss": 20.3497,
|
| 5596 |
+
"step": 15960
|
| 5597 |
+
},
|
| 5598 |
+
{
|
| 5599 |
+
"epoch": 0.03116057543715967,
|
| 5600 |
+
"grad_norm": 11.9375,
|
| 5601 |
+
"learning_rate": 0.0004949677531777051,
|
| 5602 |
+
"loss": 20.4226,
|
| 5603 |
+
"step": 15980
|
| 5604 |
+
},
|
| 5605 |
+
{
|
| 5606 |
+
"epoch": 0.031199574905791908,
|
| 5607 |
+
"grad_norm": 14.125,
|
| 5608 |
+
"learning_rate": 0.0004949612511467957,
|
| 5609 |
+
"loss": 20.3333,
|
| 5610 |
+
"step": 16000
|
| 5611 |
}
|
| 5612 |
],
|
| 5613 |
"logging_steps": 20,
|
|
|
|
| 5627 |
"attributes": {}
|
| 5628 |
}
|
| 5629 |
},
|
| 5630 |
+
"total_flos": 1.176271382718605e+19,
|
| 5631 |
"train_batch_size": 48,
|
| 5632 |
"trial_name": null,
|
| 5633 |
"trial_params": null
|