Training in progress, step 570000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bb93720ada86a6004ba26eb4fabd56849226e35d2f46baf4052697153c666bb
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e62e41706c3cdebd0963ceae6fb24ae079cf26e6452a67e31e4c02f3a80456e6
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 6.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11206,11 +11206,211 @@
|
|
| 11206 |
"eval_samples_per_second": 882.169,
|
| 11207 |
"eval_steps_per_second": 13.826,
|
| 11208 |
"step": 560000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11209 |
}
|
| 11210 |
],
|
| 11211 |
"max_steps": 1000000,
|
| 11212 |
"num_train_epochs": 12,
|
| 11213 |
-
"total_flos": 3.
|
| 11214 |
"trial_name": null,
|
| 11215 |
"trial_params": null
|
| 11216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 6.357137280706646,
|
| 5 |
+
"global_step": 570000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11206 |
"eval_samples_per_second": 882.169,
|
| 11207 |
"eval_steps_per_second": 13.826,
|
| 11208 |
"step": 560000
|
| 11209 |
+
},
|
| 11210 |
+
{
|
| 11211 |
+
"epoch": 6.25,
|
| 11212 |
+
"learning_rate": 7.180111803267856e-05,
|
| 11213 |
+
"loss": 0.2054,
|
| 11214 |
+
"step": 560500
|
| 11215 |
+
},
|
| 11216 |
+
{
|
| 11217 |
+
"epoch": 6.26,
|
| 11218 |
+
"learning_rate": 7.168618306865838e-05,
|
| 11219 |
+
"loss": 0.2051,
|
| 11220 |
+
"step": 561000
|
| 11221 |
+
},
|
| 11222 |
+
{
|
| 11223 |
+
"epoch": 6.26,
|
| 11224 |
+
"eval_loss": 0.195304736495018,
|
| 11225 |
+
"eval_runtime": 2.4332,
|
| 11226 |
+
"eval_samples_per_second": 944.014,
|
| 11227 |
+
"eval_steps_per_second": 14.795,
|
| 11228 |
+
"step": 561000
|
| 11229 |
+
},
|
| 11230 |
+
{
|
| 11231 |
+
"epoch": 6.26,
|
| 11232 |
+
"learning_rate": 7.157127083429626e-05,
|
| 11233 |
+
"loss": 0.2053,
|
| 11234 |
+
"step": 561500
|
| 11235 |
+
},
|
| 11236 |
+
{
|
| 11237 |
+
"epoch": 6.27,
|
| 11238 |
+
"learning_rate": 7.145638164375779e-05,
|
| 11239 |
+
"loss": 0.205,
|
| 11240 |
+
"step": 562000
|
| 11241 |
+
},
|
| 11242 |
+
{
|
| 11243 |
+
"epoch": 6.27,
|
| 11244 |
+
"eval_loss": 0.1958540827035904,
|
| 11245 |
+
"eval_runtime": 2.6408,
|
| 11246 |
+
"eval_samples_per_second": 869.814,
|
| 11247 |
+
"eval_steps_per_second": 13.632,
|
| 11248 |
+
"step": 562000
|
| 11249 |
+
},
|
| 11250 |
+
{
|
| 11251 |
+
"epoch": 6.27,
|
| 11252 |
+
"learning_rate": 7.134151581114565e-05,
|
| 11253 |
+
"loss": 0.2053,
|
| 11254 |
+
"step": 562500
|
| 11255 |
+
},
|
| 11256 |
+
{
|
| 11257 |
+
"epoch": 6.28,
|
| 11258 |
+
"learning_rate": 7.122667365049869e-05,
|
| 11259 |
+
"loss": 0.2052,
|
| 11260 |
+
"step": 563000
|
| 11261 |
+
},
|
| 11262 |
+
{
|
| 11263 |
+
"epoch": 6.28,
|
| 11264 |
+
"eval_loss": 0.19526307284832,
|
| 11265 |
+
"eval_runtime": 2.6193,
|
| 11266 |
+
"eval_samples_per_second": 876.966,
|
| 11267 |
+
"eval_steps_per_second": 13.744,
|
| 11268 |
+
"step": 563000
|
| 11269 |
+
},
|
| 11270 |
+
{
|
| 11271 |
+
"epoch": 6.28,
|
| 11272 |
+
"learning_rate": 7.111185547579099e-05,
|
| 11273 |
+
"loss": 0.205,
|
| 11274 |
+
"step": 563500
|
| 11275 |
+
},
|
| 11276 |
+
{
|
| 11277 |
+
"epoch": 6.29,
|
| 11278 |
+
"learning_rate": 7.099706160093098e-05,
|
| 11279 |
+
"loss": 0.2051,
|
| 11280 |
+
"step": 564000
|
| 11281 |
+
},
|
| 11282 |
+
{
|
| 11283 |
+
"epoch": 6.29,
|
| 11284 |
+
"eval_loss": 0.1962643265724182,
|
| 11285 |
+
"eval_runtime": 2.4959,
|
| 11286 |
+
"eval_samples_per_second": 920.299,
|
| 11287 |
+
"eval_steps_per_second": 14.423,
|
| 11288 |
+
"step": 564000
|
| 11289 |
+
},
|
| 11290 |
+
{
|
| 11291 |
+
"epoch": 6.3,
|
| 11292 |
+
"learning_rate": 7.08822923397608e-05,
|
| 11293 |
+
"loss": 0.2054,
|
| 11294 |
+
"step": 564500
|
| 11295 |
+
},
|
| 11296 |
+
{
|
| 11297 |
+
"epoch": 6.3,
|
| 11298 |
+
"learning_rate": 7.076754800605516e-05,
|
| 11299 |
+
"loss": 0.2053,
|
| 11300 |
+
"step": 565000
|
| 11301 |
+
},
|
| 11302 |
+
{
|
| 11303 |
+
"epoch": 6.3,
|
| 11304 |
+
"eval_loss": 0.19500210881233215,
|
| 11305 |
+
"eval_runtime": 2.6355,
|
| 11306 |
+
"eval_samples_per_second": 871.546,
|
| 11307 |
+
"eval_steps_per_second": 13.659,
|
| 11308 |
+
"step": 565000
|
| 11309 |
+
},
|
| 11310 |
+
{
|
| 11311 |
+
"epoch": 6.31,
|
| 11312 |
+
"learning_rate": 7.065282891352078e-05,
|
| 11313 |
+
"loss": 0.2049,
|
| 11314 |
+
"step": 565500
|
| 11315 |
+
},
|
| 11316 |
+
{
|
| 11317 |
+
"epoch": 6.31,
|
| 11318 |
+
"learning_rate": 7.053813537579523e-05,
|
| 11319 |
+
"loss": 0.2052,
|
| 11320 |
+
"step": 566000
|
| 11321 |
+
},
|
| 11322 |
+
{
|
| 11323 |
+
"epoch": 6.31,
|
| 11324 |
+
"eval_loss": 0.1964665800333023,
|
| 11325 |
+
"eval_runtime": 2.6178,
|
| 11326 |
+
"eval_samples_per_second": 877.444,
|
| 11327 |
+
"eval_steps_per_second": 13.752,
|
| 11328 |
+
"step": 566000
|
| 11329 |
+
},
|
| 11330 |
+
{
|
| 11331 |
+
"epoch": 6.32,
|
| 11332 |
+
"learning_rate": 7.042346770644624e-05,
|
| 11333 |
+
"loss": 0.2046,
|
| 11334 |
+
"step": 566500
|
| 11335 |
+
},
|
| 11336 |
+
{
|
| 11337 |
+
"epoch": 6.32,
|
| 11338 |
+
"learning_rate": 7.030882621897088e-05,
|
| 11339 |
+
"loss": 0.2046,
|
| 11340 |
+
"step": 567000
|
| 11341 |
+
},
|
| 11342 |
+
{
|
| 11343 |
+
"epoch": 6.32,
|
| 11344 |
+
"eval_loss": 0.19378143548965454,
|
| 11345 |
+
"eval_runtime": 2.6471,
|
| 11346 |
+
"eval_samples_per_second": 867.729,
|
| 11347 |
+
"eval_steps_per_second": 13.6,
|
| 11348 |
+
"step": 567000
|
| 11349 |
+
},
|
| 11350 |
+
{
|
| 11351 |
+
"epoch": 6.33,
|
| 11352 |
+
"learning_rate": 7.019421122679455e-05,
|
| 11353 |
+
"loss": 0.2052,
|
| 11354 |
+
"step": 567500
|
| 11355 |
+
},
|
| 11356 |
+
{
|
| 11357 |
+
"epoch": 6.33,
|
| 11358 |
+
"learning_rate": 7.00796230432703e-05,
|
| 11359 |
+
"loss": 0.2045,
|
| 11360 |
+
"step": 568000
|
| 11361 |
+
},
|
| 11362 |
+
{
|
| 11363 |
+
"epoch": 6.33,
|
| 11364 |
+
"eval_loss": 0.1938391774892807,
|
| 11365 |
+
"eval_runtime": 2.5793,
|
| 11366 |
+
"eval_samples_per_second": 890.552,
|
| 11367 |
+
"eval_steps_per_second": 13.957,
|
| 11368 |
+
"step": 568000
|
| 11369 |
+
},
|
| 11370 |
+
{
|
| 11371 |
+
"epoch": 6.34,
|
| 11372 |
+
"learning_rate": 6.996506198167789e-05,
|
| 11373 |
+
"loss": 0.2046,
|
| 11374 |
+
"step": 568500
|
| 11375 |
+
},
|
| 11376 |
+
{
|
| 11377 |
+
"epoch": 6.35,
|
| 11378 |
+
"learning_rate": 6.985052835522279e-05,
|
| 11379 |
+
"loss": 0.2045,
|
| 11380 |
+
"step": 569000
|
| 11381 |
+
},
|
| 11382 |
+
{
|
| 11383 |
+
"epoch": 6.35,
|
| 11384 |
+
"eval_loss": 0.19408397376537323,
|
| 11385 |
+
"eval_runtime": 2.5021,
|
| 11386 |
+
"eval_samples_per_second": 918.029,
|
| 11387 |
+
"eval_steps_per_second": 14.388,
|
| 11388 |
+
"step": 569000
|
| 11389 |
+
},
|
| 11390 |
+
{
|
| 11391 |
+
"epoch": 6.35,
|
| 11392 |
+
"learning_rate": 6.973602247703561e-05,
|
| 11393 |
+
"loss": 0.2047,
|
| 11394 |
+
"step": 569500
|
| 11395 |
+
},
|
| 11396 |
+
{
|
| 11397 |
+
"epoch": 6.36,
|
| 11398 |
+
"learning_rate": 6.962154466017105e-05,
|
| 11399 |
+
"loss": 0.2047,
|
| 11400 |
+
"step": 570000
|
| 11401 |
+
},
|
| 11402 |
+
{
|
| 11403 |
+
"epoch": 6.36,
|
| 11404 |
+
"eval_loss": 0.19305509328842163,
|
| 11405 |
+
"eval_runtime": 2.5881,
|
| 11406 |
+
"eval_samples_per_second": 887.536,
|
| 11407 |
+
"eval_steps_per_second": 13.91,
|
| 11408 |
+
"step": 570000
|
| 11409 |
}
|
| 11410 |
],
|
| 11411 |
"max_steps": 1000000,
|
| 11412 |
"num_train_epochs": 12,
|
| 11413 |
+
"total_flos": 3.9956820200446935e+22,
|
| 11414 |
"trial_name": null,
|
| 11415 |
"trial_params": null
|
| 11416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
|
| 3 |
size 449471589
|