Training in progress, step 970000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de1b3977166c4e20fc41f424497409f61e5dbee702d8ad14048093e7cfab3225
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebc5b0eb8c3bb3dd69db934b29e5beccddfae1a796cf948830a8b05766d44497
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9af3eb0d3db8162f6de4427ee5f19b1787f4bdb865e0ebda13f4fed6034a8890
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -19206,11 +19206,211 @@
|
|
| 19206 |
"eval_samples_per_second": 837.549,
|
| 19207 |
"eval_steps_per_second": 13.127,
|
| 19208 |
"step": 960000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19209 |
}
|
| 19210 |
],
|
| 19211 |
"max_steps": 1000000,
|
| 19212 |
"num_train_epochs": 12,
|
| 19213 |
-
"total_flos": 6.
|
| 19214 |
"trial_name": null,
|
| 19215 |
"trial_params": null
|
| 19216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.461148968916945,
|
| 5 |
+
"global_step": 970000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 19206 |
"eval_samples_per_second": 837.549,
|
| 19207 |
"eval_steps_per_second": 13.127,
|
| 19208 |
"step": 960000
|
| 19209 |
+
},
|
| 19210 |
+
{
|
| 19211 |
+
"epoch": 10.36,
|
| 19212 |
+
"learning_rate": 1.0596344426086501e-05,
|
| 19213 |
+
"loss": 0.1798,
|
| 19214 |
+
"step": 960500
|
| 19215 |
+
},
|
| 19216 |
+
{
|
| 19217 |
+
"epoch": 10.36,
|
| 19218 |
+
"learning_rate": 1.0581363452005424e-05,
|
| 19219 |
+
"loss": 0.1805,
|
| 19220 |
+
"step": 961000
|
| 19221 |
+
},
|
| 19222 |
+
{
|
| 19223 |
+
"epoch": 10.36,
|
| 19224 |
+
"eval_loss": 0.17073865234851837,
|
| 19225 |
+
"eval_runtime": 2.5938,
|
| 19226 |
+
"eval_samples_per_second": 885.585,
|
| 19227 |
+
"eval_steps_per_second": 13.879,
|
| 19228 |
+
"step": 961000
|
| 19229 |
+
},
|
| 19230 |
+
{
|
| 19231 |
+
"epoch": 10.37,
|
| 19232 |
+
"learning_rate": 1.0566572265825932e-05,
|
| 19233 |
+
"loss": 0.18,
|
| 19234 |
+
"step": 961500
|
| 19235 |
+
},
|
| 19236 |
+
{
|
| 19237 |
+
"epoch": 10.37,
|
| 19238 |
+
"learning_rate": 1.0551970907986557e-05,
|
| 19239 |
+
"loss": 0.1801,
|
| 19240 |
+
"step": 962000
|
| 19241 |
+
},
|
| 19242 |
+
{
|
| 19243 |
+
"epoch": 10.37,
|
| 19244 |
+
"eval_loss": 0.17134888470172882,
|
| 19245 |
+
"eval_runtime": 2.5813,
|
| 19246 |
+
"eval_samples_per_second": 889.852,
|
| 19247 |
+
"eval_steps_per_second": 13.946,
|
| 19248 |
+
"step": 962000
|
| 19249 |
+
},
|
| 19250 |
+
{
|
| 19251 |
+
"epoch": 10.38,
|
| 19252 |
+
"learning_rate": 1.0537559418406849e-05,
|
| 19253 |
+
"loss": 0.18,
|
| 19254 |
+
"step": 962500
|
| 19255 |
+
},
|
| 19256 |
+
{
|
| 19257 |
+
"epoch": 10.38,
|
| 19258 |
+
"learning_rate": 1.0523337836487271e-05,
|
| 19259 |
+
"loss": 0.1799,
|
| 19260 |
+
"step": 963000
|
| 19261 |
+
},
|
| 19262 |
+
{
|
| 19263 |
+
"epoch": 10.38,
|
| 19264 |
+
"eval_loss": 0.17050015926361084,
|
| 19265 |
+
"eval_runtime": 2.6391,
|
| 19266 |
+
"eval_samples_per_second": 870.378,
|
| 19267 |
+
"eval_steps_per_second": 13.641,
|
| 19268 |
+
"step": 963000
|
| 19269 |
+
},
|
| 19270 |
+
{
|
| 19271 |
+
"epoch": 10.39,
|
| 19272 |
+
"learning_rate": 1.0509306201109092e-05,
|
| 19273 |
+
"loss": 0.1801,
|
| 19274 |
+
"step": 963500
|
| 19275 |
+
},
|
| 19276 |
+
{
|
| 19277 |
+
"epoch": 10.39,
|
| 19278 |
+
"learning_rate": 1.0495464550634267e-05,
|
| 19279 |
+
"loss": 0.18,
|
| 19280 |
+
"step": 964000
|
| 19281 |
+
},
|
| 19282 |
+
{
|
| 19283 |
+
"epoch": 10.39,
|
| 19284 |
+
"eval_loss": 0.17047521471977234,
|
| 19285 |
+
"eval_runtime": 2.6548,
|
| 19286 |
+
"eval_samples_per_second": 865.215,
|
| 19287 |
+
"eval_steps_per_second": 13.56,
|
| 19288 |
+
"step": 964000
|
| 19289 |
+
},
|
| 19290 |
+
{
|
| 19291 |
+
"epoch": 10.4,
|
| 19292 |
+
"learning_rate": 1.0481812922905339e-05,
|
| 19293 |
+
"loss": 0.1805,
|
| 19294 |
+
"step": 964500
|
| 19295 |
+
},
|
| 19296 |
+
{
|
| 19297 |
+
"epoch": 10.41,
|
| 19298 |
+
"learning_rate": 1.046835135524533e-05,
|
| 19299 |
+
"loss": 0.1798,
|
| 19300 |
+
"step": 965000
|
| 19301 |
+
},
|
| 19302 |
+
{
|
| 19303 |
+
"epoch": 10.41,
|
| 19304 |
+
"eval_loss": 0.17172271013259888,
|
| 19305 |
+
"eval_runtime": 2.5812,
|
| 19306 |
+
"eval_samples_per_second": 889.895,
|
| 19307 |
+
"eval_steps_per_second": 13.947,
|
| 19308 |
+
"step": 965000
|
| 19309 |
+
},
|
| 19310 |
+
{
|
| 19311 |
+
"epoch": 10.41,
|
| 19312 |
+
"learning_rate": 1.0455079884457653e-05,
|
| 19313 |
+
"loss": 0.1801,
|
| 19314 |
+
"step": 965500
|
| 19315 |
+
},
|
| 19316 |
+
{
|
| 19317 |
+
"epoch": 10.42,
|
| 19318 |
+
"learning_rate": 1.044199854682601e-05,
|
| 19319 |
+
"loss": 0.1797,
|
| 19320 |
+
"step": 966000
|
| 19321 |
+
},
|
| 19322 |
+
{
|
| 19323 |
+
"epoch": 10.42,
|
| 19324 |
+
"eval_loss": 0.16956347227096558,
|
| 19325 |
+
"eval_runtime": 2.6699,
|
| 19326 |
+
"eval_samples_per_second": 860.34,
|
| 19327 |
+
"eval_steps_per_second": 13.484,
|
| 19328 |
+
"step": 966000
|
| 19329 |
+
},
|
| 19330 |
+
{
|
| 19331 |
+
"epoch": 10.42,
|
| 19332 |
+
"learning_rate": 1.0429107378114277e-05,
|
| 19333 |
+
"loss": 0.1802,
|
| 19334 |
+
"step": 966500
|
| 19335 |
+
},
|
| 19336 |
+
{
|
| 19337 |
+
"epoch": 10.43,
|
| 19338 |
+
"learning_rate": 1.0416406413566414e-05,
|
| 19339 |
+
"loss": 0.1802,
|
| 19340 |
+
"step": 967000
|
| 19341 |
+
},
|
| 19342 |
+
{
|
| 19343 |
+
"epoch": 10.43,
|
| 19344 |
+
"eval_loss": 0.17151953279972076,
|
| 19345 |
+
"eval_runtime": 2.5629,
|
| 19346 |
+
"eval_samples_per_second": 896.252,
|
| 19347 |
+
"eval_steps_per_second": 14.047,
|
| 19348 |
+
"step": 967000
|
| 19349 |
+
},
|
| 19350 |
+
{
|
| 19351 |
+
"epoch": 10.43,
|
| 19352 |
+
"learning_rate": 1.0403895687906366e-05,
|
| 19353 |
+
"loss": 0.1803,
|
| 19354 |
+
"step": 967500
|
| 19355 |
+
},
|
| 19356 |
+
{
|
| 19357 |
+
"epoch": 10.44,
|
| 19358 |
+
"learning_rate": 1.0391575235337991e-05,
|
| 19359 |
+
"loss": 0.1798,
|
| 19360 |
+
"step": 968000
|
| 19361 |
+
},
|
| 19362 |
+
{
|
| 19363 |
+
"epoch": 10.44,
|
| 19364 |
+
"eval_loss": 0.17273712158203125,
|
| 19365 |
+
"eval_runtime": 2.6967,
|
| 19366 |
+
"eval_samples_per_second": 851.792,
|
| 19367 |
+
"eval_steps_per_second": 13.35,
|
| 19368 |
+
"step": 968000
|
| 19369 |
+
},
|
| 19370 |
+
{
|
| 19371 |
+
"epoch": 10.44,
|
| 19372 |
+
"learning_rate": 1.0379445089544929e-05,
|
| 19373 |
+
"loss": 0.1799,
|
| 19374 |
+
"step": 968500
|
| 19375 |
+
},
|
| 19376 |
+
{
|
| 19377 |
+
"epoch": 10.45,
|
| 19378 |
+
"learning_rate": 1.0367505283690547e-05,
|
| 19379 |
+
"loss": 0.1797,
|
| 19380 |
+
"step": 969000
|
| 19381 |
+
},
|
| 19382 |
+
{
|
| 19383 |
+
"epoch": 10.45,
|
| 19384 |
+
"eval_loss": 0.17085492610931396,
|
| 19385 |
+
"eval_runtime": 2.6519,
|
| 19386 |
+
"eval_samples_per_second": 866.18,
|
| 19387 |
+
"eval_steps_per_second": 13.575,
|
| 19388 |
+
"step": 969000
|
| 19389 |
+
},
|
| 19390 |
+
{
|
| 19391 |
+
"epoch": 10.46,
|
| 19392 |
+
"learning_rate": 1.0355755850417803e-05,
|
| 19393 |
+
"loss": 0.1797,
|
| 19394 |
+
"step": 969500
|
| 19395 |
+
},
|
| 19396 |
+
{
|
| 19397 |
+
"epoch": 10.46,
|
| 19398 |
+
"learning_rate": 1.0344196821849202e-05,
|
| 19399 |
+
"loss": 0.1799,
|
| 19400 |
+
"step": 970000
|
| 19401 |
+
},
|
| 19402 |
+
{
|
| 19403 |
+
"epoch": 10.46,
|
| 19404 |
+
"eval_loss": 0.1711302548646927,
|
| 19405 |
+
"eval_runtime": 2.5979,
|
| 19406 |
+
"eval_samples_per_second": 884.178,
|
| 19407 |
+
"eval_steps_per_second": 13.857,
|
| 19408 |
+
"step": 970000
|
| 19409 |
}
|
| 19410 |
],
|
| 19411 |
"max_steps": 1000000,
|
| 19412 |
"num_train_epochs": 12,
|
| 19413 |
+
"total_flos": 6.799670555468517e+22,
|
| 19414 |
"trial_name": null,
|
| 19415 |
"trial_params": null
|
| 19416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebc5b0eb8c3bb3dd69db934b29e5beccddfae1a796cf948830a8b05766d44497
|
| 3 |
size 449471589
|