Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step4100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4100/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b23217742b312e0ba6a642fbced78169e97e5bd94aa8ec9429ceefc05f1a76b
|
| 3 |
size 1037269336
|
last-checkpoint/global_step4100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1dea63ea269d8a8db77c339ab50f09e474eadb1a6659d06ab7df2dbde5aac2c
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step4100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea94dca34e505bbaa93fb7fe6a1bcdcc87d57c1eb86c42d96a787243b93d70bb
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a02d8491507c62bd6bf4534b2406d66207877a390403265562036f29f45b719
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:791541856934b608332f89404414aa6812d82a818052289589ae21c1b1b0ec9f
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4100/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2cbbf173e016d84a36966c4ae3c102fc4756b8d9faaa3ddf6008f53e9b95ee1
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step4100
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bbc3d1660e77580d684add9546e5fe8bd6fc84071100e9a520c41d938330a79
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00d7703de6e560ffbbf010cbfffa20522d7da9ff9f4719e1064c19461079ea48
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:204484b4dccae23b095603bfb2d8fc482440509c028607bd9556adf092617aac
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0fceb540dfe7b45fc0da1b3cdddd6d3f71c61414fae78500c040a17afd7ae2e
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59fba9955671eaa664ef7e8ac2aec090cfd8274510ae38341a2658c4438b5bf0
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6248,6 +6248,162 @@
|
|
| 6248 |
"eval_samples_per_second": 172.663,
|
| 6249 |
"eval_steps_per_second": 10.827,
|
| 6250 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6251 |
}
|
| 6252 |
],
|
| 6253 |
"logging_steps": 5,
|
|
@@ -6276,7 +6432,7 @@
|
|
| 6276 |
"attributes": {}
|
| 6277 |
}
|
| 6278 |
},
|
| 6279 |
-
"total_flos": 1.
|
| 6280 |
"train_batch_size": 4,
|
| 6281 |
"trial_name": null,
|
| 6282 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.923519253730774,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.5960168629161215,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 4100,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6248 |
"eval_samples_per_second": 172.663,
|
| 6249 |
"eval_steps_per_second": 10.827,
|
| 6250 |
"step": 4000
|
| 6251 |
+
},
|
| 6252 |
+
{
|
| 6253 |
+
"epoch": 0.5822067160924553,
|
| 6254 |
+
"grad_norm": 2.408169984817505,
|
| 6255 |
+
"learning_rate": 6.642383155912741e-05,
|
| 6256 |
+
"loss": 2.133,
|
| 6257 |
+
"step": 4005
|
| 6258 |
+
},
|
| 6259 |
+
{
|
| 6260 |
+
"epoch": 0.5829335659252798,
|
| 6261 |
+
"grad_norm": 2.9105172157287598,
|
| 6262 |
+
"learning_rate": 6.634866071919054e-05,
|
| 6263 |
+
"loss": 2.124,
|
| 6264 |
+
"step": 4010
|
| 6265 |
+
},
|
| 6266 |
+
{
|
| 6267 |
+
"epoch": 0.5836604157581043,
|
| 6268 |
+
"grad_norm": 2.630783796310425,
|
| 6269 |
+
"learning_rate": 6.627344896916006e-05,
|
| 6270 |
+
"loss": 2.1179,
|
| 6271 |
+
"step": 4015
|
| 6272 |
+
},
|
| 6273 |
+
{
|
| 6274 |
+
"epoch": 0.5843872655909289,
|
| 6275 |
+
"grad_norm": 2.399688482284546,
|
| 6276 |
+
"learning_rate": 6.619819649838151e-05,
|
| 6277 |
+
"loss": 2.1174,
|
| 6278 |
+
"step": 4020
|
| 6279 |
+
},
|
| 6280 |
+
{
|
| 6281 |
+
"epoch": 0.5851141154237535,
|
| 6282 |
+
"grad_norm": 2.5117554664611816,
|
| 6283 |
+
"learning_rate": 6.612290349630285e-05,
|
| 6284 |
+
"loss": 2.0063,
|
| 6285 |
+
"step": 4025
|
| 6286 |
+
},
|
| 6287 |
+
{
|
| 6288 |
+
"epoch": 0.585840965256578,
|
| 6289 |
+
"grad_norm": 2.6324381828308105,
|
| 6290 |
+
"learning_rate": 6.604757015247416e-05,
|
| 6291 |
+
"loss": 2.057,
|
| 6292 |
+
"step": 4030
|
| 6293 |
+
},
|
| 6294 |
+
{
|
| 6295 |
+
"epoch": 0.5865678150894025,
|
| 6296 |
+
"grad_norm": 2.442852258682251,
|
| 6297 |
+
"learning_rate": 6.597219665654702e-05,
|
| 6298 |
+
"loss": 1.933,
|
| 6299 |
+
"step": 4035
|
| 6300 |
+
},
|
| 6301 |
+
{
|
| 6302 |
+
"epoch": 0.587294664922227,
|
| 6303 |
+
"grad_norm": 2.4938302040100098,
|
| 6304 |
+
"learning_rate": 6.589678319827412e-05,
|
| 6305 |
+
"loss": 2.2347,
|
| 6306 |
+
"step": 4040
|
| 6307 |
+
},
|
| 6308 |
+
{
|
| 6309 |
+
"epoch": 0.5880215147550516,
|
| 6310 |
+
"grad_norm": 2.2091469764709473,
|
| 6311 |
+
"learning_rate": 6.582132996750874e-05,
|
| 6312 |
+
"loss": 2.0614,
|
| 6313 |
+
"step": 4045
|
| 6314 |
+
},
|
| 6315 |
+
{
|
| 6316 |
+
"epoch": 0.5887483645878762,
|
| 6317 |
+
"grad_norm": 2.2665116786956787,
|
| 6318 |
+
"learning_rate": 6.574583715420433e-05,
|
| 6319 |
+
"loss": 2.085,
|
| 6320 |
+
"step": 4050
|
| 6321 |
+
},
|
| 6322 |
+
{
|
| 6323 |
+
"epoch": 0.5887483645878762,
|
| 6324 |
+
"eval_loss": 1.9283087253570557,
|
| 6325 |
+
"eval_runtime": 21.1511,
|
| 6326 |
+
"eval_samples_per_second": 156.068,
|
| 6327 |
+
"eval_steps_per_second": 9.787,
|
| 6328 |
+
"step": 4050
|
| 6329 |
+
},
|
| 6330 |
+
{
|
| 6331 |
+
"epoch": 0.5894752144207007,
|
| 6332 |
+
"grad_norm": 2.5516645908355713,
|
| 6333 |
+
"learning_rate": 6.567030494841393e-05,
|
| 6334 |
+
"loss": 2.1021,
|
| 6335 |
+
"step": 4055
|
| 6336 |
+
},
|
| 6337 |
+
{
|
| 6338 |
+
"epoch": 0.5902020642535252,
|
| 6339 |
+
"grad_norm": 2.4371495246887207,
|
| 6340 |
+
"learning_rate": 6.559473354028979e-05,
|
| 6341 |
+
"loss": 2.0655,
|
| 6342 |
+
"step": 4060
|
| 6343 |
+
},
|
| 6344 |
+
{
|
| 6345 |
+
"epoch": 0.5909289140863497,
|
| 6346 |
+
"grad_norm": 2.0865836143493652,
|
| 6347 |
+
"learning_rate": 6.551912312008285e-05,
|
| 6348 |
+
"loss": 2.1788,
|
| 6349 |
+
"step": 4065
|
| 6350 |
+
},
|
| 6351 |
+
{
|
| 6352 |
+
"epoch": 0.5916557639191743,
|
| 6353 |
+
"grad_norm": 2.408687114715576,
|
| 6354 |
+
"learning_rate": 6.544347387814224e-05,
|
| 6355 |
+
"loss": 2.1187,
|
| 6356 |
+
"step": 4070
|
| 6357 |
+
},
|
| 6358 |
+
{
|
| 6359 |
+
"epoch": 0.5923826137519989,
|
| 6360 |
+
"grad_norm": 2.4930145740509033,
|
| 6361 |
+
"learning_rate": 6.536778600491481e-05,
|
| 6362 |
+
"loss": 2.2741,
|
| 6363 |
+
"step": 4075
|
| 6364 |
+
},
|
| 6365 |
+
{
|
| 6366 |
+
"epoch": 0.5931094635848234,
|
| 6367 |
+
"grad_norm": 2.3992059230804443,
|
| 6368 |
+
"learning_rate": 6.529205969094474e-05,
|
| 6369 |
+
"loss": 1.9715,
|
| 6370 |
+
"step": 4080
|
| 6371 |
+
},
|
| 6372 |
+
{
|
| 6373 |
+
"epoch": 0.5938363134176479,
|
| 6374 |
+
"grad_norm": 2.214466094970703,
|
| 6375 |
+
"learning_rate": 6.521629512687291e-05,
|
| 6376 |
+
"loss": 2.1169,
|
| 6377 |
+
"step": 4085
|
| 6378 |
+
},
|
| 6379 |
+
{
|
| 6380 |
+
"epoch": 0.5945631632504724,
|
| 6381 |
+
"grad_norm": 2.3627679347991943,
|
| 6382 |
+
"learning_rate": 6.514049250343653e-05,
|
| 6383 |
+
"loss": 1.9602,
|
| 6384 |
+
"step": 4090
|
| 6385 |
+
},
|
| 6386 |
+
{
|
| 6387 |
+
"epoch": 0.595290013083297,
|
| 6388 |
+
"grad_norm": 2.594008684158325,
|
| 6389 |
+
"learning_rate": 6.506465201146858e-05,
|
| 6390 |
+
"loss": 2.1459,
|
| 6391 |
+
"step": 4095
|
| 6392 |
+
},
|
| 6393 |
+
{
|
| 6394 |
+
"epoch": 0.5960168629161215,
|
| 6395 |
+
"grad_norm": 1.9788795709609985,
|
| 6396 |
+
"learning_rate": 6.498877384189746e-05,
|
| 6397 |
+
"loss": 1.898,
|
| 6398 |
+
"step": 4100
|
| 6399 |
+
},
|
| 6400 |
+
{
|
| 6401 |
+
"epoch": 0.5960168629161215,
|
| 6402 |
+
"eval_loss": 1.923519253730774,
|
| 6403 |
+
"eval_runtime": 18.9492,
|
| 6404 |
+
"eval_samples_per_second": 174.203,
|
| 6405 |
+
"eval_steps_per_second": 10.924,
|
| 6406 |
+
"step": 4100
|
| 6407 |
}
|
| 6408 |
],
|
| 6409 |
"logging_steps": 5,
|
|
|
|
| 6432 |
"attributes": {}
|
| 6433 |
}
|
| 6434 |
},
|
| 6435 |
+
"total_flos": 1.06971867773508e+18,
|
| 6436 |
"train_batch_size": 4,
|
| 6437 |
"trial_name": null,
|
| 6438 |
"trial_params": null
|