Upload folder using huggingface_hub
Browse files- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +263 -3
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -29,12 +29,12 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
|
|
|
|
|
|
|
|
|
| 32 |
"up_proj",
|
| 33 |
"k_proj",
|
| 34 |
-
"down_proj",
|
| 35 |
-
"gate_proj",
|
| 36 |
"o_proj",
|
| 37 |
-
"v_proj",
|
| 38 |
"q_proj"
|
| 39 |
],
|
| 40 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"v_proj",
|
| 33 |
+
"gate_proj",
|
| 34 |
+
"down_proj",
|
| 35 |
"up_proj",
|
| 36 |
"k_proj",
|
|
|
|
|
|
|
| 37 |
"o_proj",
|
|
|
|
| 38 |
"q_proj"
|
| 39 |
],
|
| 40 |
"task_type": "CAUSAL_LM",
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 262406656
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b02836ae885c9a379cab7d418669e282e39824c3e8442660ff95b5af732c1d3
|
| 3 |
size 262406656
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 122872331
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a10cc964c60bfde918121f701449487ada51f8d814bd8c940c07460e711c0350
|
| 3 |
size 122872331
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b86a856532e9dbe010592bfb956348463afea72b9792aedba460c25c8145b5ae
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14a4abf5cec20ca257e82b8cc70b248ece9e4b9d912fb1d104f5f1a77d9ff1ea
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -37318,11 +37318,271 @@
|
|
| 37318 |
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37319 |
"rewards/quality_reward_func/std": 0.0,
|
| 37320 |
"step": 14350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37321 |
}
|
| 37322 |
],
|
| 37323 |
"logging_steps": 10,
|
| 37324 |
"max_steps": 14544,
|
| 37325 |
-
"num_input_tokens_seen":
|
| 37326 |
"num_train_epochs": 1,
|
| 37327 |
"save_steps": 50,
|
| 37328 |
"stateful_callbacks": {
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.9935368536853685,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14450,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 37318 |
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37319 |
"rewards/quality_reward_func/std": 0.0,
|
| 37320 |
"step": 14350
|
| 37321 |
+
},
|
| 37322 |
+
{
|
| 37323 |
+
"completion_length": 18.3,
|
| 37324 |
+
"completions/clipped_ratio": 0.0,
|
| 37325 |
+
"completions/max_length": 18.3,
|
| 37326 |
+
"completions/max_terminated_length": 18.3,
|
| 37327 |
+
"completions/mean_length": 16.3,
|
| 37328 |
+
"completions/mean_terminated_length": 16.3,
|
| 37329 |
+
"completions/min_length": 15.1,
|
| 37330 |
+
"completions/min_terminated_length": 15.1,
|
| 37331 |
+
"epoch": 0.9873487348734874,
|
| 37332 |
+
"frac_reward_zero_std": 1.0,
|
| 37333 |
+
"grad_norm": 0.0,
|
| 37334 |
+
"kl": 1.2089169837534428,
|
| 37335 |
+
"learning_rate": 2.464160105727642e-09,
|
| 37336 |
+
"loss": 0.0,
|
| 37337 |
+
"num_tokens": 20642162.0,
|
| 37338 |
+
"reward": 4.099999904632568,
|
| 37339 |
+
"reward_std": 0.0,
|
| 37340 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37341 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37342 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37343 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37344 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37345 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37346 |
+
"step": 14360
|
| 37347 |
+
},
|
| 37348 |
+
{
|
| 37349 |
+
"completion_length": 19.2,
|
| 37350 |
+
"completions/clipped_ratio": 0.0,
|
| 37351 |
+
"completions/max_length": 19.2,
|
| 37352 |
+
"completions/max_terminated_length": 19.2,
|
| 37353 |
+
"completions/mean_length": 17.275,
|
| 37354 |
+
"completions/mean_terminated_length": 17.275,
|
| 37355 |
+
"completions/min_length": 16.2,
|
| 37356 |
+
"completions/min_terminated_length": 16.2,
|
| 37357 |
+
"epoch": 0.988036303630363,
|
| 37358 |
+
"frac_reward_zero_std": 1.0,
|
| 37359 |
+
"grad_norm": 0.0,
|
| 37360 |
+
"kl": 1.3353900104761123,
|
| 37361 |
+
"learning_rate": 2.2050024110001345e-09,
|
| 37362 |
+
"loss": 0.0,
|
| 37363 |
+
"num_tokens": 20656053.0,
|
| 37364 |
+
"reward": 4.099999904632568,
|
| 37365 |
+
"reward_std": 0.0,
|
| 37366 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37367 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37368 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37369 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37370 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37371 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37372 |
+
"step": 14370
|
| 37373 |
+
},
|
| 37374 |
+
{
|
| 37375 |
+
"completion_length": 17.4,
|
| 37376 |
+
"completions/clipped_ratio": 0.0,
|
| 37377 |
+
"completions/max_length": 17.4,
|
| 37378 |
+
"completions/max_terminated_length": 17.4,
|
| 37379 |
+
"completions/mean_length": 16.175,
|
| 37380 |
+
"completions/mean_terminated_length": 16.175,
|
| 37381 |
+
"completions/min_length": 15.6,
|
| 37382 |
+
"completions/min_terminated_length": 15.6,
|
| 37383 |
+
"epoch": 0.9887238723872387,
|
| 37384 |
+
"frac_reward_zero_std": 1.0,
|
| 37385 |
+
"grad_norm": 0.0,
|
| 37386 |
+
"kl": 1.1925065904855727,
|
| 37387 |
+
"learning_rate": 1.9602341397173542e-09,
|
| 37388 |
+
"loss": 0.0,
|
| 37389 |
+
"num_tokens": 20669724.0,
|
| 37390 |
+
"reward": 4.099999904632568,
|
| 37391 |
+
"reward_std": 0.0,
|
| 37392 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37393 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37394 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37395 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37396 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37397 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37398 |
+
"step": 14380
|
| 37399 |
+
},
|
| 37400 |
+
{
|
| 37401 |
+
"completion_length": 16.5,
|
| 37402 |
+
"completions/clipped_ratio": 0.0,
|
| 37403 |
+
"completions/max_length": 16.5,
|
| 37404 |
+
"completions/max_terminated_length": 16.5,
|
| 37405 |
+
"completions/mean_length": 15.575,
|
| 37406 |
+
"completions/mean_terminated_length": 15.575,
|
| 37407 |
+
"completions/min_length": 14.6,
|
| 37408 |
+
"completions/min_terminated_length": 14.6,
|
| 37409 |
+
"epoch": 0.9894114411441144,
|
| 37410 |
+
"frac_reward_zero_std": 1.0,
|
| 37411 |
+
"grad_norm": 0.0,
|
| 37412 |
+
"kl": 1.3791985176503658,
|
| 37413 |
+
"learning_rate": 1.7298567019527479e-09,
|
| 37414 |
+
"loss": 0.0,
|
| 37415 |
+
"num_tokens": 20687811.0,
|
| 37416 |
+
"reward": 4.099999904632568,
|
| 37417 |
+
"reward_std": 0.0,
|
| 37418 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37419 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37420 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37421 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37422 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37423 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37424 |
+
"step": 14390
|
| 37425 |
+
},
|
| 37426 |
+
{
|
| 37427 |
+
"completion_length": 17.3,
|
| 37428 |
+
"completions/clipped_ratio": 0.0,
|
| 37429 |
+
"completions/max_length": 17.3,
|
| 37430 |
+
"completions/max_terminated_length": 17.3,
|
| 37431 |
+
"completions/mean_length": 15.5,
|
| 37432 |
+
"completions/mean_terminated_length": 15.5,
|
| 37433 |
+
"completions/min_length": 14.4,
|
| 37434 |
+
"completions/min_terminated_length": 14.4,
|
| 37435 |
+
"epoch": 0.9900990099009901,
|
| 37436 |
+
"frac_reward_zero_std": 1.0,
|
| 37437 |
+
"grad_norm": 0.0,
|
| 37438 |
+
"kl": 1.4439594164490699,
|
| 37439 |
+
"learning_rate": 1.513871424876079e-09,
|
| 37440 |
+
"loss": 0.0,
|
| 37441 |
+
"num_tokens": 20703267.0,
|
| 37442 |
+
"reward": 4.099999904632568,
|
| 37443 |
+
"reward_std": 0.0,
|
| 37444 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37445 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37446 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37447 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37448 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37449 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37450 |
+
"step": 14400
|
| 37451 |
+
},
|
| 37452 |
+
{
|
| 37453 |
+
"completion_length": 20.4,
|
| 37454 |
+
"completions/clipped_ratio": 0.0,
|
| 37455 |
+
"completions/max_length": 20.4,
|
| 37456 |
+
"completions/max_terminated_length": 20.4,
|
| 37457 |
+
"completions/mean_length": 18.975,
|
| 37458 |
+
"completions/mean_terminated_length": 18.975,
|
| 37459 |
+
"completions/min_length": 17.6,
|
| 37460 |
+
"completions/min_terminated_length": 17.6,
|
| 37461 |
+
"epoch": 0.9907865786578658,
|
| 37462 |
+
"frac_reward_zero_std": 1.0,
|
| 37463 |
+
"grad_norm": 0.0,
|
| 37464 |
+
"kl": 1.0520384900271893,
|
| 37465 |
+
"learning_rate": 1.3122795527467647e-09,
|
| 37466 |
+
"loss": 0.0,
|
| 37467 |
+
"num_tokens": 20715926.0,
|
| 37468 |
+
"reward": 4.099999904632568,
|
| 37469 |
+
"reward_std": 0.0,
|
| 37470 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37471 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37472 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37473 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37474 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37475 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37476 |
+
"step": 14410
|
| 37477 |
+
},
|
| 37478 |
+
{
|
| 37479 |
+
"completion_length": 16.3,
|
| 37480 |
+
"completions/clipped_ratio": 0.0,
|
| 37481 |
+
"completions/max_length": 16.3,
|
| 37482 |
+
"completions/max_terminated_length": 16.3,
|
| 37483 |
+
"completions/mean_length": 15.575,
|
| 37484 |
+
"completions/mean_terminated_length": 15.575,
|
| 37485 |
+
"completions/min_length": 14.8,
|
| 37486 |
+
"completions/min_terminated_length": 14.8,
|
| 37487 |
+
"epoch": 0.9914741474147415,
|
| 37488 |
+
"frac_reward_zero_std": 1.0,
|
| 37489 |
+
"grad_norm": 0.0,
|
| 37490 |
+
"kl": 1.1841505281394347,
|
| 37491 |
+
"learning_rate": 1.125082246904996e-09,
|
| 37492 |
+
"loss": 0.0,
|
| 37493 |
+
"num_tokens": 20729649.0,
|
| 37494 |
+
"reward": 4.099999904632568,
|
| 37495 |
+
"reward_std": 0.0,
|
| 37496 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37497 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37498 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37499 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37500 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37501 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37502 |
+
"step": 14420
|
| 37503 |
+
},
|
| 37504 |
+
{
|
| 37505 |
+
"completion_length": 18.1,
|
| 37506 |
+
"completions/clipped_ratio": 0.0,
|
| 37507 |
+
"completions/max_length": 18.1,
|
| 37508 |
+
"completions/max_terminated_length": 18.1,
|
| 37509 |
+
"completions/mean_length": 17.3,
|
| 37510 |
+
"completions/mean_terminated_length": 17.3,
|
| 37511 |
+
"completions/min_length": 16.4,
|
| 37512 |
+
"completions/min_terminated_length": 16.4,
|
| 37513 |
+
"epoch": 0.9921617161716172,
|
| 37514 |
+
"frac_reward_zero_std": 1.0,
|
| 37515 |
+
"grad_norm": 0.0,
|
| 37516 |
+
"kl": 1.377218122780323,
|
| 37517 |
+
"learning_rate": 9.52280585766463e-10,
|
| 37518 |
+
"loss": 0.0,
|
| 37519 |
+
"num_tokens": 20747045.0,
|
| 37520 |
+
"reward": 4.099999904632568,
|
| 37521 |
+
"reward_std": 0.0,
|
| 37522 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37523 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37524 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37525 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37526 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37527 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37528 |
+
"step": 14430
|
| 37529 |
+
},
|
| 37530 |
+
{
|
| 37531 |
+
"completion_length": 16.3,
|
| 37532 |
+
"completions/clipped_ratio": 0.0,
|
| 37533 |
+
"completions/max_length": 16.3,
|
| 37534 |
+
"completions/max_terminated_length": 16.3,
|
| 37535 |
+
"completions/mean_length": 15.275,
|
| 37536 |
+
"completions/mean_terminated_length": 15.275,
|
| 37537 |
+
"completions/min_length": 14.4,
|
| 37538 |
+
"completions/min_terminated_length": 14.4,
|
| 37539 |
+
"epoch": 0.9928492849284929,
|
| 37540 |
+
"frac_reward_zero_std": 1.0,
|
| 37541 |
+
"grad_norm": 0.0,
|
| 37542 |
+
"kl": 1.112162598967552,
|
| 37543 |
+
"learning_rate": 7.938755648156938e-10,
|
| 37544 |
+
"loss": 0.0,
|
| 37545 |
+
"num_tokens": 20758908.0,
|
| 37546 |
+
"reward": 4.099999904632568,
|
| 37547 |
+
"reward_std": 0.0,
|
| 37548 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37549 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37550 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37551 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37552 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37553 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37554 |
+
"step": 14440
|
| 37555 |
+
},
|
| 37556 |
+
{
|
| 37557 |
+
"completion_length": 19.2,
|
| 37558 |
+
"completions/clipped_ratio": 0.0,
|
| 37559 |
+
"completions/max_length": 19.2,
|
| 37560 |
+
"completions/max_terminated_length": 19.2,
|
| 37561 |
+
"completions/mean_length": 16.7,
|
| 37562 |
+
"completions/mean_terminated_length": 16.7,
|
| 37563 |
+
"completions/min_length": 14.4,
|
| 37564 |
+
"completions/min_terminated_length": 14.4,
|
| 37565 |
+
"epoch": 0.9935368536853685,
|
| 37566 |
+
"frac_reward_zero_std": 1.0,
|
| 37567 |
+
"grad_norm": 0.0,
|
| 37568 |
+
"kl": 0.9231208987534046,
|
| 37569 |
+
"learning_rate": 6.498680966007809e-10,
|
| 37570 |
+
"loss": 0.0,
|
| 37571 |
+
"num_tokens": 20774668.0,
|
| 37572 |
+
"reward": 4.099999904632568,
|
| 37573 |
+
"reward_std": 0.0,
|
| 37574 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 37575 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 37576 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 37577 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 37578 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 37579 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 37580 |
+
"step": 14450
|
| 37581 |
}
|
| 37582 |
],
|
| 37583 |
"logging_steps": 10,
|
| 37584 |
"max_steps": 14544,
|
| 37585 |
+
"num_input_tokens_seen": 20774668,
|
| 37586 |
"num_train_epochs": 1,
|
| 37587 |
"save_steps": 50,
|
| 37588 |
"stateful_callbacks": {
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7057
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88b07759f83baf72651a5497140bc0ce24b06811a14c3e8fba0534eb4ac791ea
|
| 3 |
size 7057
|