Training in progress, step 1750, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64cb29f82be51963b1c87a3edf410599592d499ccb583a2211299eabdac88a5e
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba89ced11fa4b1eb1cfa02eb7f638c31d45ea49afc6a96d68cbdaf46dd9663d2
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11f42b997535391b4778f6818a4f5d5cc944d2434ea0edc70a8def90290e971a
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f817dba2e28a76d635b3c9e18d88eacd11804518703ee89b156fb1819d644283
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2071,6 +2071,42 @@
|
|
| 2071 |
"reward_std": 0.26179009675979614,
|
| 2072 |
"rewards/custom_reward_simplified_v7_dblog": 0.725,
|
| 2073 |
"step": 1720
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2074 |
}
|
| 2075 |
],
|
| 2076 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.013937448730099314,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1750,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2071 |
"reward_std": 0.26179009675979614,
|
| 2072 |
"rewards/custom_reward_simplified_v7_dblog": 0.725,
|
| 2073 |
"step": 1720
|
| 2074 |
+
},
|
| 2075 |
+
{
|
| 2076 |
+
"completion_length": 633.31875,
|
| 2077 |
+
"epoch": 0.013778163601755322,
|
| 2078 |
+
"grad_norm": 0.22654354572296143,
|
| 2079 |
+
"kl": 0.013244283269159496,
|
| 2080 |
+
"learning_rate": 1.0959947330412681e-06,
|
| 2081 |
+
"loss": 0.0005,
|
| 2082 |
+
"reward": 0.921875,
|
| 2083 |
+
"reward_std": 0.2066536843776703,
|
| 2084 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.921875,
|
| 2085 |
+
"step": 1730
|
| 2086 |
+
},
|
| 2087 |
+
{
|
| 2088 |
+
"completion_length": 615.29375,
|
| 2089 |
+
"epoch": 0.013857806165927319,
|
| 2090 |
+
"grad_norm": 0.22673261165618896,
|
| 2091 |
+
"kl": 0.014753601653501392,
|
| 2092 |
+
"learning_rate": 1.0660589091223854e-06,
|
| 2093 |
+
"loss": 0.0006,
|
| 2094 |
+
"reward": 0.815625,
|
| 2095 |
+
"reward_std": 0.30853241235017775,
|
| 2096 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.815625,
|
| 2097 |
+
"step": 1740
|
| 2098 |
+
},
|
| 2099 |
+
{
|
| 2100 |
+
"completion_length": 630.3625,
|
| 2101 |
+
"epoch": 0.013937448730099314,
|
| 2102 |
+
"grad_norm": 0.012196751311421394,
|
| 2103 |
+
"kl": 0.01440229129511863,
|
| 2104 |
+
"learning_rate": 1.0364264155751489e-06,
|
| 2105 |
+
"loss": 0.0006,
|
| 2106 |
+
"reward": 0.915625,
|
| 2107 |
+
"reward_std": 0.23927971720695496,
|
| 2108 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.915625,
|
| 2109 |
+
"step": 1750
|
| 2110 |
}
|
| 2111 |
],
|
| 2112 |
"logging_steps": 10,
|