Training in progress, step 1625, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:423b64fbb1ea140492cfc24ef9ece60648b191c70dd895716bfd7089e67b8208
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5d3b30fa482768f1ad2b80ee5bc9e5292d3baf25108f33db5b8a542a06ce420
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8eed12b44c80cdfb15b0a3f1fe87a11acc2ec200f42a39d9eb205ab6178ed036
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72ed1d5ef4494540c37a040ccc45c1b5c7b0e918a3cfabfd70bfd42fcea055f3
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1927,6 +1927,30 @@
|
|
| 1927 |
"reward_std": 0.2901748239994049,
|
| 1928 |
"rewards/custom_reward_simplified_v7_dblog": 0.759375,
|
| 1929 |
"step": 1600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1930 |
}
|
| 1931 |
],
|
| 1932 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.012941916677949363,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1625,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1927 |
"reward_std": 0.2901748239994049,
|
| 1928 |
"rewards/custom_reward_simplified_v7_dblog": 0.759375,
|
| 1929 |
"step": 1600
|
| 1930 |
+
},
|
| 1931 |
+
{
|
| 1932 |
+
"completion_length": 652.49375,
|
| 1933 |
+
"epoch": 0.012822452831691368,
|
| 1934 |
+
"grad_norm": 0.12168209999799728,
|
| 1935 |
+
"kl": 0.012750855972990393,
|
| 1936 |
+
"learning_rate": 1.4765192136847686e-06,
|
| 1937 |
+
"loss": 0.0005,
|
| 1938 |
+
"reward": 0.728125,
|
| 1939 |
+
"reward_std": 0.26915703564882276,
|
| 1940 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.728125,
|
| 1941 |
+
"step": 1610
|
| 1942 |
+
},
|
| 1943 |
+
{
|
| 1944 |
+
"completion_length": 660.95625,
|
| 1945 |
+
"epoch": 0.012902095395863365,
|
| 1946 |
+
"grad_norm": 0.13546766340732574,
|
| 1947 |
+
"kl": 0.013546877074986696,
|
| 1948 |
+
"learning_rate": 1.443454345648252e-06,
|
| 1949 |
+
"loss": 0.0005,
|
| 1950 |
+
"reward": 0.790625,
|
| 1951 |
+
"reward_std": 0.1937400370836258,
|
| 1952 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.790625,
|
| 1953 |
+
"step": 1620
|
| 1954 |
}
|
| 1955 |
],
|
| 1956 |
"logging_steps": 10,
|