Training in progress, step 1675, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2113bb192f2baa5cca5eaf82cdc29cc112a196f0d353c65e596bbc93a9194868
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d39914c7d80371a45e6dfe8ca4f4df8415bdc590cf39528b2981db04c2d9b5ab
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3abc69817bfc7e8384f353860e5ac5df7fe0bc25efa8cefc94df527ed53e449a
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3aa9ee72eac8c93f8b4aad2310d8b5495ea694aa75e620528535e72bb4aabf2e
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1987,6 +1987,30 @@
|
|
| 1987 |
"reward_std": 0.24298151433467866,
|
| 1988 |
"rewards/custom_reward_simplified_v7_dblog": 0.73125,
|
| 1989 |
"step": 1650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1990 |
}
|
| 1991 |
],
|
| 1992 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.013340129498809344,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1675,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1987 |
"reward_std": 0.24298151433467866,
|
| 1988 |
"rewards/custom_reward_simplified_v7_dblog": 0.73125,
|
| 1989 |
"step": 1650
|
| 1990 |
+
},
|
| 1991 |
+
{
|
| 1992 |
+
"completion_length": 727.9625,
|
| 1993 |
+
"epoch": 0.01322066565255135,
|
| 1994 |
+
"grad_norm": 0.14750860631465912,
|
| 1995 |
+
"kl": 0.018067248188890515,
|
| 1996 |
+
"learning_rate": 1.313499075630899e-06,
|
| 1997 |
+
"loss": 0.0007,
|
| 1998 |
+
"reward": 0.721875,
|
| 1999 |
+
"reward_std": 0.30838647186756135,
|
| 2000 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.721875,
|
| 2001 |
+
"step": 1660
|
| 2002 |
+
},
|
| 2003 |
+
{
|
| 2004 |
+
"completion_length": 780.08125,
|
| 2005 |
+
"epoch": 0.013300308216723346,
|
| 2006 |
+
"grad_norm": 0.2386309951543808,
|
| 2007 |
+
"kl": 0.017110610962845385,
|
| 2008 |
+
"learning_rate": 1.2816206721818944e-06,
|
| 2009 |
+
"loss": 0.0007,
|
| 2010 |
+
"reward": 0.6375,
|
| 2011 |
+
"reward_std": 0.26727318242192266,
|
| 2012 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.6375,
|
| 2013 |
+
"step": 1670
|
| 2014 |
}
|
| 2015 |
],
|
| 2016 |
"logging_steps": 10,
|