smirki committed on
Commit
154a011
·
verified ·
1 Parent(s): feea588

Training in progress, step 825, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d558d1688ac2b0773f776e1b01f11ad8d6b0eb98b60b2c83c0a92982a67ea9ef
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be08547166dacd1b7d161c2f41cf30693b0c59e1e628537d5d7645d2ea4fcbbf
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7a8d7e6d4c609cc4c9c06b2f897ea7480f74fe48ee69c6f4734353d452ce614
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:718a80129d5413c606f827de20a6c1acb27fb76dcd018a018ecd6df21b62763d
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4221eac67cb2222c2e69aeda63b681815cb4044bdeac0d3934f95aca477f1f85
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:657c2c45e0b68fdd44aeab5a6c64190c8d2f3733cd86796d96b53daec5b3952e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a1181d7867e1e99d3e40b537197ac0f6f5cff5e55e17adc3195122e9858dab9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fc859065f77e5cbabb1b099e6f8bcbc876c53f6186972adb97152a1da9fa4d4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.006371405133759687,
5
  "eval_steps": 500,
6
- "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -967,6 +967,30 @@
967
  "reward_std": 0.25832219421863556,
968
  "rewards/custom_reward_simplified_v7_dblog": 0.746875,
969
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
970
  }
971
  ],
972
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.006570511544189677,
5
  "eval_steps": 500,
6
+ "global_step": 825,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
967
  "reward_std": 0.25832219421863556,
968
  "rewards/custom_reward_simplified_v7_dblog": 0.746875,
969
  "step": 800
970
+ },
971
+ {
972
+ "completion_length": 652.925,
973
+ "epoch": 0.006451047697931683,
974
+ "grad_norm": 0.0062674470245838165,
975
+ "kl": 0.006221415114123374,
976
+ "learning_rate": 4.188975519039151e-06,
977
+ "loss": 0.0002,
978
+ "reward": 0.73125,
979
+ "reward_std": 0.3172403134405613,
980
+ "rewards/custom_reward_simplified_v7_dblog": 0.73125,
981
+ "step": 810
982
+ },
983
+ {
984
+ "completion_length": 668.63125,
985
+ "epoch": 0.006530690262103679,
986
+ "grad_norm": 0.13624051213264465,
987
+ "kl": 0.0063671735813841225,
988
+ "learning_rate": 4.161989664034844e-06,
989
+ "loss": 0.0003,
990
+ "reward": 0.684375,
991
+ "reward_std": 0.24903304055333136,
992
+ "rewards/custom_reward_simplified_v7_dblog": 0.684375,
993
+ "step": 820
994
  }
995
  ],
996
  "logging_steps": 10,