smirki commited on
Commit
ddc2d26
·
verified ·
1 Parent(s): 1c57ac2

Training in progress, step 850, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be08547166dacd1b7d161c2f41cf30693b0c59e1e628537d5d7645d2ea4fcbbf
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ce44bac1ce014ef00371dcfe7ea68b6f02d101b1d59b1aa9f17f88cfa3a6ffd
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:718a80129d5413c606f827de20a6c1acb27fb76dcd018a018ecd6df21b62763d
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6fcd046738e17acd72c93dbe6a5138541324f8eafc50e92bde39626096f2c0c
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:657c2c45e0b68fdd44aeab5a6c64190c8d2f3733cd86796d96b53daec5b3952e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c7652a6447ec0e6f7f58ed91608cc677939e784b3d87b8a23910c68dcce8f5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fc859065f77e5cbabb1b099e6f8bcbc876c53f6186972adb97152a1da9fa4d4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878236b83e42d9db1519008b5ab4529f212aeb27f63481cc4016850fcb712d03
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.006570511544189677,
5
  "eval_steps": 500,
6
- "global_step": 825,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -991,6 +991,42 @@
991
  "reward_std": 0.24903304055333136,
992
  "rewards/custom_reward_simplified_v7_dblog": 0.684375,
993
  "step": 820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
994
  }
995
  ],
996
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.006769617954619667,
5
  "eval_steps": 500,
6
+ "global_step": 850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
991
  "reward_std": 0.24903304055333136,
992
  "rewards/custom_reward_simplified_v7_dblog": 0.684375,
993
  "step": 820
994
+ },
995
+ {
996
+ "completion_length": 658.575,
997
+ "epoch": 0.006610332826275675,
998
+ "grad_norm": 0.2923766076564789,
999
+ "kl": 0.0068331335205584764,
1000
+ "learning_rate": 4.134652238142674e-06,
1001
+ "loss": 0.0003,
1002
+ "reward": 0.73125,
1003
+ "reward_std": 0.3243869088590145,
1004
+ "rewards/custom_reward_simplified_v7_dblog": 0.73125,
1005
+ "step": 830
1006
+ },
1007
+ {
1008
+ "completion_length": 645.31875,
1009
+ "epoch": 0.006689975390447671,
1010
+ "grad_norm": 0.22414511442184448,
1011
+ "kl": 0.006329123536124826,
1012
+ "learning_rate": 4.106969024216348e-06,
1013
+ "loss": 0.0003,
1014
+ "reward": 0.728125,
1015
+ "reward_std": 0.2578707054257393,
1016
+ "rewards/custom_reward_simplified_v7_dblog": 0.728125,
1017
+ "step": 840
1018
+ },
1019
+ {
1020
+ "completion_length": 620.76875,
1021
+ "epoch": 0.006769617954619667,
1022
+ "grad_norm": 0.2500353455543518,
1023
+ "kl": 0.006427089823409915,
1024
+ "learning_rate": 4.078945878256244e-06,
1025
+ "loss": 0.0003,
1026
+ "reward": 0.85625,
1027
+ "reward_std": 0.3704014003276825,
1028
+ "rewards/custom_reward_simplified_v7_dblog": 0.85625,
1029
+ "step": 850
1030
  }
1031
  ],
1032
  "logging_steps": 10,