smirki commited on
Commit
d74cd8f
·
verified ·
1 Parent(s): 9a5e9a4

Training in progress, step 950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b84ee20fdeed3b9de615bfb8c89a604ad31460bf4be8d9981259bc49daba7689
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0cdab5afe89842ddf5f951fefb2a7cbceecc5e593787e5da35a587fa2f1fc19
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:568ebac81b184bad78bc6124292e26514957855d6547ab1ff84685977fe672bd
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aefb33efb95cb0cda912996733876b57741f474e96d089d56f4d5c60b686c3d
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bac1fa9dca95bae4349472079f496cd88fa65635d594e791ca9b0c9c4124f64
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3343f12ec5e83ccbe5792e5a25a22293eb382ff3f3b0b26a3d274d79100cbe82
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:078bc51854d278434d7ff9f22a495ff6f5ffaa9f8a1b36cbdab8eec0ef4607eb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039c08ad2d08bc621e50ca4bc0d72f4e62aa003991d9f191336efa92cd4bd720
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.007366937185909637,
5
  "eval_steps": 500,
6
- "global_step": 925,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1111,6 +1111,42 @@
1111
  "reward_std": 0.25242582634091376,
1112
  "rewards/custom_reward_simplified_v7_dblog": 0.74375,
1113
  "step": 920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1114
  }
1115
  ],
1116
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.007566043596339628,
5
  "eval_steps": 500,
6
+ "global_step": 950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1111
  "reward_std": 0.25242582634091376,
1112
  "rewards/custom_reward_simplified_v7_dblog": 0.74375,
1113
  "step": 920
1114
+ },
1115
+ {
1116
+ "completion_length": 640.1875,
1117
+ "epoch": 0.0074067584679956356,
1118
+ "grad_norm": 0.22241215407848358,
1119
+ "kl": 0.006700195767916739,
1120
+ "learning_rate": 3.8432490208670605e-06,
1121
+ "loss": 0.0003,
1122
+ "reward": 0.753125,
1123
+ "reward_std": 0.30004683434963225,
1124
+ "rewards/custom_reward_simplified_v7_dblog": 0.753125,
1125
+ "step": 930
1126
+ },
1127
+ {
1128
+ "completion_length": 671.025,
1129
+ "epoch": 0.007486401032167632,
1130
+ "grad_norm": 0.2610742747783661,
1131
+ "kl": 0.007203501905314625,
1132
+ "learning_rate": 3.8124414508364005e-06,
1133
+ "loss": 0.0003,
1134
+ "reward": 0.696875,
1135
+ "reward_std": 0.2809624969959259,
1136
+ "rewards/custom_reward_simplified_v7_dblog": 0.696875,
1137
+ "step": 940
1138
+ },
1139
+ {
1140
+ "completion_length": 644.56875,
1141
+ "epoch": 0.007566043596339628,
1142
+ "grad_norm": 0.18431080877780914,
1143
+ "kl": 0.006376700336113572,
1144
+ "learning_rate": 3.7813562519996633e-06,
1145
+ "loss": 0.0003,
1146
+ "reward": 0.775,
1147
+ "reward_std": 0.2690692335367203,
1148
+ "rewards/custom_reward_simplified_v7_dblog": 0.775,
1149
+ "step": 950
1150
  }
1151
  ],
1152
  "logging_steps": 10,