FormlessAI commited on
Commit
2a22a6f
·
verified ·
1 Parent(s): b635934

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56ebe1bf228e92e7b408d3b50ee7693165f94acc10f0b99500d6fd8704bd8006
3
  size 34895376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:128d5ff87a9a2f67bfc376762663cddc1baec41d7fc66c9f706a34faa11d5ce6
3
  size 34895376
last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e67dd2c6eafa9e6373d4c602c404f3037feea34cf55ac5d5e5a7db1b2ba46303
3
+ size 26298085
last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28595be6eafc5d9e98b7173aa15802b7097913aedc4af74767a7c63f75db51db
3
+ size 26298085
last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d9531c02cab629a21f79c760ef90370c1f164696c1a6dc03ed212646d48458a
3
+ size 26298149
last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7578bc6c6c3ef05125df821f9f3409b6dbfd2844aba382f7af14c2d20459b4bc
3
+ size 26298149
last-checkpoint/global_step300/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df81a1524d9d57d65c7e452ca66412a0d9fe6e7785143cffdaf5e948a590fc02
3
+ size 501799973
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step200
 
1
+ global_step300
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6418eac1ba346ee73d2ebc80987f480e6c7019a31f3a512d1ae5584c16e752b
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dcd295e94e121d0e334ec06742c998279f2c41bf799fbd89390e9e5d7441448
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e184a24bebc3a4116bdcdf257d93475450a64bbb5df3fddcef6619ba8f70a59d
3
  size 15365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39ae4eb8db6190f758a03220812e9fcbc2a4823df90e3ccf1ba4764886733e3d
3
  size 15365
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86b280e6213e29ec0037b1f5f3daa8d0ee155a93389dd0c3cf1bd3d90c7da9a9
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbe37b1011b486d46f84f6c3acc88f2b0b90b3a3c2a6f2b01998012c708e63b
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64a2914181616f169732b6020ae236c388196a057b8e319b6822b0257e90beb3
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c2ef4d00af7939a9a25681a8548258d41f42c1471bf03f68a181fcb291b727
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:531f6f6748d810df4463be9a678fe7501b1690733d5508860c7f5f0586e3db9c
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:738f1bd52a0e5e3570eb826d97ec615ac0a30012cc827a5a3725e6329285a9f5
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.014669565483927727,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.258732212160414,
6
  "eval_steps": 100,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1024,11 +1024,519 @@
1024
  "eval_samples_per_second": 4.743,
1025
  "eval_steps_per_second": 0.308,
1026
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1027
  }
1028
  ],
1029
  "logging_steps": 5,
1030
  "max_steps": 1000,
1031
- "num_input_tokens_seen": 221849,
1032
  "num_train_epochs": 2,
1033
  "save_steps": 100,
1034
  "stateful_callbacks": {
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.0143232811242342,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.38809831824062097,
6
  "eval_steps": 100,
7
+ "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1024
  "eval_samples_per_second": 4.743,
1025
  "eval_steps_per_second": 0.308,
1026
  "step": 200
1027
+ },
1028
+ {
1029
+ "clip_ratio/high_max": 0.0,
1030
+ "clip_ratio/high_mean": 0.0,
1031
+ "clip_ratio/low_mean": 0.0,
1032
+ "clip_ratio/low_min": 0.0,
1033
+ "clip_ratio/region_mean": 0.0,
1034
+ "completions/clipped_ratio": 0.984375,
1035
+ "completions/max_length": 32.0,
1036
+ "completions/max_terminated_length": 11.7,
1037
+ "completions/mean_length": 31.9,
1038
+ "completions/mean_terminated_length": 10.7,
1039
+ "completions/min_length": 28.9,
1040
+ "completions/min_terminated_length": 9.7,
1041
+ "epoch": 0.2652005174644243,
1042
+ "grad_norm": 0.3019338548183441,
1043
+ "kl": 0.37265625,
1044
+ "learning_rate": 0.00019348256763960145,
1045
+ "loss": 0.0152,
1046
+ "num_tokens": 227432.0,
1047
+ "reward": 2.7201414823532106,
1048
+ "reward_std": 0.28786033391952515,
1049
+ "rewards/keyword_inclusion_reward/mean": 0.3062500089406967,
1050
+ "rewards/keyword_inclusion_reward/std": 0.08914534375071526,
1051
+ "step": 205
1052
+ },
1053
+ {
1054
+ "clip_ratio/high_max": 0.0,
1055
+ "clip_ratio/high_mean": 0.0,
1056
+ "clip_ratio/low_mean": 0.0,
1057
+ "clip_ratio/low_min": 0.0,
1058
+ "clip_ratio/region_mean": 0.0,
1059
+ "completions/clipped_ratio": 0.9625,
1060
+ "completions/max_length": 32.0,
1061
+ "completions/max_terminated_length": 17.6,
1062
+ "completions/mean_length": 31.86875,
1063
+ "completions/mean_terminated_length": 16.76666679382324,
1064
+ "completions/min_length": 28.2,
1065
+ "completions/min_terminated_length": 15.4,
1066
+ "epoch": 0.2716688227684347,
1067
+ "grad_norm": 0.09334196895360947,
1068
+ "kl": 0.4015625,
1069
+ "learning_rate": 0.00019284858268809137,
1070
+ "loss": 0.0161,
1071
+ "num_tokens": 232989.0,
1072
+ "reward": 2.8681763648986816,
1073
+ "reward_std": 0.13084560632705688,
1074
+ "rewards/keyword_inclusion_reward/mean": 0.3229166746139526,
1075
+ "rewards/keyword_inclusion_reward/std": 0.04457640573382378,
1076
+ "step": 210
1077
+ },
1078
+ {
1079
+ "clip_ratio/high_max": 0.0,
1080
+ "clip_ratio/high_mean": 0.0,
1081
+ "clip_ratio/low_mean": 0.0,
1082
+ "clip_ratio/low_min": 0.0,
1083
+ "clip_ratio/region_mean": 0.0,
1084
+ "completions/clipped_ratio": 0.91875,
1085
+ "completions/max_length": 32.0,
1086
+ "completions/max_terminated_length": 26.8,
1087
+ "completions/mean_length": 31.3375,
1088
+ "completions/mean_terminated_length": 24.0,
1089
+ "completions/min_length": 21.2,
1090
+ "completions/min_terminated_length": 21.2,
1091
+ "epoch": 0.278137128072445,
1092
+ "grad_norm": 0.1729985624551773,
1093
+ "kl": 0.4,
1094
+ "learning_rate": 0.00019218631515885006,
1095
+ "loss": 0.0126,
1096
+ "num_tokens": 238507.0,
1097
+ "reward": 2.9051851272583007,
1098
+ "reward_std": 0.07850736379623413,
1099
+ "rewards/keyword_inclusion_reward/mean": 0.3270833492279053,
1100
+ "rewards/keyword_inclusion_reward/std": 0.03535533919930458,
1101
+ "step": 215
1102
+ },
1103
+ {
1104
+ "clip_ratio/high_max": 0.0,
1105
+ "clip_ratio/high_mean": 0.0,
1106
+ "clip_ratio/low_mean": 0.0,
1107
+ "clip_ratio/low_min": 0.0,
1108
+ "clip_ratio/region_mean": 0.0,
1109
+ "completions/clipped_ratio": 0.9125,
1110
+ "completions/max_length": 32.0,
1111
+ "completions/max_terminated_length": 24.8,
1112
+ "completions/mean_length": 31.3625,
1113
+ "completions/mean_terminated_length": 20.679999923706056,
1114
+ "completions/min_length": 22.4,
1115
+ "completions/min_terminated_length": 16.0,
1116
+ "epoch": 0.2846054333764554,
1117
+ "grad_norm": 0.21469204127788544,
1118
+ "kl": 0.3677734375,
1119
+ "learning_rate": 0.0001914959667849825,
1120
+ "loss": 0.013,
1121
+ "num_tokens": 244017.0,
1122
+ "reward": 2.8126633167266846,
1123
+ "reward_std": 0.20935297012329102,
1124
+ "rewards/keyword_inclusion_reward/mean": 0.3166666805744171,
1125
+ "rewards/keyword_inclusion_reward/std": 0.0714939571917057,
1126
+ "step": 220
1127
+ },
1128
+ {
1129
+ "clip_ratio/high_max": 0.0,
1130
+ "clip_ratio/high_mean": 0.0,
1131
+ "clip_ratio/low_mean": 0.0,
1132
+ "clip_ratio/low_min": 0.0,
1133
+ "clip_ratio/region_mean": 0.0,
1134
+ "completions/clipped_ratio": 0.93125,
1135
+ "completions/max_length": 32.0,
1136
+ "completions/max_terminated_length": 22.2,
1137
+ "completions/mean_length": 31.6125,
1138
+ "completions/mean_terminated_length": 20.416666793823243,
1139
+ "completions/min_length": 25.4,
1140
+ "completions/min_terminated_length": 19.0,
1141
+ "epoch": 0.2910737386804657,
1142
+ "grad_norm": 0.048688553273677826,
1143
+ "kl": 0.344921875,
1144
+ "learning_rate": 0.00019077774785329087,
1145
+ "loss": 0.0161,
1146
+ "num_tokens": 249609.0,
1147
+ "reward": 2.886680793762207,
1148
+ "reward_std": 0.10467648506164551,
1149
+ "rewards/keyword_inclusion_reward/mean": 0.32500001788139343,
1150
+ "rewards/keyword_inclusion_reward/std": 0.047140452265739444,
1151
+ "step": 225
1152
+ },
1153
+ {
1154
+ "clip_ratio/high_max": 0.0,
1155
+ "clip_ratio/high_mean": 0.0,
1156
+ "clip_ratio/low_mean": 0.0,
1157
+ "clip_ratio/low_min": 0.0,
1158
+ "clip_ratio/region_mean": 0.0,
1159
+ "completions/clipped_ratio": 0.89375,
1160
+ "completions/max_length": 32.0,
1161
+ "completions/max_terminated_length": 28.6,
1162
+ "completions/mean_length": 31.3375,
1163
+ "completions/mean_terminated_length": 25.214286041259765,
1164
+ "completions/min_length": 21.6,
1165
+ "completions/min_terminated_length": 21.6,
1166
+ "epoch": 0.2975420439844761,
1167
+ "grad_norm": 0.03536103293299675,
1168
+ "kl": 0.3490234375,
1169
+ "learning_rate": 0.00019003187714021938,
1170
+ "loss": 0.0157,
1171
+ "num_tokens": 255097.0,
1172
+ "reward": 2.8866806983947755,
1173
+ "reward_std": 0.10467648506164551,
1174
+ "rewards/keyword_inclusion_reward/mean": 0.325000011920929,
1175
+ "rewards/keyword_inclusion_reward/std": 0.03152808472514153,
1176
+ "step": 230
1177
+ },
1178
+ {
1179
+ "clip_ratio/high_max": 0.0,
1180
+ "clip_ratio/high_mean": 0.0,
1181
+ "clip_ratio/low_mean": 0.0,
1182
+ "clip_ratio/low_min": 0.0,
1183
+ "clip_ratio/region_mean": 0.0,
1184
+ "completions/clipped_ratio": 0.8875,
1185
+ "completions/max_length": 32.0,
1186
+ "completions/max_terminated_length": 30.2,
1187
+ "completions/mean_length": 31.24375,
1188
+ "completions/mean_terminated_length": 25.026667022705077,
1189
+ "completions/min_length": 18.0,
1190
+ "completions/min_terminated_length": 18.0,
1191
+ "epoch": 0.3040103492884864,
1192
+ "grad_norm": 0.14790986478328705,
1193
+ "kl": 0.3205078125,
1194
+ "learning_rate": 0.00018925858184521256,
1195
+ "loss": 0.013,
1196
+ "num_tokens": 260542.0,
1197
+ "reward": 2.8126633167266846,
1198
+ "reward_std": 0.20935297012329102,
1199
+ "rewards/keyword_inclusion_reward/mean": 0.3166666805744171,
1200
+ "rewards/keyword_inclusion_reward/std": 0.0714939571917057,
1201
+ "step": 235
1202
+ },
1203
+ {
1204
+ "clip_ratio/high_max": 0.0,
1205
+ "clip_ratio/high_mean": 0.0,
1206
+ "clip_ratio/low_mean": 0.0,
1207
+ "clip_ratio/low_min": 0.0,
1208
+ "clip_ratio/region_mean": 0.0,
1209
+ "completions/clipped_ratio": 0.8875,
1210
+ "completions/max_length": 32.0,
1211
+ "completions/max_terminated_length": 30.2,
1212
+ "completions/mean_length": 31.30625,
1213
+ "completions/mean_terminated_length": 25.746667098999023,
1214
+ "completions/min_length": 20.0,
1215
+ "completions/min_terminated_length": 20.0,
1216
+ "epoch": 0.31047865459249674,
1217
+ "grad_norm": 0.2648606300354004,
1218
+ "kl": 0.3302734375,
1219
+ "learning_rate": 0.0001884580975215084,
1220
+ "loss": 0.0124,
1221
+ "num_tokens": 266065.0,
1222
+ "reward": 2.812663269042969,
1223
+ "reward_std": 0.20935297012329102,
1224
+ "rewards/keyword_inclusion_reward/mean": 0.31666667461395265,
1225
+ "rewards/keyword_inclusion_reward/std": 0.06431937739253044,
1226
+ "step": 240
1227
+ },
1228
+ {
1229
+ "clip_ratio/high_max": 0.0,
1230
+ "clip_ratio/high_mean": 0.0,
1231
+ "clip_ratio/low_mean": 0.0,
1232
+ "clip_ratio/low_min": 0.0,
1233
+ "clip_ratio/region_mean": 0.0,
1234
+ "completions/clipped_ratio": 0.9,
1235
+ "completions/max_length": 32.0,
1236
+ "completions/max_terminated_length": 25.0,
1237
+ "completions/mean_length": 31.53125,
1238
+ "completions/mean_terminated_length": 22.06666717529297,
1239
+ "completions/min_length": 25.2,
1240
+ "completions/min_terminated_length": 18.8,
1241
+ "epoch": 0.3169469598965071,
1242
+ "grad_norm": 0.14858758449554443,
1243
+ "kl": 0.3453125,
1244
+ "learning_rate": 0.00018763066800438636,
1245
+ "loss": 0.0138,
1246
+ "num_tokens": 271536.0,
1247
+ "reward": 2.905185079574585,
1248
+ "reward_std": 0.07850736379623413,
1249
+ "rewards/keyword_inclusion_reward/mean": 0.3270833432674408,
1250
+ "rewards/keyword_inclusion_reward/std": 0.028180759400129318,
1251
+ "step": 245
1252
+ },
1253
+ {
1254
+ "clip_ratio/high_max": 0.0,
1255
+ "clip_ratio/high_mean": 0.0,
1256
+ "clip_ratio/low_mean": 0.0,
1257
+ "clip_ratio/low_min": 0.0,
1258
+ "clip_ratio/region_mean": 0.0,
1259
+ "completions/clipped_ratio": 0.93125,
1260
+ "completions/max_length": 32.0,
1261
+ "completions/max_terminated_length": 30.8,
1262
+ "completions/mean_length": 31.6875,
1263
+ "completions/mean_terminated_length": 28.25,
1264
+ "completions/min_length": 25.6,
1265
+ "completions/min_terminated_length": 25.6,
1266
+ "epoch": 0.32341526520051744,
1267
+ "grad_norm": 0.039299800992012024,
1268
+ "kl": 0.3734375,
1269
+ "learning_rate": 0.00018677654533689287,
1270
+ "loss": 0.0149,
1271
+ "num_tokens": 277104.0,
1272
+ "reward": 2.9236894607543946,
1273
+ "reward_std": 0.052338242530822754,
1274
+ "rewards/keyword_inclusion_reward/mean": 0.3291666805744171,
1275
+ "rewards/keyword_inclusion_reward/std": 0.023570226132869722,
1276
+ "step": 250
1277
+ },
1278
+ {
1279
+ "clip_ratio/high_max": 0.0,
1280
+ "clip_ratio/high_mean": 0.0,
1281
+ "clip_ratio/low_mean": 0.0,
1282
+ "clip_ratio/low_min": 0.0,
1283
+ "clip_ratio/region_mean": 0.0,
1284
+ "completions/clipped_ratio": 0.95,
1285
+ "completions/max_length": 32.0,
1286
+ "completions/max_terminated_length": 22.4,
1287
+ "completions/mean_length": 31.65625,
1288
+ "completions/mean_terminated_length": 20.1,
1289
+ "completions/min_length": 24.2,
1290
+ "completions/min_terminated_length": 17.8,
1291
+ "epoch": 0.3298835705045278,
1292
+ "grad_norm": 0.2771835923194885,
1293
+ "kl": 0.35390625,
1294
+ "learning_rate": 0.00018589598969306645,
1295
+ "loss": 0.0112,
1296
+ "num_tokens": 282653.0,
1297
+ "reward": 2.7941588878631594,
1298
+ "reward_std": 0.28786033391952515,
1299
+ "rewards/keyword_inclusion_reward/mean": 0.3145833432674408,
1300
+ "rewards/keyword_inclusion_reward/std": 0.07305490300059318,
1301
+ "step": 255
1302
+ },
1303
+ {
1304
+ "clip_ratio/high_max": 0.0,
1305
+ "clip_ratio/high_mean": 0.0,
1306
+ "clip_ratio/low_mean": 0.0,
1307
+ "clip_ratio/low_min": 0.0,
1308
+ "clip_ratio/region_mean": 0.0,
1309
+ "completions/clipped_ratio": 0.98125,
1310
+ "completions/max_length": 32.0,
1311
+ "completions/max_terminated_length": 16.8,
1312
+ "completions/mean_length": 31.925,
1313
+ "completions/mean_terminated_length": 16.8,
1314
+ "completions/min_length": 29.6,
1315
+ "completions/min_terminated_length": 16.8,
1316
+ "epoch": 0.33635187580853815,
1317
+ "grad_norm": 0.04248747602105141,
1318
+ "kl": 0.3732421875,
1319
+ "learning_rate": 0.00018498926929868642,
1320
+ "loss": 0.0149,
1321
+ "num_tokens": 288199.0,
1322
+ "reward": 2.8311676025390624,
1323
+ "reward_std": 0.18318384885787964,
1324
+ "rewards/keyword_inclusion_reward/mean": 0.31875001192092894,
1325
+ "rewards/keyword_inclusion_reward/std": 0.05127105638384819,
1326
+ "step": 260
1327
+ },
1328
+ {
1329
+ "clip_ratio/high_max": 0.0,
1330
+ "clip_ratio/high_mean": 0.0,
1331
+ "clip_ratio/low_mean": 0.0,
1332
+ "clip_ratio/low_min": 0.0,
1333
+ "clip_ratio/region_mean": 0.0,
1334
+ "completions/clipped_ratio": 0.975,
1335
+ "completions/max_length": 32.0,
1336
+ "completions/max_terminated_length": 18.0,
1337
+ "completions/mean_length": 31.9375,
1338
+ "completions/mean_terminated_length": 17.7,
1339
+ "completions/min_length": 30.2,
1340
+ "completions/min_terminated_length": 17.4,
1341
+ "epoch": 0.3428201811125485,
1342
+ "grad_norm": 0.21370354294776917,
1343
+ "kl": 0.3591796875,
1344
+ "learning_rate": 0.00018405666034956844,
1345
+ "loss": 0.0144,
1346
+ "num_tokens": 293805.0,
1347
+ "reward": 2.886680746078491,
1348
+ "reward_std": 0.10467648506164551,
1349
+ "rewards/keyword_inclusion_reward/mean": 0.325000011920929,
1350
+ "rewards/keyword_inclusion_reward/std": 0.039965872466564176,
1351
+ "step": 265
1352
+ },
1353
+ {
1354
+ "clip_ratio/high_max": 0.0,
1355
+ "clip_ratio/high_mean": 0.0,
1356
+ "clip_ratio/low_mean": 0.0,
1357
+ "clip_ratio/low_min": 0.0,
1358
+ "clip_ratio/region_mean": 0.0,
1359
+ "completions/clipped_ratio": 0.975,
1360
+ "completions/max_length": 32.0,
1361
+ "completions/max_terminated_length": 18.0,
1362
+ "completions/mean_length": 31.89375,
1363
+ "completions/mean_terminated_length": 17.2,
1364
+ "completions/min_length": 29.2,
1365
+ "completions/min_terminated_length": 16.4,
1366
+ "epoch": 0.34928848641655885,
1367
+ "grad_norm": 0.17620764672756195,
1368
+ "kl": 0.3935546875,
1369
+ "learning_rate": 0.00018309844692743283,
1370
+ "loss": 0.0157,
1371
+ "num_tokens": 299424.0,
1372
+ "reward": 2.9236894607543946,
1373
+ "reward_std": 0.052338242530822754,
1374
+ "rewards/keyword_inclusion_reward/mean": 0.3291666805744171,
1375
+ "rewards/keyword_inclusion_reward/std": 0.023570226132869722,
1376
+ "step": 270
1377
+ },
1378
+ {
1379
+ "clip_ratio/high_max": 0.0,
1380
+ "clip_ratio/high_mean": 0.0,
1381
+ "clip_ratio/low_mean": 0.0,
1382
+ "clip_ratio/low_min": 0.0,
1383
+ "clip_ratio/region_mean": 0.0,
1384
+ "completions/clipped_ratio": 0.96875,
1385
+ "completions/max_length": 32.0,
1386
+ "completions/max_terminated_length": 11.8,
1387
+ "completions/mean_length": 31.84375,
1388
+ "completions/mean_terminated_length": 10.833333587646484,
1389
+ "completions/min_length": 29.2,
1390
+ "completions/min_terminated_length": 10.0,
1391
+ "epoch": 0.35575679172056923,
1392
+ "grad_norm": 0.2770165205001831,
1393
+ "kl": 0.35625,
1394
+ "learning_rate": 0.00018211492091337042,
1395
+ "loss": 0.0143,
1396
+ "num_tokens": 305035.0,
1397
+ "reward": 2.7941588878631594,
1398
+ "reward_std": 0.18318384885787964,
1399
+ "rewards/keyword_inclusion_reward/mean": 0.3145833432674408,
1400
+ "rewards/keyword_inclusion_reward/std": 0.05612155273556709,
1401
+ "step": 275
1402
+ },
1403
+ {
1404
+ "clip_ratio/high_max": 0.0,
1405
+ "clip_ratio/high_mean": 0.0,
1406
+ "clip_ratio/low_mean": 0.0,
1407
+ "clip_ratio/low_min": 0.0,
1408
+ "clip_ratio/region_mean": 0.0,
1409
+ "completions/clipped_ratio": 0.95,
1410
+ "completions/max_length": 32.0,
1411
+ "completions/max_terminated_length": 22.6,
1412
+ "completions/mean_length": 31.66875,
1413
+ "completions/mean_terminated_length": 20.3,
1414
+ "completions/min_length": 24.4,
1415
+ "completions/min_terminated_length": 18.0,
1416
+ "epoch": 0.36222509702457956,
1417
+ "grad_norm": 0.13597851991653442,
1418
+ "kl": 0.3595703125,
1419
+ "learning_rate": 0.00018110638189893267,
1420
+ "loss": 0.0138,
1421
+ "num_tokens": 310616.0,
1422
+ "reward": 2.812663221359253,
1423
+ "reward_std": 0.20935297012329102,
1424
+ "rewards/keyword_inclusion_reward/mean": 0.31666667461395265,
1425
+ "rewards/keyword_inclusion_reward/std": 0.07281493991613389,
1426
+ "step": 280
1427
+ },
1428
+ {
1429
+ "clip_ratio/high_max": 0.0,
1430
+ "clip_ratio/high_mean": 0.0,
1431
+ "clip_ratio/low_mean": 0.0,
1432
+ "clip_ratio/low_min": 0.0,
1433
+ "clip_ratio/region_mean": 0.0,
1434
+ "completions/clipped_ratio": 0.93125,
1435
+ "completions/max_length": 32.0,
1436
+ "completions/max_terminated_length": 24.4,
1437
+ "completions/mean_length": 31.575,
1438
+ "completions/mean_terminated_length": 21.683333587646484,
1439
+ "completions/min_length": 23.0,
1440
+ "completions/min_terminated_length": 16.6,
1441
+ "epoch": 0.36869340232858994,
1442
+ "grad_norm": 0.13746124505996704,
1443
+ "kl": 0.3494140625,
1444
+ "learning_rate": 0.00018007313709487334,
1445
+ "loss": 0.014,
1446
+ "num_tokens": 316070.0,
1447
+ "reward": 2.9236894607543946,
1448
+ "reward_std": 0.052338242530822754,
1449
+ "rewards/keyword_inclusion_reward/mean": 0.3291666805744171,
1450
+ "rewards/keyword_inclusion_reward/std": 0.023570226132869722,
1451
+ "step": 285
1452
+ },
1453
+ {
1454
+ "clip_ratio/high_max": 0.0,
1455
+ "clip_ratio/high_mean": 0.0,
1456
+ "clip_ratio/low_mean": 0.0,
1457
+ "clip_ratio/low_min": 0.0,
1458
+ "clip_ratio/region_mean": 0.0,
1459
+ "completions/clipped_ratio": 0.95,
1460
+ "completions/max_length": 32.0,
1461
+ "completions/max_terminated_length": 23.2,
1462
+ "completions/mean_length": 31.39375,
1463
+ "completions/mean_terminated_length": 17.233333587646484,
1464
+ "completions/min_length": 17.8,
1465
+ "completions/min_terminated_length": 11.4,
1466
+ "epoch": 0.37516170763260026,
1467
+ "grad_norm": 0.42286813259124756,
1468
+ "kl": 0.3724609375,
1469
+ "learning_rate": 0.00017901550123756906,
1470
+ "loss": 0.0059,
1471
+ "num_tokens": 321569.0,
1472
+ "reward": 2.8311676502227785,
1473
+ "reward_std": 0.18318384885787964,
1474
+ "rewards/keyword_inclusion_reward/mean": 0.31875001192092894,
1475
+ "rewards/keyword_inclusion_reward/std": 0.050581476837396624,
1476
+ "step": 290
1477
+ },
1478
+ {
1479
+ "clip_ratio/high_max": 0.0,
1480
+ "clip_ratio/high_mean": 0.0,
1481
+ "clip_ratio/low_mean": 0.0,
1482
+ "clip_ratio/low_min": 0.0,
1483
+ "clip_ratio/region_mean": 0.0,
1484
+ "completions/clipped_ratio": 0.95,
1485
+ "completions/max_length": 32.0,
1486
+ "completions/max_terminated_length": 24.0,
1487
+ "completions/mean_length": 31.8625,
1488
+ "completions/mean_terminated_length": 23.333333587646486,
1489
+ "completions/min_length": 29.4,
1490
+ "completions/min_terminated_length": 23.0,
1491
+ "epoch": 0.3816300129366106,
1492
+ "grad_norm": 0.17354200780391693,
1493
+ "kl": 0.3912109375,
1494
+ "learning_rate": 0.00017793379649314744,
1495
+ "loss": 0.0156,
1496
+ "num_tokens": 327143.0,
1497
+ "reward": 2.8681763648986816,
1498
+ "reward_std": 0.13084560632705688,
1499
+ "rewards/keyword_inclusion_reward/mean": 0.3229166746139526,
1500
+ "rewards/keyword_inclusion_reward/std": 0.04457640573382378,
1501
+ "step": 295
1502
+ },
1503
+ {
1504
+ "epoch": 0.38809831824062097,
1505
+ "grad_norm": 0.23563814163208008,
1506
+ "learning_rate": 0.00017682835235935236,
1507
+ "loss": 0.014,
1508
+ "step": 300
1509
+ },
1510
+ {
1511
+ "epoch": 0.38809831824062097,
1512
+ "eval_clip_ratio/high_max": 0.0,
1513
+ "eval_clip_ratio/high_mean": 0.0,
1514
+ "eval_clip_ratio/low_mean": 0.0,
1515
+ "eval_clip_ratio/low_min": 0.0,
1516
+ "eval_clip_ratio/region_mean": 0.0,
1517
+ "eval_completions/clipped_ratio": 0.9840425531914894,
1518
+ "eval_completions/max_length": 32.0,
1519
+ "eval_completions/max_terminated_length": 7.085106382978723,
1520
+ "eval_completions/mean_length": 31.93218085106383,
1521
+ "eval_completions/mean_terminated_length": 7.085106382978723,
1522
+ "eval_completions/min_length": 30.914893617021278,
1523
+ "eval_completions/min_terminated_length": 7.085106382978723,
1524
+ "eval_kl": 0.3568816489361702,
1525
+ "eval_loss": 0.0143232811242342,
1526
+ "eval_num_tokens": 332728.0,
1527
+ "eval_reward": 2.7559690475463867,
1528
+ "eval_reward_std": 0.26725911079569065,
1529
+ "eval_rewards/keyword_inclusion_reward/mean": 0.31028369639782194,
1530
+ "eval_rewards/keyword_inclusion_reward/std": 0.0670770841076019,
1531
+ "eval_runtime": 77.8145,
1532
+ "eval_samples_per_second": 4.755,
1533
+ "eval_steps_per_second": 0.308,
1534
+ "step": 300
1535
  }
1536
  ],
1537
  "logging_steps": 5,
1538
  "max_steps": 1000,
1539
+ "num_input_tokens_seen": 332728,
1540
  "num_train_epochs": 2,
1541
  "save_steps": 100,
1542
  "stateful_callbacks": {