FormlessAI commited on
Commit
023773b
·
verified ·
1 Parent(s): 3421f9b

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc72548ded001d4bc395e608c14c2a0ec4dfd76ebc7ed6a413a1f6ed0e4697ff
3
  size 83920720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:095753d1308fca4d68bd50eee28c0d64ec3d81fcd352020de685e3e2b9697fb5
3
  size 83920720
last-checkpoint/global_step700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a060593b440423607faacd1b99bd9ea5033e7c32bbbce869b135a4445b1df75
3
+ size 63256165
last-checkpoint/global_step700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e0ca9a7eb6451a86f4dabe7594dafadc5718627e49302966606a36148d751cd
3
+ size 63256229
last-checkpoint/global_step700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abd9fb04daf69ce6b1421696d3e31bbde42f3a466ffdbfa64dd2f7c0c1caca66
3
+ size 63256229
last-checkpoint/global_step700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c79d800c97fb10117092f9ec04346c270ab37cdf4dc3eac702a8944e792013
3
+ size 63256229
last-checkpoint/global_step700/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3f68e58bd36f4b61d7415dc2f56e5d6defe6980f584b727e4e84a33a6aa1674
3
+ size 84096473
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step600
 
1
+ global_step700
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf93295ed54940cd79af1e1aaddfdeb4609fcc104270f909fcc515520967288e
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:101ab08e2194627d031e7fc8097a3ac23d583896a9fe2b312020a2bed3639d65
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac3aef75a59ea6ad387b25e06351c64a2919fc2d8c55cef576a7e22f71e8c84d
3
  size 15365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c70384deaa4c1845a6f2d4fb20565360de13afbc60092206cf9f88cd1e723c2
3
  size 15365
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3d9d24bd0dc92288033cf7ee4421fb2618f273c726c7605dc11b35fe0853c27
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b6933871a383f04bb8f7db2236c9030798486f4c60fc0af36965963882a317
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d30687970cc2593bf26c2c0a6fd6bb58fb7ce5184b8eb8355abf1e0ad88df3ad
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93677c1a0c522ff69f16203c7d55be7cbdea8e71a0bdc8a1344a5cb9c0605e55
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55810183f880136ee8c48bcdb7d301ae1f05e04eaec38d45af81caed8ea180c3
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79c56dc8bc830a15efd2cc23c66236839ef75d4ec3e413a9fbb8dae549996719
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.02561848610639572,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.14218009478672985,
6
  "eval_steps": 100,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4016,11 +4016,679 @@
4016
  "eval_samples_per_second": 3.956,
4017
  "eval_steps_per_second": 0.248,
4018
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4019
  }
4020
  ],
4021
  "logging_steps": 5,
4022
  "max_steps": 1000,
4023
- "num_input_tokens_seen": 1024603,
4024
  "num_train_epochs": 1,
4025
  "save_steps": 100,
4026
  "stateful_callbacks": {
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.02497861161828041,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.16587677725118483,
6
  "eval_steps": 100,
7
+ "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4016
  "eval_samples_per_second": 3.956,
4017
  "eval_steps_per_second": 0.248,
4018
  "step": 600
4019
+ },
4020
+ {
4021
+ "clip_ratio/high_max": 0.0,
4022
+ "clip_ratio/high_mean": 0.0,
4023
+ "clip_ratio/low_mean": 0.0,
4024
+ "clip_ratio/low_min": 0.0,
4025
+ "clip_ratio/region_mean": 0.0,
4026
+ "completions/clipped_ratio": 1.0,
4027
+ "completions/max_length": 32.0,
4028
+ "completions/max_terminated_length": 0.0,
4029
+ "completions/mean_length": 32.0,
4030
+ "completions/mean_terminated_length": 0.0,
4031
+ "completions/min_length": 32.0,
4032
+ "completions/min_terminated_length": 0.0,
4033
+ "epoch": 0.14336492890995262,
4034
+ "grad_norm": 0.1272319108247757,
4035
+ "kl": 0.7216796875,
4036
+ "learning_rate": 8.126186854142752e-05,
4037
+ "loss": 0.0284,
4038
+ "num_tokens": 1033597.0,
4039
+ "reward": 953.1578125,
4040
+ "reward_std": 7.4799497604370115,
4041
+ "rewards/concise_response_reward/mean": 9.994318199157714,
4042
+ "rewards/concise_response_reward/std": 0.03214122056961059,
4043
+ "rewards/penalize_unwanted_words/mean": 1.0,
4044
+ "rewards/penalize_unwanted_words/std": 0.0,
4045
+ "rewards/reward_func_length/mean": 98.56343536376953,
4046
+ "rewards/reward_func_length/std": 1.168010425567627,
4047
+ "rewards/reward_keyword_presence/mean": 0.0,
4048
+ "rewards/reward_keyword_presence/std": 0.0,
4049
+ "rewards/reward_short_answers/mean": 9.994318199157714,
4050
+ "rewards/reward_short_answers/std": 0.03214122056961059,
4051
+ "step": 605
4052
+ },
4053
+ {
4054
+ "clip_ratio/high_max": 0.0,
4055
+ "clip_ratio/high_mean": 0.0,
4056
+ "clip_ratio/low_mean": 0.0,
4057
+ "clip_ratio/low_min": 0.0,
4058
+ "clip_ratio/region_mean": 0.0,
4059
+ "completions/clipped_ratio": 1.0,
4060
+ "completions/max_length": 32.0,
4061
+ "completions/max_terminated_length": 0.0,
4062
+ "completions/mean_length": 32.0,
4063
+ "completions/mean_terminated_length": 0.0,
4064
+ "completions/min_length": 32.0,
4065
+ "completions/min_terminated_length": 0.0,
4066
+ "epoch": 0.14454976303317535,
4067
+ "grad_norm": 0.05021500214934349,
4068
+ "kl": 0.66953125,
4069
+ "learning_rate": 7.955039481582097e-05,
4070
+ "loss": 0.0268,
4071
+ "num_tokens": 1042439.0,
4072
+ "reward": 954.049658203125,
4073
+ "reward_std": 5.795430326461792,
4074
+ "rewards/concise_response_reward/mean": 10.0,
4075
+ "rewards/concise_response_reward/std": 0.0,
4076
+ "rewards/penalize_unwanted_words/mean": 1.0,
4077
+ "rewards/penalize_unwanted_words/std": 0.0,
4078
+ "rewards/reward_func_length/mean": 98.65749816894531,
4079
+ "rewards/reward_func_length/std": 1.0676976799964906,
4080
+ "rewards/reward_keyword_presence/mean": 0.0,
4081
+ "rewards/reward_keyword_presence/std": 0.0,
4082
+ "rewards/reward_short_answers/mean": 10.0,
4083
+ "rewards/reward_short_answers/std": 0.0,
4084
+ "step": 610
4085
+ },
4086
+ {
4087
+ "clip_ratio/high_max": 0.0,
4088
+ "clip_ratio/high_mean": 0.0,
4089
+ "clip_ratio/low_mean": 0.0,
4090
+ "clip_ratio/low_min": 0.0,
4091
+ "clip_ratio/region_mean": 0.0,
4092
+ "completions/clipped_ratio": 1.0,
4093
+ "completions/max_length": 32.0,
4094
+ "completions/max_terminated_length": 0.0,
4095
+ "completions/mean_length": 32.0,
4096
+ "completions/mean_terminated_length": 0.0,
4097
+ "completions/min_length": 32.0,
4098
+ "completions/min_terminated_length": 0.0,
4099
+ "epoch": 0.1457345971563981,
4100
+ "grad_norm": 0.01850374974310398,
4101
+ "kl": 0.690625,
4102
+ "learning_rate": 7.784515023805328e-05,
4103
+ "loss": 0.0276,
4104
+ "num_tokens": 1051223.0,
4105
+ "reward": 952.8959228515625,
4106
+ "reward_std": 6.792151546478271,
4107
+ "rewards/concise_response_reward/mean": 9.994318199157714,
4108
+ "rewards/concise_response_reward/std": 0.03214122056961059,
4109
+ "rewards/penalize_unwanted_words/mean": 1.0,
4110
+ "rewards/penalize_unwanted_words/std": 0.0,
4111
+ "rewards/reward_func_length/mean": 98.53499755859374,
4112
+ "rewards/reward_func_length/std": 1.3000189661979675,
4113
+ "rewards/reward_keyword_presence/mean": 0.0,
4114
+ "rewards/reward_keyword_presence/std": 0.0,
4115
+ "rewards/reward_short_answers/mean": 9.994318199157714,
4116
+ "rewards/reward_short_answers/std": 0.03214122056961059,
4117
+ "step": 615
4118
+ },
4119
+ {
4120
+ "clip_ratio/high_max": 0.0,
4121
+ "clip_ratio/high_mean": 0.0,
4122
+ "clip_ratio/low_mean": 0.0,
4123
+ "clip_ratio/low_min": 0.0,
4124
+ "clip_ratio/region_mean": 0.0,
4125
+ "completions/clipped_ratio": 1.0,
4126
+ "completions/max_length": 32.0,
4127
+ "completions/max_terminated_length": 0.0,
4128
+ "completions/mean_length": 32.0,
4129
+ "completions/mean_terminated_length": 0.0,
4130
+ "completions/min_length": 32.0,
4131
+ "completions/min_terminated_length": 0.0,
4132
+ "epoch": 0.14691943127962084,
4133
+ "grad_norm": 0.13635386526584625,
4134
+ "kl": 0.72109375,
4135
+ "learning_rate": 7.614665424214193e-05,
4136
+ "loss": 0.0288,
4137
+ "num_tokens": 1060561.0,
4138
+ "reward": 951.67314453125,
4139
+ "reward_std": 7.1376995086669925,
4140
+ "rewards/concise_response_reward/mean": 9.98863639831543,
4141
+ "rewards/concise_response_reward/std": 0.044715401530265805,
4142
+ "rewards/penalize_unwanted_words/mean": 1.0,
4143
+ "rewards/penalize_unwanted_words/std": 0.0,
4144
+ "rewards/reward_func_length/mean": 98.40499877929688,
4145
+ "rewards/reward_func_length/std": 1.3004050493240356,
4146
+ "rewards/reward_keyword_presence/mean": 0.0,
4147
+ "rewards/reward_keyword_presence/std": 0.0,
4148
+ "rewards/reward_short_answers/mean": 9.98863639831543,
4149
+ "rewards/reward_short_answers/std": 0.044715401530265805,
4150
+ "step": 620
4151
+ },
4152
+ {
4153
+ "clip_ratio/high_max": 0.0,
4154
+ "clip_ratio/high_mean": 0.0,
4155
+ "clip_ratio/low_mean": 0.0,
4156
+ "clip_ratio/low_min": 0.0,
4157
+ "clip_ratio/region_mean": 0.0,
4158
+ "completions/clipped_ratio": 1.0,
4159
+ "completions/max_length": 32.0,
4160
+ "completions/max_terminated_length": 0.0,
4161
+ "completions/mean_length": 32.0,
4162
+ "completions/mean_terminated_length": 0.0,
4163
+ "completions/min_length": 32.0,
4164
+ "completions/min_terminated_length": 0.0,
4165
+ "epoch": 0.1481042654028436,
4166
+ "grad_norm": 0.048541147261857986,
4167
+ "kl": 0.697265625,
4168
+ "learning_rate": 7.445542420642097e-05,
4169
+ "loss": 0.0279,
4170
+ "num_tokens": 1069131.0,
4171
+ "reward": 951.8170288085937,
4172
+ "reward_std": 8.448184394836426,
4173
+ "rewards/concise_response_reward/mean": 9.98863639831543,
4174
+ "rewards/concise_response_reward/std": 0.044715401530265805,
4175
+ "rewards/penalize_unwanted_words/mean": 1.0,
4176
+ "rewards/penalize_unwanted_words/std": 0.0,
4177
+ "rewards/reward_func_length/mean": 98.42062377929688,
4178
+ "rewards/reward_func_length/std": 1.3346670150756836,
4179
+ "rewards/reward_keyword_presence/mean": 0.0,
4180
+ "rewards/reward_keyword_presence/std": 0.0,
4181
+ "rewards/reward_short_answers/mean": 9.98863639831543,
4182
+ "rewards/reward_short_answers/std": 0.044715401530265805,
4183
+ "step": 625
4184
+ },
4185
+ {
4186
+ "clip_ratio/high_max": 0.0,
4187
+ "clip_ratio/high_mean": 0.0,
4188
+ "clip_ratio/low_mean": 0.0,
4189
+ "clip_ratio/low_min": 0.0,
4190
+ "clip_ratio/region_mean": 0.0,
4191
+ "completions/clipped_ratio": 1.0,
4192
+ "completions/max_length": 32.0,
4193
+ "completions/max_terminated_length": 0.0,
4194
+ "completions/mean_length": 32.0,
4195
+ "completions/mean_terminated_length": 0.0,
4196
+ "completions/min_length": 32.0,
4197
+ "completions/min_terminated_length": 0.0,
4198
+ "epoch": 0.14928909952606634,
4199
+ "grad_norm": 0.11737528443336487,
4200
+ "kl": 0.756640625,
4201
+ "learning_rate": 7.277197529594257e-05,
4202
+ "loss": 0.0303,
4203
+ "num_tokens": 1077703.0,
4204
+ "reward": 949.5781005859375,
4205
+ "reward_std": 10.019141483306885,
4206
+ "rewards/concise_response_reward/mean": 9.98863639831543,
4207
+ "rewards/concise_response_reward/std": 0.044715401530265805,
4208
+ "rewards/penalize_unwanted_words/mean": 1.0,
4209
+ "rewards/penalize_unwanted_words/std": 0.0,
4210
+ "rewards/reward_func_length/mean": 98.17749786376953,
4211
+ "rewards/reward_func_length/std": 1.5676434755325317,
4212
+ "rewards/reward_keyword_presence/mean": 0.0,
4213
+ "rewards/reward_keyword_presence/std": 0.0,
4214
+ "rewards/reward_short_answers/mean": 9.98863639831543,
4215
+ "rewards/reward_short_answers/std": 0.044715401530265805,
4216
+ "step": 630
4217
+ },
4218
+ {
4219
+ "clip_ratio/high_max": 0.0,
4220
+ "clip_ratio/high_mean": 0.0,
4221
+ "clip_ratio/low_mean": 0.0,
4222
+ "clip_ratio/low_min": 0.0,
4223
+ "clip_ratio/region_mean": 0.0,
4224
+ "completions/clipped_ratio": 1.0,
4225
+ "completions/max_length": 32.0,
4226
+ "completions/max_terminated_length": 0.0,
4227
+ "completions/mean_length": 32.0,
4228
+ "completions/mean_terminated_length": 0.0,
4229
+ "completions/min_length": 32.0,
4230
+ "completions/min_terminated_length": 0.0,
4231
+ "epoch": 0.1504739336492891,
4232
+ "grad_norm": 0.17389771342277527,
4233
+ "kl": 0.729296875,
4234
+ "learning_rate": 7.109682030555283e-05,
4235
+ "loss": 0.0291,
4236
+ "num_tokens": 1086523.0,
4237
+ "reward": 949.6324951171875,
4238
+ "reward_std": 10.87011775970459,
4239
+ "rewards/concise_response_reward/mean": 9.994318199157714,
4240
+ "rewards/concise_response_reward/std": 0.03214122056961059,
4241
+ "rewards/penalize_unwanted_words/mean": 1.0,
4242
+ "rewards/penalize_unwanted_words/std": 0.0,
4243
+ "rewards/reward_func_length/mean": 98.18061981201171,
4244
+ "rewards/reward_func_length/std": 1.482282567024231,
4245
+ "rewards/reward_keyword_presence/mean": 0.0,
4246
+ "rewards/reward_keyword_presence/std": 0.0,
4247
+ "rewards/reward_short_answers/mean": 9.994318199157714,
4248
+ "rewards/reward_short_answers/std": 0.03214122056961059,
4249
+ "step": 635
4250
+ },
4251
+ {
4252
+ "clip_ratio/high_max": 0.0,
4253
+ "clip_ratio/high_mean": 0.0,
4254
+ "clip_ratio/low_mean": 0.0,
4255
+ "clip_ratio/low_min": 0.0,
4256
+ "clip_ratio/region_mean": 0.0,
4257
+ "completions/clipped_ratio": 1.0,
4258
+ "completions/max_length": 32.0,
4259
+ "completions/max_terminated_length": 0.0,
4260
+ "completions/mean_length": 32.0,
4261
+ "completions/mean_terminated_length": 0.0,
4262
+ "completions/min_length": 32.0,
4263
+ "completions/min_terminated_length": 0.0,
4264
+ "epoch": 0.15165876777251186,
4265
+ "grad_norm": 0.13560406863689423,
4266
+ "kl": 0.76875,
4267
+ "learning_rate": 6.943046950368944e-05,
4268
+ "loss": 0.0308,
4269
+ "num_tokens": 1095301.0,
4270
+ "reward": 951.1200439453125,
4271
+ "reward_std": 7.643131446838379,
4272
+ "rewards/concise_response_reward/mean": 10.0,
4273
+ "rewards/concise_response_reward/std": 0.0,
4274
+ "rewards/penalize_unwanted_words/mean": 1.0,
4275
+ "rewards/penalize_unwanted_words/std": 0.0,
4276
+ "rewards/reward_func_length/mean": 98.33937377929688,
4277
+ "rewards/reward_func_length/std": 1.4927765369415282,
4278
+ "rewards/reward_keyword_presence/mean": 0.0,
4279
+ "rewards/reward_keyword_presence/std": 0.0,
4280
+ "rewards/reward_short_answers/mean": 10.0,
4281
+ "rewards/reward_short_answers/std": 0.0,
4282
+ "step": 640
4283
+ },
4284
+ {
4285
+ "clip_ratio/high_max": 0.0,
4286
+ "clip_ratio/high_mean": 0.0,
4287
+ "clip_ratio/low_mean": 0.0,
4288
+ "clip_ratio/low_min": 0.0,
4289
+ "clip_ratio/region_mean": 0.0,
4290
+ "completions/clipped_ratio": 1.0,
4291
+ "completions/max_length": 32.0,
4292
+ "completions/max_terminated_length": 0.0,
4293
+ "completions/mean_length": 32.0,
4294
+ "completions/mean_terminated_length": 0.0,
4295
+ "completions/min_length": 32.0,
4296
+ "completions/min_terminated_length": 0.0,
4297
+ "epoch": 0.1528436018957346,
4298
+ "grad_norm": 0.045086201280355453,
4299
+ "kl": 0.6765625,
4300
+ "learning_rate": 6.77734304769489e-05,
4301
+ "loss": 0.0271,
4302
+ "num_tokens": 1104133.0,
4303
+ "reward": 951.9698120117188,
4304
+ "reward_std": 7.085178184509277,
4305
+ "rewards/concise_response_reward/mean": 9.982954597473144,
4306
+ "rewards/concise_response_reward/std": 0.09642366170883179,
4307
+ "rewards/penalize_unwanted_words/mean": 1.0,
4308
+ "rewards/penalize_unwanted_words/std": 0.0,
4309
+ "rewards/reward_func_length/mean": 98.43999938964843,
4310
+ "rewards/reward_func_length/std": 1.1985483288764953,
4311
+ "rewards/reward_keyword_presence/mean": 0.0,
4312
+ "rewards/reward_keyword_presence/std": 0.0,
4313
+ "rewards/reward_short_answers/mean": 9.982954597473144,
4314
+ "rewards/reward_short_answers/std": 0.09642366170883179,
4315
+ "step": 645
4316
+ },
4317
+ {
4318
+ "clip_ratio/high_max": 0.0,
4319
+ "clip_ratio/high_mean": 0.0,
4320
+ "clip_ratio/low_mean": 0.0,
4321
+ "clip_ratio/low_min": 0.0,
4322
+ "clip_ratio/region_mean": 0.0,
4323
+ "completions/clipped_ratio": 1.0,
4324
+ "completions/max_length": 32.0,
4325
+ "completions/max_terminated_length": 0.0,
4326
+ "completions/mean_length": 32.0,
4327
+ "completions/mean_terminated_length": 0.0,
4328
+ "completions/min_length": 32.0,
4329
+ "completions/min_terminated_length": 0.0,
4330
+ "epoch": 0.15402843601895735,
4331
+ "grad_norm": 0.06662766635417938,
4332
+ "kl": 0.7509765625,
4333
+ "learning_rate": 6.612620797547087e-05,
4334
+ "loss": 0.03,
4335
+ "num_tokens": 1112649.0,
4336
+ "reward": 951.80498046875,
4337
+ "reward_std": 8.269881916046142,
4338
+ "rewards/concise_response_reward/mean": 10.0,
4339
+ "rewards/concise_response_reward/std": 0.0,
4340
+ "rewards/penalize_unwanted_words/mean": 1.0,
4341
+ "rewards/penalize_unwanted_words/std": 0.0,
4342
+ "rewards/reward_func_length/mean": 98.41374816894532,
4343
+ "rewards/reward_func_length/std": 1.2879498958587647,
4344
+ "rewards/reward_keyword_presence/mean": 0.0,
4345
+ "rewards/reward_keyword_presence/std": 0.0,
4346
+ "rewards/reward_short_answers/mean": 10.0,
4347
+ "rewards/reward_short_answers/std": 0.0,
4348
+ "step": 650
4349
+ },
4350
+ {
4351
+ "clip_ratio/high_max": 0.0,
4352
+ "clip_ratio/high_mean": 0.0,
4353
+ "clip_ratio/low_mean": 0.0,
4354
+ "clip_ratio/low_min": 0.0,
4355
+ "clip_ratio/region_mean": 0.0,
4356
+ "completions/clipped_ratio": 1.0,
4357
+ "completions/max_length": 32.0,
4358
+ "completions/max_terminated_length": 0.0,
4359
+ "completions/mean_length": 32.0,
4360
+ "completions/mean_terminated_length": 0.0,
4361
+ "completions/min_length": 32.0,
4362
+ "completions/min_terminated_length": 0.0,
4363
+ "epoch": 0.1552132701421801,
4364
+ "grad_norm": 0.08734223991632462,
4365
+ "kl": 0.721875,
4366
+ "learning_rate": 6.448930375918631e-05,
4367
+ "loss": 0.0289,
4368
+ "num_tokens": 1121563.0,
4369
+ "reward": 949.6842895507813,
4370
+ "reward_std": 10.617791748046875,
4371
+ "rewards/concise_response_reward/mean": 9.994318199157714,
4372
+ "rewards/concise_response_reward/std": 0.03214122056961059,
4373
+ "rewards/penalize_unwanted_words/mean": 1.0,
4374
+ "rewards/penalize_unwanted_words/std": 0.0,
4375
+ "rewards/reward_func_length/mean": 98.18624725341797,
4376
+ "rewards/reward_func_length/std": 1.4956665992736817,
4377
+ "rewards/reward_keyword_presence/mean": 0.0,
4378
+ "rewards/reward_keyword_presence/std": 0.0,
4379
+ "rewards/reward_short_answers/mean": 9.994318199157714,
4380
+ "rewards/reward_short_answers/std": 0.03214122056961059,
4381
+ "step": 655
4382
+ },
4383
+ {
4384
+ "clip_ratio/high_max": 0.0,
4385
+ "clip_ratio/high_mean": 0.0,
4386
+ "clip_ratio/low_mean": 0.0,
4387
+ "clip_ratio/low_min": 0.0,
4388
+ "clip_ratio/region_mean": 0.0,
4389
+ "completions/clipped_ratio": 1.0,
4390
+ "completions/max_length": 32.0,
4391
+ "completions/max_terminated_length": 0.0,
4392
+ "completions/mean_length": 32.0,
4393
+ "completions/mean_terminated_length": 0.0,
4394
+ "completions/min_length": 32.0,
4395
+ "completions/min_terminated_length": 0.0,
4396
+ "epoch": 0.15639810426540285,
4397
+ "grad_norm": 0.12375020235776901,
4398
+ "kl": 0.783984375,
4399
+ "learning_rate": 6.286321644497655e-05,
4400
+ "loss": 0.0313,
4401
+ "num_tokens": 1130465.0,
4402
+ "reward": 945.558056640625,
4403
+ "reward_std": 11.715877628326416,
4404
+ "rewards/concise_response_reward/mean": 9.982954597473144,
4405
+ "rewards/concise_response_reward/std": 0.0768566220998764,
4406
+ "rewards/penalize_unwanted_words/mean": 1.0,
4407
+ "rewards/penalize_unwanted_words/std": 0.0,
4408
+ "rewards/reward_func_length/mean": 97.74375,
4409
+ "rewards/reward_func_length/std": 1.8698417901992799,
4410
+ "rewards/reward_keyword_presence/mean": 0.0,
4411
+ "rewards/reward_keyword_presence/std": 0.0,
4412
+ "rewards/reward_short_answers/mean": 9.982954597473144,
4413
+ "rewards/reward_short_answers/std": 0.0768566220998764,
4414
+ "step": 660
4415
+ },
4416
+ {
4417
+ "clip_ratio/high_max": 0.0,
4418
+ "clip_ratio/high_mean": 0.0,
4419
+ "clip_ratio/low_mean": 0.0,
4420
+ "clip_ratio/low_min": 0.0,
4421
+ "clip_ratio/region_mean": 0.0,
4422
+ "completions/clipped_ratio": 1.0,
4423
+ "completions/max_length": 32.0,
4424
+ "completions/max_terminated_length": 0.0,
4425
+ "completions/mean_length": 32.0,
4426
+ "completions/mean_terminated_length": 0.0,
4427
+ "completions/min_length": 32.0,
4428
+ "completions/min_terminated_length": 0.0,
4429
+ "epoch": 0.15758293838862558,
4430
+ "grad_norm": 0.03891945630311966,
4431
+ "kl": 0.734765625,
4432
+ "learning_rate": 6.12484413547897e-05,
4433
+ "loss": 0.0294,
4434
+ "num_tokens": 1138789.0,
4435
+ "reward": 944.3750122070312,
4436
+ "reward_std": 10.409846115112305,
4437
+ "rewards/concise_response_reward/mean": 9.98863639831543,
4438
+ "rewards/concise_response_reward/std": 0.06428244113922119,
4439
+ "rewards/penalize_unwanted_words/mean": 1.0,
4440
+ "rewards/penalize_unwanted_words/std": 0.0,
4441
+ "rewards/reward_func_length/mean": 97.61249694824218,
4442
+ "rewards/reward_func_length/std": 1.8993584871292115,
4443
+ "rewards/reward_keyword_presence/mean": 0.0,
4444
+ "rewards/reward_keyword_presence/std": 0.0,
4445
+ "rewards/reward_short_answers/mean": 9.98863639831543,
4446
+ "rewards/reward_short_answers/std": 0.06428244113922119,
4447
+ "step": 665
4448
+ },
4449
+ {
4450
+ "clip_ratio/high_max": 0.0,
4451
+ "clip_ratio/high_mean": 0.0,
4452
+ "clip_ratio/low_mean": 0.0,
4453
+ "clip_ratio/low_min": 0.0,
4454
+ "clip_ratio/region_mean": 0.0,
4455
+ "completions/clipped_ratio": 1.0,
4456
+ "completions/max_length": 32.0,
4457
+ "completions/max_terminated_length": 0.0,
4458
+ "completions/mean_length": 32.0,
4459
+ "completions/mean_terminated_length": 0.0,
4460
+ "completions/min_length": 32.0,
4461
+ "completions/min_terminated_length": 0.0,
4462
+ "epoch": 0.15876777251184834,
4463
+ "grad_norm": 0.18572458624839783,
4464
+ "kl": 0.755859375,
4465
+ "learning_rate": 5.964547036476099e-05,
4466
+ "loss": 0.0302,
4467
+ "num_tokens": 1147299.0,
4468
+ "reward": 949.0652954101563,
4469
+ "reward_std": 10.223407363891601,
4470
+ "rewards/concise_response_reward/mean": 10.0,
4471
+ "rewards/concise_response_reward/std": 0.0,
4472
+ "rewards/penalize_unwanted_words/mean": 1.0,
4473
+ "rewards/penalize_unwanted_words/std": 0.0,
4474
+ "rewards/reward_func_length/mean": 98.11624908447266,
4475
+ "rewards/reward_func_length/std": 1.5753605127334596,
4476
+ "rewards/reward_keyword_presence/mean": 0.0,
4477
+ "rewards/reward_keyword_presence/std": 0.0,
4478
+ "rewards/reward_short_answers/mean": 10.0,
4479
+ "rewards/reward_short_answers/std": 0.0,
4480
+ "step": 670
4481
+ },
4482
+ {
4483
+ "clip_ratio/high_max": 0.0,
4484
+ "clip_ratio/high_mean": 0.0,
4485
+ "clip_ratio/low_mean": 0.0,
4486
+ "clip_ratio/low_min": 0.0,
4487
+ "clip_ratio/region_mean": 0.0,
4488
+ "completions/clipped_ratio": 1.0,
4489
+ "completions/max_length": 32.0,
4490
+ "completions/max_terminated_length": 0.0,
4491
+ "completions/mean_length": 32.0,
4492
+ "completions/mean_terminated_length": 0.0,
4493
+ "completions/min_length": 32.0,
4494
+ "completions/min_terminated_length": 0.0,
4495
+ "epoch": 0.15995260663507108,
4496
+ "grad_norm": 0.08468377590179443,
4497
+ "kl": 0.874609375,
4498
+ "learning_rate": 5.805479175538229e-05,
4499
+ "loss": 0.035,
4500
+ "num_tokens": 1155879.0,
4501
+ "reward": 946.0927978515625,
4502
+ "reward_std": 11.903859615325928,
4503
+ "rewards/concise_response_reward/mean": 9.994318199157714,
4504
+ "rewards/concise_response_reward/std": 0.03214122056961059,
4505
+ "rewards/penalize_unwanted_words/mean": 1.0,
4506
+ "rewards/penalize_unwanted_words/std": 0.0,
4507
+ "rewards/reward_func_length/mean": 97.79624481201172,
4508
+ "rewards/reward_func_length/std": 1.7525642395019532,
4509
+ "rewards/reward_keyword_presence/mean": 0.0,
4510
+ "rewards/reward_keyword_presence/std": 0.0,
4511
+ "rewards/reward_short_answers/mean": 9.994318199157714,
4512
+ "rewards/reward_short_answers/std": 0.03214122056961059,
4513
+ "step": 675
4514
+ },
4515
+ {
4516
+ "clip_ratio/high_max": 0.0,
4517
+ "clip_ratio/high_mean": 0.0,
4518
+ "clip_ratio/low_mean": 0.0,
4519
+ "clip_ratio/low_min": 0.0,
4520
+ "clip_ratio/region_mean": 0.0,
4521
+ "completions/clipped_ratio": 1.0,
4522
+ "completions/max_length": 32.0,
4523
+ "completions/max_terminated_length": 0.0,
4524
+ "completions/mean_length": 32.0,
4525
+ "completions/mean_terminated_length": 0.0,
4526
+ "completions/min_length": 32.0,
4527
+ "completions/min_terminated_length": 0.0,
4528
+ "epoch": 0.16113744075829384,
4529
+ "grad_norm": 0.1105046197772026,
4530
+ "kl": 0.725390625,
4531
+ "learning_rate": 5.647689006276726e-05,
4532
+ "loss": 0.029,
4533
+ "num_tokens": 1164413.0,
4534
+ "reward": 942.4379760742188,
4535
+ "reward_std": 12.221305656433106,
4536
+ "rewards/concise_response_reward/mean": 9.994318199157714,
4537
+ "rewards/concise_response_reward/std": 0.03214122056961059,
4538
+ "rewards/penalize_unwanted_words/mean": 1.0,
4539
+ "rewards/penalize_unwanted_words/std": 0.0,
4540
+ "rewards/reward_func_length/mean": 97.39937744140624,
4541
+ "rewards/reward_func_length/std": 2.0443582057952883,
4542
+ "rewards/reward_keyword_presence/mean": 0.0,
4543
+ "rewards/reward_keyword_presence/std": 0.0,
4544
+ "rewards/reward_short_answers/mean": 9.994318199157714,
4545
+ "rewards/reward_short_answers/std": 0.03214122056961059,
4546
+ "step": 680
4547
+ },
4548
+ {
4549
+ "clip_ratio/high_max": 0.0,
4550
+ "clip_ratio/high_mean": 0.0,
4551
+ "clip_ratio/low_mean": 0.0,
4552
+ "clip_ratio/low_min": 0.0,
4553
+ "clip_ratio/region_mean": 0.0,
4554
+ "completions/clipped_ratio": 1.0,
4555
+ "completions/max_length": 32.0,
4556
+ "completions/max_terminated_length": 0.0,
4557
+ "completions/mean_length": 32.0,
4558
+ "completions/mean_terminated_length": 0.0,
4559
+ "completions/min_length": 32.0,
4560
+ "completions/min_terminated_length": 0.0,
4561
+ "epoch": 0.1623222748815166,
4562
+ "grad_norm": 0.07082920521497726,
4563
+ "kl": 0.771875,
4564
+ "learning_rate": 5.491224593105695e-05,
4565
+ "loss": 0.0309,
4566
+ "num_tokens": 1173043.0,
4567
+ "reward": 944.4613525390625,
4568
+ "reward_std": 13.601359939575195,
4569
+ "rewards/concise_response_reward/mean": 9.98863639831543,
4570
+ "rewards/concise_response_reward/std": 0.06428244113922119,
4571
+ "rewards/penalize_unwanted_words/mean": 1.0,
4572
+ "rewards/penalize_unwanted_words/std": 0.0,
4573
+ "rewards/reward_func_length/mean": 97.62187347412109,
4574
+ "rewards/reward_func_length/std": 1.89430890083313,
4575
+ "rewards/reward_keyword_presence/mean": 0.0,
4576
+ "rewards/reward_keyword_presence/std": 0.0,
4577
+ "rewards/reward_short_answers/mean": 9.98863639831543,
4578
+ "rewards/reward_short_answers/std": 0.06428244113922119,
4579
+ "step": 685
4580
+ },
4581
+ {
4582
+ "clip_ratio/high_max": 0.0,
4583
+ "clip_ratio/high_mean": 0.0,
4584
+ "clip_ratio/low_mean": 0.0,
4585
+ "clip_ratio/low_min": 0.0,
4586
+ "clip_ratio/region_mean": 0.0,
4587
+ "completions/clipped_ratio": 1.0,
4588
+ "completions/max_length": 32.0,
4589
+ "completions/max_terminated_length": 0.0,
4590
+ "completions/mean_length": 32.0,
4591
+ "completions/mean_terminated_length": 0.0,
4592
+ "completions/min_length": 32.0,
4593
+ "completions/min_terminated_length": 0.0,
4594
+ "epoch": 0.16350710900473933,
4595
+ "grad_norm": 0.08692283183336258,
4596
+ "kl": 0.671484375,
4597
+ "learning_rate": 5.33613359660109e-05,
4598
+ "loss": 0.0269,
4599
+ "num_tokens": 1182055.0,
4600
+ "reward": 950.5879272460937,
4601
+ "reward_std": 8.786369895935058,
4602
+ "rewards/concise_response_reward/mean": 9.994318199157714,
4603
+ "rewards/concise_response_reward/std": 0.03214122056961059,
4604
+ "rewards/penalize_unwanted_words/mean": 1.0,
4605
+ "rewards/penalize_unwanted_words/std": 0.0,
4606
+ "rewards/reward_func_length/mean": 98.2843734741211,
4607
+ "rewards/reward_func_length/std": 1.3933659553527833,
4608
+ "rewards/reward_keyword_presence/mean": 0.0,
4609
+ "rewards/reward_keyword_presence/std": 0.0,
4610
+ "rewards/reward_short_answers/mean": 9.994318199157714,
4611
+ "rewards/reward_short_answers/std": 0.03214122056961059,
4612
+ "step": 690
4613
+ },
4614
+ {
4615
+ "clip_ratio/high_max": 0.0,
4616
+ "clip_ratio/high_mean": 0.0,
4617
+ "clip_ratio/low_mean": 0.0,
4618
+ "clip_ratio/low_min": 0.0,
4619
+ "clip_ratio/region_mean": 0.0,
4620
+ "completions/clipped_ratio": 1.0,
4621
+ "completions/max_length": 32.0,
4622
+ "completions/max_terminated_length": 0.0,
4623
+ "completions/mean_length": 32.0,
4624
+ "completions/mean_terminated_length": 0.0,
4625
+ "completions/min_length": 32.0,
4626
+ "completions/min_terminated_length": 0.0,
4627
+ "epoch": 0.1646919431279621,
4628
+ "grad_norm": 0.03239274397492409,
4629
+ "kl": 0.648828125,
4630
+ "learning_rate": 5.182463258982846e-05,
4631
+ "loss": 0.026,
4632
+ "num_tokens": 1190273.0,
4633
+ "reward": 952.5474487304688,
4634
+ "reward_std": 6.796604633331299,
4635
+ "rewards/concise_response_reward/mean": 10.0,
4636
+ "rewards/concise_response_reward/std": 0.0,
4637
+ "rewards/penalize_unwanted_words/mean": 1.0,
4638
+ "rewards/penalize_unwanted_words/std": 0.0,
4639
+ "rewards/reward_func_length/mean": 98.49437103271484,
4640
+ "rewards/reward_func_length/std": 1.1651976346969604,
4641
+ "rewards/reward_keyword_presence/mean": 0.0,
4642
+ "rewards/reward_keyword_presence/std": 0.0,
4643
+ "rewards/reward_short_answers/mean": 10.0,
4644
+ "rewards/reward_short_answers/std": 0.0,
4645
+ "step": 695
4646
+ },
4647
+ {
4648
+ "epoch": 0.16587677725118483,
4649
+ "grad_norm": 0.12050563842058182,
4650
+ "learning_rate": 5.0302603897244474e-05,
4651
+ "loss": 0.0256,
4652
+ "step": 700
4653
+ },
4654
+ {
4655
+ "epoch": 0.16587677725118483,
4656
+ "eval_clip_ratio/high_max": 0.0,
4657
+ "eval_clip_ratio/high_mean": 0.0,
4658
+ "eval_clip_ratio/low_mean": 0.0,
4659
+ "eval_clip_ratio/low_min": 0.0,
4660
+ "eval_clip_ratio/region_mean": 0.0,
4661
+ "eval_completions/clipped_ratio": 1.0,
4662
+ "eval_completions/max_length": 32.0,
4663
+ "eval_completions/max_terminated_length": 0.0,
4664
+ "eval_completions/mean_length": 32.0,
4665
+ "eval_completions/mean_terminated_length": 0.0,
4666
+ "eval_completions/min_length": 32.0,
4667
+ "eval_completions/min_terminated_length": 0.0,
4668
+ "eval_kl": 0.6245924581692913,
4669
+ "eval_loss": 0.02497861161828041,
4670
+ "eval_num_tokens": 1198575.0,
4671
+ "eval_reward": 955.6451142078309,
4672
+ "eval_reward_std": 4.155148893066748,
4673
+ "eval_rewards/concise_response_reward/mean": 9.998881535267266,
4674
+ "eval_rewards/concise_response_reward/std": 0.004473873304100488,
4675
+ "eval_rewards/penalize_unwanted_words/mean": 1.0,
4676
+ "eval_rewards/penalize_unwanted_words/std": 0.0,
4677
+ "eval_rewards/reward_func_length/mean": 98.83129651527705,
4678
+ "eval_rewards/reward_func_length/std": 0.8231426833828134,
4679
+ "eval_rewards/reward_keyword_presence/mean": 0.0,
4680
+ "eval_rewards/reward_keyword_presence/std": 0.0,
4681
+ "eval_rewards/reward_short_answers/mean": 9.998881535267266,
4682
+ "eval_rewards/reward_short_answers/std": 0.004473873304100488,
4683
+ "eval_runtime": 503.6669,
4684
+ "eval_samples_per_second": 4.021,
4685
+ "eval_steps_per_second": 0.252,
4686
+ "step": 700
4687
  }
4688
  ],
4689
  "logging_steps": 5,
4690
  "max_steps": 1000,
4691
+ "num_input_tokens_seen": 1198575,
4692
  "num_train_epochs": 1,
4693
  "save_steps": 100,
4694
  "stateful_callbacks": {