miniHui committed on
Commit
16f2d2c
·
verified ·
1 Parent(s): ceaf222

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/xu-chenhui-university-at-buffalo/huggingface/runs/6vym73va)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/xu-chenhui-university-at-buffalo/huggingface/runs/kngvriz9)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 4.388007793262409,
4
- "train_runtime": 12362.3849,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.607,
7
  "train_steps_per_second": 0.005
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.1775998589502455,
4
+ "train_runtime": 12344.1744,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.608,
7
  "train_steps_per_second": 0.005
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13c9b80e402dee3f491c30d70242b8d5a35474d993c3344f25eb00d47abcd75f
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e142400035f4987d6f51fdee88a9392b4bc9b4c81365d4a200cd51943231c557
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e103756e1b1e5795c03b9792af7eaa911ee339b9771e06f1f88a7b7d680fe56
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85c1946ad4f5cfc6524d240d49f8efc23fb6d34716bf51c9455158663d2b9c0c
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50f0495772f3bc2af5d23d70d6fda1f2446925f69440b84e5b306ddcd03fc170
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff4acc930ccb23d9a2ff03a77d8533cb803cf50654e36d748623845308eefd60
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0333313ee66af909e14a590df9353448e619bd785ff71ddc7baaebe5c17a9fda
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cc28e0df4f7aa580096a8cd5d178ffbca499490d9e8c70c6005fc668be6dbf0
3
  size 1089994880
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 4.388007793262409,
4
- "train_runtime": 12362.3849,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.607,
7
  "train_steps_per_second": 0.005
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.1775998589502455,
4
+ "train_runtime": 12344.1744,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.608,
7
  "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -12,7 +12,7 @@
12
  "clip_ratio": 0.0,
13
  "completion_length": 594.6719055175781,
14
  "epoch": 0.017057569296375266,
15
- "grad_norm": 0.3890414834022522,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
  "loss": -0.0015,
@@ -24,172 +24,172 @@
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
- "completion_length": 595.9648714065552,
28
  "epoch": 0.08528784648187633,
29
- "grad_norm": 0.4401928186416626,
30
- "kl": 0.00044229626655578613,
31
  "learning_rate": 2.5e-06,
32
- "loss": 0.0173,
33
- "reward": 0.6018415447324514,
34
- "reward_std": 0.34527207911014557,
35
- "rewards/accuracy_reward": 0.6018415447324514,
36
  "rewards/format_reward": 0.0,
37
  "step": 5
38
  },
39
  {
40
  "clip_ratio": 0.0,
41
- "completion_length": 615.8573936462402,
42
  "epoch": 0.17057569296375266,
43
- "grad_norm": 30.63307762145996,
44
- "kl": 984.0092399597168,
45
  "learning_rate": 2.956412726139078e-06,
46
- "loss": 50.2875,
47
- "reward": 0.6660714596509933,
48
- "reward_std": 0.3170145872980356,
49
- "rewards/accuracy_reward": 0.6660714596509933,
50
  "rewards/format_reward": 0.0,
51
  "step": 10
52
  },
53
  {
54
  "clip_ratio": 0.0,
55
- "completion_length": 607.8788246154785,
56
  "epoch": 0.255863539445629,
57
- "grad_norm": 0.2509593069553375,
58
- "kl": 3.2091552734375,
59
  "learning_rate": 2.7836719084521715e-06,
60
- "loss": 0.2167,
61
- "reward": 0.7511161014437675,
62
- "reward_std": 0.22802229821681977,
63
- "rewards/accuracy_reward": 0.7511161014437675,
64
  "rewards/format_reward": 0.0,
65
  "step": 15
66
  },
67
  {
68
  "clip_ratio": 0.0,
69
- "completion_length": 598.6384201049805,
70
  "epoch": 0.3411513859275053,
71
- "grad_norm": 0.41024672985076904,
72
- "kl": 0.00501861572265625,
73
  "learning_rate": 2.4946839873611927e-06,
74
- "loss": 0.0701,
75
- "reward": 0.7580357491970062,
76
- "reward_std": 0.2117175567895174,
77
- "rewards/accuracy_reward": 0.7580357491970062,
78
  "rewards/format_reward": 0.0,
79
  "step": 20
80
  },
81
  {
82
  "clip_ratio": 0.0,
83
- "completion_length": 600.18842086792,
84
  "epoch": 0.42643923240938164,
85
- "grad_norm": 0.7129307985305786,
86
- "kl": 0.01684722900390625,
87
  "learning_rate": 2.1156192081791355e-06,
88
- "loss": 0.05,
89
- "reward": 0.7569196775555611,
90
- "reward_std": 0.19429435413330792,
91
- "rewards/accuracy_reward": 0.7569196775555611,
92
  "rewards/format_reward": 0.0,
93
  "step": 25
94
  },
95
  {
96
  "clip_ratio": 0.0,
97
- "completion_length": 600.5020393371582,
98
  "epoch": 0.511727078891258,
99
- "grad_norm": 0.0800187885761261,
100
- "kl": 0.003276824951171875,
101
  "learning_rate": 1.6808050203829845e-06,
102
- "loss": 0.0427,
103
- "reward": 0.7566964626312256,
104
- "reward_std": 0.17697864044457673,
105
- "rewards/accuracy_reward": 0.7566964626312256,
106
  "rewards/format_reward": 0.0,
107
  "step": 30
108
  },
109
  {
110
  "clip_ratio": 0.0,
111
- "completion_length": 596.0426574707031,
112
  "epoch": 0.5970149253731343,
113
- "grad_norm": 0.23726417124271393,
114
- "kl": 0.00370025634765625,
115
  "learning_rate": 1.2296174432791415e-06,
116
- "loss": 0.0363,
117
- "reward": 0.7477678917348385,
118
- "reward_std": 0.18066087178885937,
119
- "rewards/accuracy_reward": 0.7477678917348385,
120
  "rewards/format_reward": 0.0,
121
  "step": 35
122
  },
123
  {
124
  "clip_ratio": 0.0,
125
- "completion_length": 582.9788246154785,
126
  "epoch": 0.6823027718550106,
127
- "grad_norm": 0.12389203161001205,
128
- "kl": 0.004132461547851562,
129
  "learning_rate": 8.029152419343472e-07,
130
- "loss": 0.0357,
131
- "reward": 0.7654018223285675,
132
- "reward_std": 0.17269687270745634,
133
- "rewards/accuracy_reward": 0.7654018223285675,
134
  "rewards/format_reward": 0.0,
135
  "step": 40
136
  },
137
  {
138
  "clip_ratio": 0.0,
139
- "completion_length": 602.0515853881836,
140
  "epoch": 0.767590618336887,
141
- "grad_norm": 0.19584427773952484,
142
- "kl": 0.0103118896484375,
143
  "learning_rate": 4.3933982822017883e-07,
144
- "loss": 0.0363,
145
- "reward": 0.7444196760654449,
146
- "reward_std": 0.19971248973160982,
147
- "rewards/accuracy_reward": 0.7444196760654449,
148
  "rewards/format_reward": 0.0,
149
  "step": 45
150
  },
151
  {
152
  "clip_ratio": 0.0,
153
- "completion_length": 607.6902076721192,
154
  "epoch": 0.8528784648187633,
155
- "grad_norm": 0.12652064859867096,
156
- "kl": 0.014777755737304688,
157
  "learning_rate": 1.718159615201853e-07,
158
- "loss": 0.0375,
159
- "reward": 0.7363839626312256,
160
- "reward_std": 0.19309807270765306,
161
- "rewards/accuracy_reward": 0.7363839626312256,
162
  "rewards/format_reward": 0.0,
163
  "step": 50
164
  },
165
  {
166
  "clip_ratio": 0.0,
167
- "completion_length": 596.0453399658203,
168
  "epoch": 0.9381663113006397,
169
- "grad_norm": 0.10281035304069519,
170
- "kl": 0.010649490356445312,
171
  "learning_rate": 2.4570139579284723e-08,
172
- "loss": 0.039,
173
- "reward": 0.7823661074042321,
174
- "reward_std": 0.18417297434061766,
175
- "rewards/accuracy_reward": 0.7823661074042321,
176
  "rewards/format_reward": 0.0,
177
  "step": 55
178
  },
179
  {
180
  "clip_ratio": 0.0,
181
- "completion_length": 590.6122868855795,
182
  "epoch": 0.9893390191897654,
183
- "kl": 0.014527638753255209,
184
- "reward": 0.7656250397364298,
185
- "reward_std": 0.1785029562500616,
186
- "rewards/accuracy_reward": 0.7656250397364298,
187
  "rewards/format_reward": 0.0,
188
  "step": 58,
189
  "total_flos": 0.0,
190
- "train_loss": 4.388007793262409,
191
- "train_runtime": 12362.3849,
192
- "train_samples_per_second": 0.607,
193
  "train_steps_per_second": 0.005
194
  }
195
  ],
 
12
  "clip_ratio": 0.0,
13
  "completion_length": 594.6719055175781,
14
  "epoch": 0.017057569296375266,
15
+ "grad_norm": 0.3901163935661316,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
  "loss": -0.0015,
 
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
+ "completion_length": 609.8289909362793,
28
  "epoch": 0.08528784648187633,
29
+ "grad_norm": 0.5891452431678772,
30
+ "kl": 0.0003406107425689697,
31
  "learning_rate": 2.5e-06,
32
+ "loss": 0.028,
33
+ "reward": 0.5943080652505159,
34
+ "reward_std": 0.35135408770293,
35
+ "rewards/accuracy_reward": 0.5943080652505159,
36
  "rewards/format_reward": 0.0,
37
  "step": 5
38
  },
39
  {
40
  "clip_ratio": 0.0,
41
+ "completion_length": 622.6977935791016,
42
  "epoch": 0.17057569296375266,
43
+ "grad_norm": 4.026884078979492,
44
+ "kl": 0.009809780120849609,
45
  "learning_rate": 2.956412726139078e-06,
46
+ "loss": 0.0682,
47
+ "reward": 0.6770089581608772,
48
+ "reward_std": 0.3084997434169054,
49
+ "rewards/accuracy_reward": 0.6770089581608772,
50
  "rewards/format_reward": 0.0,
51
  "step": 10
52
  },
53
  {
54
  "clip_ratio": 0.0,
55
+ "completion_length": 604.310961151123,
56
  "epoch": 0.255863539445629,
57
+ "grad_norm": 0.44674399495124817,
58
+ "kl": 30.434170150756835,
59
  "learning_rate": 2.7836719084521715e-06,
60
+ "loss": 1.5901,
61
+ "reward": 0.763392886519432,
62
+ "reward_std": 0.21235014032572508,
63
+ "rewards/accuracy_reward": 0.763392886519432,
64
  "rewards/format_reward": 0.0,
65
  "step": 15
66
  },
67
  {
68
  "clip_ratio": 0.0,
69
+ "completion_length": 595.2562767028809,
70
  "epoch": 0.3411513859275053,
71
+ "grad_norm": 0.13691851496696472,
72
+ "kl": 0.004651832580566406,
73
  "learning_rate": 2.4946839873611927e-06,
74
+ "loss": 0.0554,
75
+ "reward": 0.7613839641213417,
76
+ "reward_std": 0.21757735572755338,
77
+ "rewards/accuracy_reward": 0.7613839641213417,
78
  "rewards/format_reward": 0.0,
79
  "step": 20
80
  },
81
  {
82
  "clip_ratio": 0.0,
83
+ "completion_length": 601.6230163574219,
84
  "epoch": 0.42643923240938164,
85
+ "grad_norm": 0.20785608887672424,
86
+ "kl": 0.02872943878173828,
87
  "learning_rate": 2.1156192081791355e-06,
88
+ "loss": 0.0575,
89
+ "reward": 0.7502232536673545,
90
+ "reward_std": 0.2002022891305387,
91
+ "rewards/accuracy_reward": 0.7502232536673545,
92
  "rewards/format_reward": 0.0,
93
  "step": 25
94
  },
95
  {
96
  "clip_ratio": 0.0,
97
+ "completion_length": 603.0156471252442,
98
  "epoch": 0.511727078891258,
99
+ "grad_norm": 0.1560502052307129,
100
+ "kl": 0.00396270751953125,
101
  "learning_rate": 1.6808050203829845e-06,
102
+ "loss": 0.0352,
103
+ "reward": 0.7459821790456772,
104
+ "reward_std": 0.18388189654797316,
105
+ "rewards/accuracy_reward": 0.7459821790456772,
106
  "rewards/format_reward": 0.0,
107
  "step": 30
108
  },
109
  {
110
  "clip_ratio": 0.0,
111
+ "completion_length": 601.2908729553222,
112
  "epoch": 0.5970149253731343,
113
+ "grad_norm": 0.11733976751565933,
114
+ "kl": 0.04864578247070313,
115
  "learning_rate": 1.2296174432791415e-06,
116
+ "loss": 0.0487,
117
+ "reward": 0.7290178924798966,
118
+ "reward_std": 0.19413371523842216,
119
+ "rewards/accuracy_reward": 0.7290178924798966,
120
  "rewards/format_reward": 0.0,
121
  "step": 35
122
  },
123
  {
124
  "clip_ratio": 0.0,
125
+ "completion_length": 581.8761444091797,
126
  "epoch": 0.6823027718550106,
127
+ "grad_norm": 0.21926052868366241,
128
+ "kl": 0.005985260009765625,
129
  "learning_rate": 8.029152419343472e-07,
130
+ "loss": 0.049,
131
+ "reward": 0.7500000327825547,
132
+ "reward_std": 0.19939510114490985,
133
+ "rewards/accuracy_reward": 0.7500000327825547,
134
  "rewards/format_reward": 0.0,
135
  "step": 40
136
  },
137
  {
138
  "clip_ratio": 0.0,
139
+ "completion_length": 601.2828376770019,
140
  "epoch": 0.767590618336887,
141
+ "grad_norm": 0.22137849032878876,
142
+ "kl": 0.004445266723632812,
143
  "learning_rate": 4.3933982822017883e-07,
144
+ "loss": 0.043,
145
+ "reward": 0.7392857477068902,
146
+ "reward_std": 0.21266973707824946,
147
+ "rewards/accuracy_reward": 0.7392857477068902,
148
  "rewards/format_reward": 0.0,
149
  "step": 45
150
  },
151
  {
152
  "clip_ratio": 0.0,
153
+ "completion_length": 607.684846496582,
154
  "epoch": 0.8528784648187633,
155
+ "grad_norm": 0.12936115264892578,
156
+ "kl": 0.010895919799804688,
157
  "learning_rate": 1.718159615201853e-07,
158
+ "loss": 0.0298,
159
+ "reward": 0.7388393193483352,
160
+ "reward_std": 0.19434305084869266,
161
+ "rewards/accuracy_reward": 0.7388393193483352,
162
  "rewards/format_reward": 0.0,
163
  "step": 50
164
  },
165
  {
166
  "clip_ratio": 0.0,
167
+ "completion_length": 595.9553848266602,
168
  "epoch": 0.9381663113006397,
169
+ "grad_norm": 0.17488008737564087,
170
+ "kl": 0.0073909759521484375,
171
  "learning_rate": 2.4570139579284723e-08,
172
+ "loss": 0.0423,
173
+ "reward": 0.7638393193483353,
174
+ "reward_std": 0.2086773581802845,
175
+ "rewards/accuracy_reward": 0.7638393193483353,
176
  "rewards/format_reward": 0.0,
177
  "step": 55
178
  },
179
  {
180
  "clip_ratio": 0.0,
181
+ "completion_length": 583.5217819213867,
182
  "epoch": 0.9893390191897654,
183
+ "kl": 0.0069802602132161455,
184
+ "reward": 0.7615327710906664,
185
+ "reward_std": 0.1866085703174273,
186
+ "rewards/accuracy_reward": 0.7615327710906664,
187
  "rewards/format_reward": 0.0,
188
  "step": 58,
189
  "total_flos": 0.0,
190
+ "train_loss": 0.1775998589502455,
191
+ "train_runtime": 12344.1744,
192
+ "train_samples_per_second": 0.608,
193
  "train_steps_per_second": 0.005
194
  }
195
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40548b7ba1945e131c4fd6018bea3574757ea071c3c3d41188a11baf07d15fca
3
  size 7992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a0d090777e2139e561f164b5f8a1981f106c6db4ec7ebb318aac6848731f2fe
3
  size 7992