ZMC2019 committed on
Commit
798cd0a
·
verified ·
1 Parent(s): 8f2aaf7

Model save

Browse files
Files changed (4) hide show
  1. README.md +4 -6
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +109 -193
README.md CHANGED
@@ -1,19 +1,17 @@
1
  ---
2
  base_model: Qwen/Qwen2.5-0.5B-Instruct
3
- datasets: YangZhoumill/bestofn
4
  library_name: transformers
5
- model_name: Qwen2.5-0.5B-Instruct-4230041
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - sft
11
  licence: license
12
  ---
13
 
14
- # Model Card for Qwen2.5-0.5B-Instruct-4230041
15
 
16
- This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on the [YangZhoumill/bestofn](https://huggingface.co/datasets/YangZhoumill/bestofn) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/stevenzhou0816100/huggingface/runs/vv5r4n2p)
33
 
34
 
35
  This model was trained with SFT.
 
1
  ---
2
  base_model: Qwen/Qwen2.5-0.5B-Instruct
 
3
  library_name: transformers
4
+ model_name: Qwen2.5-0.5B-Instruct
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - sft
9
  licence: license
10
  ---
11
 
12
+ # Model Card for Qwen2.5-0.5B-Instruct
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/stevenzhou0816100/huggingface/runs/hq1anqyw)
31
 
32
 
33
  This model was trained with SFT.
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 5.27742227447808e+16,
3
- "train_loss": 0.6343651420258462,
4
- "train_runtime": 365.6281,
5
- "train_samples": 11477,
6
- "train_samples_per_second": 4.103,
7
- "train_steps_per_second": 0.514
8
  }
 
1
  {
2
+ "total_flos": 3.486616950459597e+16,
3
+ "train_loss": 0.9192673854827881,
4
+ "train_runtime": 250.2316,
5
+ "train_samples": 7473,
6
+ "train_samples_per_second": 3.996,
7
+ "train_steps_per_second": 0.5
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 5.27742227447808e+16,
3
- "train_loss": 0.6343651420258462,
4
- "train_runtime": 365.6281,
5
- "train_samples": 11477,
6
- "train_samples_per_second": 4.103,
7
- "train_steps_per_second": 0.514
8
  }
 
1
  {
2
+ "total_flos": 3.486616950459597e+16,
3
+ "train_loss": 0.9192673854827881,
4
+ "train_runtime": 250.2316,
5
+ "train_samples": 7473,
6
+ "train_samples_per_second": 3.996,
7
+ "train_steps_per_second": 0.5
8
  }
trainer_state.json CHANGED
@@ -4,282 +4,198 @@
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
  "eval_steps": 500,
7
- "global_step": 188,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.026595744680851064,
14
- "grad_norm": 67.0,
15
- "learning_rate": 2e-05,
16
- "loss": 2.9921,
17
  "step": 5
18
  },
19
  {
20
- "epoch": 0.05319148936170213,
21
- "grad_norm": 11.1875,
22
- "learning_rate": 4.5e-05,
23
- "loss": 1.2994,
24
  "step": 10
25
  },
26
  {
27
- "epoch": 0.0797872340425532,
28
- "grad_norm": 7.75,
29
- "learning_rate": 4.9943953065892183e-05,
30
- "loss": 0.9284,
31
  "step": 15
32
  },
33
  {
34
- "epoch": 0.10638297872340426,
35
- "grad_norm": 7.125,
36
- "learning_rate": 4.97167408627609e-05,
37
- "loss": 0.9604,
38
  "step": 20
39
  },
40
  {
41
- "epoch": 0.13297872340425532,
42
- "grad_norm": 5.40625,
43
- "learning_rate": 4.931662736686749e-05,
44
- "loss": 0.8497,
45
  "step": 25
46
  },
47
  {
48
- "epoch": 0.1595744680851064,
49
- "grad_norm": 8.3125,
50
- "learning_rate": 4.8746726452615006e-05,
51
- "loss": 0.835,
52
  "step": 30
53
  },
54
  {
55
- "epoch": 0.18617021276595744,
56
- "grad_norm": 11.1875,
57
- "learning_rate": 4.801147336122219e-05,
58
- "loss": 0.7549,
59
  "step": 35
60
  },
61
  {
62
- "epoch": 0.2127659574468085,
63
- "grad_norm": 7.78125,
64
- "learning_rate": 4.7116590183557117e-05,
65
- "loss": 0.7351,
66
  "step": 40
67
  },
68
  {
69
- "epoch": 0.2393617021276596,
70
- "grad_norm": 9.375,
71
- "learning_rate": 4.6069041328092e-05,
72
- "loss": 0.6336,
73
  "step": 45
74
  },
75
  {
76
- "epoch": 0.26595744680851063,
77
- "grad_norm": 10.625,
78
- "learning_rate": 4.487697932054875e-05,
79
- "loss": 0.7065,
80
  "step": 50
81
  },
82
  {
83
- "epoch": 0.2925531914893617,
84
- "grad_norm": 394.0,
85
- "learning_rate": 4.354968135704856e-05,
86
- "loss": 0.7266,
87
  "step": 55
88
  },
89
  {
90
- "epoch": 0.3191489361702128,
91
- "grad_norm": 632.0,
92
- "learning_rate": 4.209747710453884e-05,
93
- "loss": 0.7077,
94
  "step": 60
95
  },
96
  {
97
- "epoch": 0.34574468085106386,
98
- "grad_norm": 17.625,
99
- "learning_rate": 4.05316683103895e-05,
100
- "loss": 0.721,
101
  "step": 65
102
  },
103
  {
104
- "epoch": 0.3723404255319149,
105
- "grad_norm": 13.9375,
106
- "learning_rate": 3.886444084679482e-05,
107
- "loss": 0.5756,
108
  "step": 70
109
  },
110
  {
111
- "epoch": 0.39893617021276595,
112
- "grad_norm": 14.4375,
113
- "learning_rate": 3.710876987449346e-05,
114
- "loss": 0.5324,
115
  "step": 75
116
  },
117
  {
118
- "epoch": 0.425531914893617,
119
- "grad_norm": 24.0,
120
- "learning_rate": 3.527831886386802e-05,
121
- "loss": 0.5874,
122
  "step": 80
123
  },
124
  {
125
- "epoch": 0.4521276595744681,
126
- "grad_norm": 14.25,
127
- "learning_rate": 3.338733325928991e-05,
128
- "loss": 0.5228,
129
  "step": 85
130
  },
131
  {
132
- "epoch": 0.4787234042553192,
133
- "grad_norm": 24.875,
134
- "learning_rate": 3.1450529614264715e-05,
135
- "loss": 0.5005,
136
  "step": 90
137
  },
138
  {
139
- "epoch": 0.5053191489361702,
140
- "grad_norm": 15.75,
141
- "learning_rate": 2.9482981060181047e-05,
142
- "loss": 0.4858,
143
  "step": 95
144
  },
145
  {
146
- "epoch": 0.5319148936170213,
147
- "grad_norm": 21.125,
148
- "learning_rate": 2.7500000000000004e-05,
149
- "loss": 0.5236,
150
  "step": 100
151
  },
152
  {
153
- "epoch": 0.5585106382978723,
154
- "grad_norm": 10.3125,
155
- "learning_rate": 2.551701893981897e-05,
156
- "loss": 0.456,
157
  "step": 105
158
  },
159
  {
160
- "epoch": 0.5851063829787234,
161
- "grad_norm": 15.0,
162
- "learning_rate": 2.354947038573529e-05,
163
- "loss": 0.4441,
164
  "step": 110
165
  },
166
  {
167
- "epoch": 0.6117021276595744,
168
- "grad_norm": 11.1875,
169
- "learning_rate": 2.1612666740710093e-05,
170
- "loss": 0.4312,
171
  "step": 115
172
  },
173
  {
174
- "epoch": 0.6382978723404256,
175
- "grad_norm": 61.5,
176
- "learning_rate": 1.972168113613198e-05,
177
- "loss": 0.4592,
178
  "step": 120
179
  },
180
  {
181
- "epoch": 0.6648936170212766,
182
- "grad_norm": 18.25,
183
- "learning_rate": 1.7891230125506537e-05,
184
- "loss": 0.4386,
185
  "step": 125
186
  },
187
- {
188
- "epoch": 0.6914893617021277,
189
- "grad_norm": 64.0,
190
- "learning_rate": 1.6135559153205187e-05,
191
- "loss": 0.4423,
192
- "step": 130
193
- },
194
- {
195
- "epoch": 0.7180851063829787,
196
- "grad_norm": 10.0,
197
- "learning_rate": 1.4468331689610495e-05,
198
- "loss": 0.4148,
199
- "step": 135
200
- },
201
- {
202
- "epoch": 0.7446808510638298,
203
- "grad_norm": 8.1875,
204
- "learning_rate": 1.2902522895461167e-05,
205
- "loss": 0.3673,
206
- "step": 140
207
- },
208
- {
209
- "epoch": 0.7712765957446809,
210
- "grad_norm": 7.65625,
211
- "learning_rate": 1.1450318642951452e-05,
212
- "loss": 0.3963,
213
- "step": 145
214
- },
215
- {
216
- "epoch": 0.7978723404255319,
217
- "grad_norm": 7.25,
218
- "learning_rate": 1.0123020679451254e-05,
219
- "loss": 0.4026,
220
- "step": 150
221
- },
222
- {
223
- "epoch": 0.824468085106383,
224
- "grad_norm": 18.375,
225
- "learning_rate": 8.930958671908013e-06,
226
- "loss": 0.4144,
227
- "step": 155
228
- },
229
- {
230
- "epoch": 0.851063829787234,
231
- "grad_norm": 8.5,
232
- "learning_rate": 7.883409816442882e-06,
233
- "loss": 0.3811,
234
- "step": 160
235
- },
236
- {
237
- "epoch": 0.8776595744680851,
238
- "grad_norm": 14.3125,
239
- "learning_rate": 6.988526638777812e-06,
240
- "loss": 0.4083,
241
- "step": 165
242
- },
243
- {
244
- "epoch": 0.9042553191489362,
245
- "grad_norm": 34.75,
246
- "learning_rate": 6.253273547384997e-06,
247
- "loss": 0.4174,
248
- "step": 170
249
- },
250
- {
251
- "epoch": 0.9308510638297872,
252
- "grad_norm": 13.3125,
253
- "learning_rate": 5.683372633132516e-06,
254
- "loss": 0.3967,
255
- "step": 175
256
- },
257
- {
258
- "epoch": 0.9574468085106383,
259
- "grad_norm": 14.25,
260
- "learning_rate": 5.283259137239111e-06,
261
- "loss": 0.3973,
262
- "step": 180
263
- },
264
- {
265
- "epoch": 0.9840425531914894,
266
- "grad_norm": 18.125,
267
- "learning_rate": 5.056046934107815e-06,
268
- "loss": 0.3972,
269
- "step": 185
270
- },
271
  {
272
  "epoch": 1.0,
273
- "step": 188,
274
- "total_flos": 5.27742227447808e+16,
275
- "train_loss": 0.6343651420258462,
276
- "train_runtime": 365.6281,
277
- "train_samples_per_second": 4.103,
278
- "train_steps_per_second": 0.514
279
  }
280
  ],
281
  "logging_steps": 5,
282
- "max_steps": 188,
283
  "num_input_tokens_seen": 0,
284
  "num_train_epochs": 1,
285
  "save_steps": 500,
@@ -295,7 +211,7 @@
295
  "attributes": {}
296
  }
297
  },
298
- "total_flos": 5.27742227447808e+16,
299
  "train_batch_size": 2,
300
  "trial_name": null,
301
  "trial_params": null
 
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 125,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.04,
14
+ "grad_norm": 62.0,
15
+ "learning_rate": 2.857142857142857e-05,
16
+ "loss": 3.2566,
17
  "step": 5
18
  },
19
  {
20
+ "epoch": 0.08,
21
+ "grad_norm": 9.625,
22
+ "learning_rate": 4.996811065272715e-05,
23
+ "loss": 1.5876,
24
  "step": 10
25
  },
26
  {
27
+ "epoch": 0.12,
28
+ "grad_norm": 7.28125,
29
+ "learning_rate": 4.9610392803331726e-05,
30
+ "loss": 1.2605,
31
  "step": 15
32
  },
33
  {
34
+ "epoch": 0.16,
35
+ "grad_norm": 49.75,
36
+ "learning_rate": 4.8861446190538576e-05,
37
+ "loss": 1.1706,
38
  "step": 20
39
  },
40
  {
41
+ "epoch": 0.2,
42
+ "grad_norm": 8.8125,
43
+ "learning_rate": 4.7734522928852436e-05,
44
+ "loss": 1.0933,
45
  "step": 25
46
  },
47
  {
48
+ "epoch": 0.24,
49
+ "grad_norm": 9.75,
50
+ "learning_rate": 4.624956317935659e-05,
51
+ "loss": 1.0288,
52
  "step": 30
53
  },
54
  {
55
+ "epoch": 0.28,
56
+ "grad_norm": 7.59375,
57
+ "learning_rate": 4.443284232176311e-05,
58
+ "loss": 0.9685,
59
  "step": 35
60
  },
61
  {
62
+ "epoch": 0.32,
63
+ "grad_norm": 10.4375,
64
+ "learning_rate": 4.2316506028963374e-05,
65
+ "loss": 0.89,
66
  "step": 40
67
  },
68
  {
69
+ "epoch": 0.36,
70
+ "grad_norm": 9.625,
71
+ "learning_rate": 3.993800147062685e-05,
72
+ "loss": 0.8948,
73
  "step": 45
74
  },
75
  {
76
+ "epoch": 0.4,
77
+ "grad_norm": 16.25,
78
+ "learning_rate": 3.733941471032425e-05,
79
+ "loss": 0.85,
80
  "step": 50
81
  },
82
  {
83
+ "epoch": 0.44,
84
+ "grad_norm": 59.25,
85
+ "learning_rate": 3.4566726020493854e-05,
86
+ "loss": 0.8486,
87
  "step": 55
88
  },
89
  {
90
+ "epoch": 0.48,
91
+ "grad_norm": 78.5,
92
+ "learning_rate": 3.1668996291960073e-05,
93
+ "loss": 0.7863,
94
  "step": 60
95
  },
96
  {
97
+ "epoch": 0.52,
98
+ "grad_norm": 14.5625,
99
+ "learning_rate": 2.869749893394902e-05,
100
+ "loss": 0.7641,
101
  "step": 65
102
  },
103
  {
104
+ "epoch": 0.56,
105
+ "grad_norm": 9.5,
106
+ "learning_rate": 2.570481262505563e-05,
107
+ "loss": 0.7131,
108
  "step": 70
109
  },
110
  {
111
+ "epoch": 0.6,
112
+ "grad_norm": 11.25,
113
+ "learning_rate": 2.2743890968333453e-05,
114
+ "loss": 0.6848,
115
  "step": 75
116
  },
117
  {
118
+ "epoch": 0.64,
119
+ "grad_norm": 13.0625,
120
+ "learning_rate": 1.986712551234432e-05,
121
+ "loss": 0.6762,
122
  "step": 80
123
  },
124
  {
125
+ "epoch": 0.68,
126
+ "grad_norm": 10.625,
127
+ "learning_rate": 1.7125418717390167e-05,
128
+ "loss": 0.643,
129
  "step": 85
130
  },
131
  {
132
+ "epoch": 0.72,
133
+ "grad_norm": 13.375,
134
+ "learning_rate": 1.4567283270175847e-05,
135
+ "loss": 0.6211,
136
  "step": 90
137
  },
138
  {
139
+ "epoch": 0.76,
140
+ "grad_norm": 15.375,
141
+ "learning_rate": 1.2237983683933638e-05,
142
+ "loss": 0.6132,
143
  "step": 95
144
  },
145
  {
146
+ "epoch": 0.8,
147
+ "grad_norm": 11.9375,
148
+ "learning_rate": 1.0178735372827107e-05,
149
+ "loss": 0.6187,
150
  "step": 100
151
  },
152
  {
153
+ "epoch": 0.84,
154
+ "grad_norm": 23.75,
155
+ "learning_rate": 8.425975372482405e-06,
156
+ "loss": 0.6055,
157
  "step": 105
158
  },
159
  {
160
+ "epoch": 0.88,
161
+ "grad_norm": 13.1875,
162
+ "learning_rate": 7.010717610764453e-06,
163
+ "loss": 0.6133,
164
  "step": 110
165
  },
166
  {
167
+ "epoch": 0.92,
168
+ "grad_norm": 12.125,
169
+ "learning_rate": 5.9580041368548775e-06,
170
+ "loss": 0.6015,
171
  "step": 115
172
  },
173
  {
174
+ "epoch": 0.96,
175
+ "grad_norm": 10.1875,
176
+ "learning_rate": 5.286462018769748e-06,
177
+ "loss": 0.5994,
178
  "step": 120
179
  },
180
  {
181
+ "epoch": 1.0,
182
+ "grad_norm": 12.9375,
183
+ "learning_rate": 5.007973749722316e-06,
184
+ "loss": 0.5922,
185
  "step": 125
186
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  {
188
  "epoch": 1.0,
189
+ "step": 125,
190
+ "total_flos": 3.486616950459597e+16,
191
+ "train_loss": 0.9192673854827881,
192
+ "train_runtime": 250.2316,
193
+ "train_samples_per_second": 3.996,
194
+ "train_steps_per_second": 0.5
195
  }
196
  ],
197
  "logging_steps": 5,
198
+ "max_steps": 125,
199
  "num_input_tokens_seen": 0,
200
  "num_train_epochs": 1,
201
  "save_steps": 500,
 
211
  "attributes": {}
212
  }
213
  },
214
+ "total_flos": 3.486616950459597e+16,
215
  "train_batch_size": 2,
216
  "trial_name": null,
217
  "trial_params": null