Lines committed on
Commit
bfe838b
·
verified ·
1 Parent(s): 6c9a53e

Model save

Browse files
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen2.5-1.5B-Instruct
3
+ library_name: transformers
4
+ model_name: Qwen2.5-1.5B-Open-R1-Distill
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - sft
9
+ license: license
10
+ ---
11
+
12
+ # Model Card for Qwen2.5-1.5B-Open-R1-Distill
13
+
14
+ This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="Lines/Qwen2.5-1.5B-Open-R1-Distill", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/lineshogan-bigai/huggingface/runs/h0plmf0y)
31
+
32
+
33
+ This model was trained with SFT.
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.16.0.dev0
38
+ - Transformers: 4.50.1
39
+ - Pytorch: 2.6.0+cu124
40
+ - Datasets: 3.4.1
41
+ - Tokenizers: 0.21.1
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @misc{vonwerra2022trl,
51
+ title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
53
+ year = 2020,
54
+ journal = {GitHub repository},
55
+ publisher = {GitHub},
56
+ howpublished = {\url{https://github.com/huggingface/trl}}
57
+ }
58
+ ```
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 34173306634240.0,
3
+ "train_loss": 0.6066549909420502,
4
+ "train_runtime": 5973.3628,
5
+ "train_samples": 93733,
6
+ "train_samples_per_second": 1.674,
7
+ "train_steps_per_second": 0.052
8
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.50.1"
14
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 34173306634240.0,
3
+ "train_loss": 0.6066549909420502,
4
+ "train_runtime": 5973.3628,
5
+ "train_samples": 93733,
6
+ "train_samples_per_second": 1.674,
7
+ "train_steps_per_second": 0.052
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.9984,
6
+ "eval_steps": 500,
7
+ "global_step": 312,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.016,
14
+ "grad_norm": 2.1790294647216797,
15
+ "learning_rate": 1.5625e-05,
16
+ "loss": 0.8982,
17
+ "step": 5
18
+ },
19
+ {
20
+ "epoch": 0.032,
21
+ "grad_norm": 1.2921503782272339,
22
+ "learning_rate": 3.125e-05,
23
+ "loss": 0.7771,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.048,
28
+ "grad_norm": 1.0662598609924316,
29
+ "learning_rate": 4.6875e-05,
30
+ "loss": 0.7065,
31
+ "step": 15
32
+ },
33
+ {
34
+ "epoch": 0.064,
35
+ "grad_norm": 0.8291209936141968,
36
+ "learning_rate": 4.9979726739605334e-05,
37
+ "loss": 0.6569,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.08,
42
+ "grad_norm": 0.8745065927505493,
43
+ "learning_rate": 4.989742922931149e-05,
44
+ "loss": 0.6506,
45
+ "step": 25
46
+ },
47
+ {
48
+ "epoch": 0.096,
49
+ "grad_norm": 0.6823293566703796,
50
+ "learning_rate": 4.975207191995552e-05,
51
+ "loss": 0.6579,
52
+ "step": 30
53
+ },
54
+ {
55
+ "epoch": 0.112,
56
+ "grad_norm": 0.7768641114234924,
57
+ "learning_rate": 4.95440640639845e-05,
58
+ "loss": 0.6842,
59
+ "step": 35
60
+ },
61
+ {
62
+ "epoch": 0.128,
63
+ "grad_norm": 0.6644716262817383,
64
+ "learning_rate": 4.927399130600373e-05,
65
+ "loss": 0.6327,
66
+ "step": 40
67
+ },
68
+ {
69
+ "epoch": 0.144,
70
+ "grad_norm": 0.6596384644508362,
71
+ "learning_rate": 4.894261403389862e-05,
72
+ "loss": 0.6664,
73
+ "step": 45
74
+ },
75
+ {
76
+ "epoch": 0.16,
77
+ "grad_norm": 0.6109484434127808,
78
+ "learning_rate": 4.855086523796815e-05,
79
+ "loss": 0.6369,
80
+ "step": 50
81
+ },
82
+ {
83
+ "epoch": 0.176,
84
+ "grad_norm": 0.6302951574325562,
85
+ "learning_rate": 4.8099847884097434e-05,
86
+ "loss": 0.6217,
87
+ "step": 55
88
+ },
89
+ {
90
+ "epoch": 0.192,
91
+ "grad_norm": 0.6894915699958801,
92
+ "learning_rate": 4.7590831808365293e-05,
93
+ "loss": 0.6364,
94
+ "step": 60
95
+ },
96
+ {
97
+ "epoch": 0.208,
98
+ "grad_norm": 0.6988873481750488,
99
+ "learning_rate": 4.702525014183007e-05,
100
+ "loss": 0.6244,
101
+ "step": 65
102
+ },
103
+ {
104
+ "epoch": 0.224,
105
+ "grad_norm": 0.6387168765068054,
106
+ "learning_rate": 4.6404695275559475e-05,
107
+ "loss": 0.614,
108
+ "step": 70
109
+ },
110
+ {
111
+ "epoch": 0.24,
112
+ "grad_norm": 0.6547486186027527,
113
+ "learning_rate": 4.57309143772652e-05,
114
+ "loss": 0.6034,
115
+ "step": 75
116
+ },
117
+ {
118
+ "epoch": 0.256,
119
+ "grad_norm": 0.6504953503608704,
120
+ "learning_rate": 4.500580447216489e-05,
121
+ "loss": 0.6265,
122
+ "step": 80
123
+ },
124
+ {
125
+ "epoch": 0.272,
126
+ "grad_norm": 0.5533855557441711,
127
+ "learning_rate": 4.423140710192144e-05,
128
+ "loss": 0.6178,
129
+ "step": 85
130
+ },
131
+ {
132
+ "epoch": 0.288,
133
+ "grad_norm": 0.5795829892158508,
134
+ "learning_rate": 4.340990257669732e-05,
135
+ "loss": 0.6243,
136
+ "step": 90
137
+ },
138
+ {
139
+ "epoch": 0.304,
140
+ "grad_norm": 0.5757337808609009,
141
+ "learning_rate": 4.254360383650716e-05,
142
+ "loss": 0.605,
143
+ "step": 95
144
+ },
145
+ {
146
+ "epoch": 0.32,
147
+ "grad_norm": 0.6556103825569153,
148
+ "learning_rate": 4.163494993915196e-05,
149
+ "loss": 0.6046,
150
+ "step": 100
151
+ },
152
+ {
153
+ "epoch": 0.336,
154
+ "grad_norm": 0.6166912913322449,
155
+ "learning_rate": 4.0686499193069595e-05,
156
+ "loss": 0.6243,
157
+ "step": 105
158
+ },
159
+ {
160
+ "epoch": 0.352,
161
+ "grad_norm": 0.6043514609336853,
162
+ "learning_rate": 3.970092195443604e-05,
163
+ "loss": 0.6182,
164
+ "step": 110
165
+ },
166
+ {
167
+ "epoch": 0.368,
168
+ "grad_norm": 0.5503015518188477,
169
+ "learning_rate": 3.8680993108796956e-05,
170
+ "loss": 0.619,
171
+ "step": 115
172
+ },
173
+ {
174
+ "epoch": 0.384,
175
+ "grad_norm": 12.36988353729248,
176
+ "learning_rate": 3.7629584258397646e-05,
177
+ "loss": 0.5871,
178
+ "step": 120
179
+ },
180
+ {
181
+ "epoch": 0.4,
182
+ "grad_norm": 0.5975585579872131,
183
+ "learning_rate": 3.65496556372078e-05,
184
+ "loss": 0.6111,
185
+ "step": 125
186
+ },
187
+ {
188
+ "epoch": 0.416,
189
+ "grad_norm": 0.5625191330909729,
190
+ "learning_rate": 3.5444247776404274e-05,
191
+ "loss": 0.5999,
192
+ "step": 130
193
+ },
194
+ {
195
+ "epoch": 0.432,
196
+ "grad_norm": 0.5412049889564514,
197
+ "learning_rate": 3.4316472943777736e-05,
198
+ "loss": 0.5846,
199
+ "step": 135
200
+ },
201
+ {
202
+ "epoch": 0.448,
203
+ "grad_norm": 0.5721265077590942,
204
+ "learning_rate": 3.316950638116532e-05,
205
+ "loss": 0.5975,
206
+ "step": 140
207
+ },
208
+ {
209
+ "epoch": 0.464,
210
+ "grad_norm": 0.5761215090751648,
211
+ "learning_rate": 3.2006577364580284e-05,
212
+ "loss": 0.6136,
213
+ "step": 145
214
+ },
215
+ {
216
+ "epoch": 0.48,
217
+ "grad_norm": 0.5800392627716064,
218
+ "learning_rate": 3.083096011220896e-05,
219
+ "loss": 0.5969,
220
+ "step": 150
221
+ },
222
+ {
223
+ "epoch": 0.496,
224
+ "grad_norm": 0.5748460292816162,
225
+ "learning_rate": 2.9645964565873207e-05,
226
+ "loss": 0.6043,
227
+ "step": 155
228
+ },
229
+ {
230
+ "epoch": 0.512,
231
+ "grad_norm": 0.5393357872962952,
232
+ "learning_rate": 2.845492707191334e-05,
233
+ "loss": 0.5814,
234
+ "step": 160
235
+ },
236
+ {
237
+ "epoch": 0.528,
238
+ "grad_norm": 0.5506784319877625,
239
+ "learning_rate": 2.7261200987729242e-05,
240
+ "loss": 0.5676,
241
+ "step": 165
242
+ },
243
+ {
244
+ "epoch": 0.544,
245
+ "grad_norm": 0.5413019061088562,
246
+ "learning_rate": 2.606814724042701e-05,
247
+ "loss": 0.6135,
248
+ "step": 170
249
+ },
250
+ {
251
+ "epoch": 0.56,
252
+ "grad_norm": 0.5511438846588135,
253
+ "learning_rate": 2.4879124864153163e-05,
254
+ "loss": 0.5744,
255
+ "step": 175
256
+ },
257
+ {
258
+ "epoch": 0.576,
259
+ "grad_norm": 0.5734318494796753,
260
+ "learning_rate": 2.36974815427584e-05,
261
+ "loss": 0.5972,
262
+ "step": 180
263
+ },
264
+ {
265
+ "epoch": 0.592,
266
+ "grad_norm": 0.5379422307014465,
267
+ "learning_rate": 2.252654418441808e-05,
268
+ "loss": 0.5717,
269
+ "step": 185
270
+ },
271
+ {
272
+ "epoch": 0.608,
273
+ "grad_norm": 0.5357218980789185,
274
+ "learning_rate": 2.136960955474649e-05,
275
+ "loss": 0.6015,
276
+ "step": 190
277
+ },
278
+ {
279
+ "epoch": 0.624,
280
+ "grad_norm": 0.5256953835487366,
281
+ "learning_rate": 2.0229934994777195e-05,
282
+ "loss": 0.5913,
283
+ "step": 195
284
+ },
285
+ {
286
+ "epoch": 0.64,
287
+ "grad_norm": 0.5051993131637573,
288
+ "learning_rate": 1.911072924994306e-05,
289
+ "loss": 0.579,
290
+ "step": 200
291
+ },
292
+ {
293
+ "epoch": 0.656,
294
+ "grad_norm": 0.5518410205841064,
295
+ "learning_rate": 1.801514343587688e-05,
296
+ "loss": 0.5805,
297
+ "step": 205
298
+ },
299
+ {
300
+ "epoch": 0.672,
301
+ "grad_norm": 0.50865238904953,
302
+ "learning_rate": 1.6946262166468175e-05,
303
+ "loss": 0.5962,
304
+ "step": 210
305
+ },
306
+ {
307
+ "epoch": 0.688,
308
+ "grad_norm": 0.5044121146202087,
309
+ "learning_rate": 1.590709486915524e-05,
310
+ "loss": 0.5689,
311
+ "step": 215
312
+ },
313
+ {
314
+ "epoch": 0.704,
315
+ "grad_norm": 0.4880557060241699,
316
+ "learning_rate": 1.4900567311904107e-05,
317
+ "loss": 0.5671,
318
+ "step": 220
319
+ },
320
+ {
321
+ "epoch": 0.72,
322
+ "grad_norm": 0.5397293567657471,
323
+ "learning_rate": 1.392951336573011e-05,
324
+ "loss": 0.5693,
325
+ "step": 225
326
+ },
327
+ {
328
+ "epoch": 0.736,
329
+ "grad_norm": 0.5184421539306641,
330
+ "learning_rate": 1.2996667025954618e-05,
331
+ "loss": 0.5693,
332
+ "step": 230
333
+ },
334
+ {
335
+ "epoch": 0.752,
336
+ "grad_norm": 0.5067721009254456,
337
+ "learning_rate": 1.2104654714661188e-05,
338
+ "loss": 0.5693,
339
+ "step": 235
340
+ },
341
+ {
342
+ "epoch": 0.768,
343
+ "grad_norm": 0.4883371591567993,
344
+ "learning_rate": 1.1255987886023202e-05,
345
+ "loss": 0.5749,
346
+ "step": 240
347
+ },
348
+ {
349
+ "epoch": 0.784,
350
+ "grad_norm": 0.4902109205722809,
351
+ "learning_rate": 1.0453055955322938e-05,
352
+ "loss": 0.5791,
353
+ "step": 245
354
+ },
355
+ {
356
+ "epoch": 0.8,
357
+ "grad_norm": 0.4747011065483093,
358
+ "learning_rate": 9.698119571570258e-06,
359
+ "loss": 0.5795,
360
+ "step": 250
361
+ },
362
+ {
363
+ "epoch": 0.816,
364
+ "grad_norm": 0.4571741223335266,
365
+ "learning_rate": 8.993304252661744e-06,
366
+ "loss": 0.5657,
367
+ "step": 255
368
+ },
369
+ {
370
+ "epoch": 0.832,
371
+ "grad_norm": 0.48521485924720764,
372
+ "learning_rate": 8.340594401000496e-06,
373
+ "loss": 0.5773,
374
+ "step": 260
375
+ },
376
+ {
377
+ "epoch": 0.848,
378
+ "grad_norm": 0.4582931399345398,
379
+ "learning_rate": 7.741827716425654e-06,
380
+ "loss": 0.5822,
381
+ "step": 265
382
+ },
383
+ {
384
+ "epoch": 0.864,
385
+ "grad_norm": 0.4858649671077728,
386
+ "learning_rate": 7.198690022181837e-06,
387
+ "loss": 0.5826,
388
+ "step": 270
389
+ },
390
+ {
391
+ "epoch": 0.88,
392
+ "grad_norm": 0.49482160806655884,
393
+ "learning_rate": 6.712710518496049e-06,
394
+ "loss": 0.5588,
395
+ "step": 275
396
+ },
397
+ {
398
+ "epoch": 0.896,
399
+ "grad_norm": 0.4916088581085205,
400
+ "learning_rate": 6.285257477125605e-06,
401
+ "loss": 0.5716,
402
+ "step": 280
403
+ },
404
+ {
405
+ "epoch": 0.912,
406
+ "grad_norm": 0.4659317433834076,
407
+ "learning_rate": 5.9175343889989275e-06,
408
+ "loss": 0.5621,
409
+ "step": 285
410
+ },
411
+ {
412
+ "epoch": 0.928,
413
+ "grad_norm": 0.5164335370063782,
414
+ "learning_rate": 5.610576575795573e-06,
415
+ "loss": 0.55,
416
+ "step": 290
417
+ },
418
+ {
419
+ "epoch": 0.944,
420
+ "grad_norm": 0.5051558017730713,
421
+ "learning_rate": 5.36524827500562e-06,
422
+ "loss": 0.5709,
423
+ "step": 295
424
+ },
425
+ {
426
+ "epoch": 0.96,
427
+ "grad_norm": 0.4648708403110504,
428
+ "learning_rate": 5.182240206675272e-06,
429
+ "loss": 0.5661,
430
+ "step": 300
431
+ },
432
+ {
433
+ "epoch": 0.976,
434
+ "grad_norm": 0.47396937012672424,
435
+ "learning_rate": 5.06206762868959e-06,
436
+ "loss": 0.5582,
437
+ "step": 305
438
+ },
439
+ {
440
+ "epoch": 0.992,
441
+ "grad_norm": 0.46491411328315735,
442
+ "learning_rate": 5.005068886067688e-06,
443
+ "loss": 0.5557,
444
+ "step": 310
445
+ },
446
+ {
447
+ "epoch": 0.9984,
448
+ "step": 312,
449
+ "total_flos": 34173306634240.0,
450
+ "train_loss": 0.6066549909420502,
451
+ "train_runtime": 5973.3628,
452
+ "train_samples_per_second": 1.674,
453
+ "train_steps_per_second": 0.052
454
+ }
455
+ ],
456
+ "logging_steps": 5,
457
+ "max_steps": 312,
458
+ "num_input_tokens_seen": 0,
459
+ "num_train_epochs": 1,
460
+ "save_steps": 100,
461
+ "stateful_callbacks": {
462
+ "TrainerControl": {
463
+ "args": {
464
+ "should_epoch_stop": false,
465
+ "should_evaluate": false,
466
+ "should_log": false,
467
+ "should_save": true,
468
+ "should_training_stop": true
469
+ },
470
+ "attributes": {}
471
+ }
472
+ },
473
+ "total_flos": 34173306634240.0,
474
+ "train_batch_size": 2,
475
+ "trial_name": null,
476
+ "trial_params": null
477
+ }