Blancy committed on
Commit
c4d1fa0
·
verified ·
1 Parent(s): 89e86a5

Model save

Browse files
Files changed (4) hide show
  1. README.md +1 -3
  2. all_results.json +3 -3
  3. train_results.json +3 -3
  4. trainer_state.json +24 -24
README.md CHANGED
@@ -1,10 +1,8 @@
1
  ---
2
- datasets: Blancy/verifiable-coding-problems-SFT
3
  library_name: transformers
4
  model_name: Qwen3-0.6B-Open-R1-Distill
5
  tags:
6
  - generated_from_trainer
7
- - open-r1
8
  - trl
9
  - sft
10
  licence: license
@@ -12,7 +10,7 @@ licence: license
12
 
13
  # Model Card for Qwen3-0.6B-Open-R1-Distill
14
 
15
- This model is a fine-tuned version of [None](https://huggingface.co/None) on the [Blancy/verifiable-coding-problems-SFT](https://huggingface.co/datasets/Blancy/verifiable-coding-problems-SFT) dataset.
16
  It has been trained using [TRL](https://github.com/huggingface/trl).
17
 
18
  ## Quick start
 
1
  ---
 
2
  library_name: transformers
3
  model_name: Qwen3-0.6B-Open-R1-Distill
4
  tags:
5
  - generated_from_trainer
 
6
  - trl
7
  - sft
8
  licence: license
 
10
 
11
  # Model Card for Qwen3-0.6B-Open-R1-Distill
12
 
13
+ This model is a fine-tuned version of [None](https://huggingface.co/None).
14
  It has been trained using [TRL](https://github.com/huggingface/trl).
15
 
16
  ## Quick start
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 1.0840722057776333e+17,
3
- "train_loss": 0.09704206266788522,
4
- "train_runtime": 144.0286,
5
  "train_samples": 1086,
6
- "train_samples_per_second": 86.92,
7
  "train_steps_per_second": 0.687
8
  }
 
1
  {
2
  "total_flos": 1.0840722057776333e+17,
3
+ "train_loss": 0.09704786779904606,
4
+ "train_runtime": 144.1296,
5
  "train_samples": 1086,
6
+ "train_samples_per_second": 86.859,
7
  "train_steps_per_second": 0.687
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 1.0840722057776333e+17,
3
- "train_loss": 0.09704206266788522,
4
- "train_runtime": 144.0286,
5
  "train_samples": 1086,
6
- "train_samples_per_second": 86.92,
7
  "train_steps_per_second": 0.687
8
  }
 
1
  {
2
  "total_flos": 1.0840722057776333e+17,
3
+ "train_loss": 0.09704786779904606,
4
+ "train_runtime": 144.1296,
5
  "train_samples": 1086,
6
+ "train_samples_per_second": 86.859,
7
  "train_steps_per_second": 0.687
8
  }
trainer_state.json CHANGED
@@ -11,14 +11,14 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.15151515151515152,
14
- "grad_norm": 0.7734375,
15
  "learning_rate": 2.4e-05,
16
  "loss": 0.1367,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.30303030303030304,
21
- "grad_norm": 0.443359375,
22
  "learning_rate": 2.9879546090089533e-05,
23
  "loss": 0.1117,
24
  "step": 10
@@ -32,112 +32,112 @@
32
  },
33
  {
34
  "epoch": 0.6060606060606061,
35
- "grad_norm": 0.2451171875,
36
  "learning_rate": 2.8549004284108398e-05,
37
  "loss": 0.099,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.7575757575757576,
42
- "grad_norm": 0.2275390625,
43
  "learning_rate": 2.7368445717222102e-05,
44
  "loss": 0.0965,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.9090909090909091,
49
- "grad_norm": 0.2197265625,
50
  "learning_rate": 2.5885097773607675e-05,
51
  "loss": 0.0973,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.0606060606060606,
56
- "grad_norm": 0.2265625,
57
  "learning_rate": 2.4140285773463036e-05,
58
  "loss": 0.0936,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.2121212121212122,
63
- "grad_norm": 0.2216796875,
64
  "learning_rate": 2.2182619292782524e-05,
65
  "loss": 0.0961,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.3636363636363638,
70
- "grad_norm": 0.205078125,
71
  "learning_rate": 2.0066637925262362e-05,
72
- "loss": 0.093,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.5151515151515151,
77
- "grad_norm": 0.216796875,
78
  "learning_rate": 1.7851291836925332e-05,
79
- "loss": 0.0935,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 1.6666666666666665,
84
- "grad_norm": 0.2138671875,
85
  "learning_rate": 1.559829944444086e-05,
86
  "loss": 0.0932,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 1.8181818181818183,
91
- "grad_norm": 0.2080078125,
92
  "learning_rate": 1.3370427971388369e-05,
93
  "loss": 0.0933,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 1.9696969696969697,
98
- "grad_norm": 0.2294921875,
99
  "learning_rate": 1.1229744785292821e-05,
100
- "loss": 0.0923,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.121212121212121,
105
- "grad_norm": 0.1982421875,
106
  "learning_rate": 9.235888232294472e-06,
107
  "loss": 0.092,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.2727272727272725,
112
- "grad_norm": 0.212890625,
113
  "learning_rate": 7.444406143120487e-06,
114
  "loss": 0.0904,
115
  "step": 75
116
  },
117
  {
118
  "epoch": 2.4242424242424243,
119
- "grad_norm": 0.216796875,
120
  "learning_rate": 5.9052082987380775e-06,
121
  "loss": 0.0926,
122
  "step": 80
123
  },
124
  {
125
  "epoch": 2.5757575757575757,
126
- "grad_norm": 0.20703125,
127
  "learning_rate": 4.6611759692099345e-06,
128
  "loss": 0.0925,
129
  "step": 85
130
  },
131
  {
132
  "epoch": 2.7272727272727275,
133
- "grad_norm": 0.2109375,
134
  "learning_rate": 3.746967263293098e-06,
135
  "loss": 0.0932,
136
  "step": 90
137
  },
138
  {
139
  "epoch": 2.878787878787879,
140
- "grad_norm": 0.2119140625,
141
  "learning_rate": 3.188051571134615e-06,
142
  "loss": 0.0916,
143
  "step": 95
@@ -146,9 +146,9 @@
146
  "epoch": 3.0,
147
  "step": 99,
148
  "total_flos": 1.0840722057776333e+17,
149
- "train_loss": 0.09704206266788522,
150
- "train_runtime": 144.0286,
151
- "train_samples_per_second": 86.92,
152
  "train_steps_per_second": 0.687
153
  }
154
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.15151515151515152,
14
+ "grad_norm": 0.77734375,
15
  "learning_rate": 2.4e-05,
16
  "loss": 0.1367,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.30303030303030304,
21
+ "grad_norm": 0.44140625,
22
  "learning_rate": 2.9879546090089533e-05,
23
  "loss": 0.1117,
24
  "step": 10
 
32
  },
33
  {
34
  "epoch": 0.6060606060606061,
35
+ "grad_norm": 0.244140625,
36
  "learning_rate": 2.8549004284108398e-05,
37
  "loss": 0.099,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.7575757575757576,
42
+ "grad_norm": 0.228515625,
43
  "learning_rate": 2.7368445717222102e-05,
44
  "loss": 0.0965,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.9090909090909091,
49
+ "grad_norm": 0.220703125,
50
  "learning_rate": 2.5885097773607675e-05,
51
  "loss": 0.0973,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.0606060606060606,
56
+ "grad_norm": 0.2255859375,
57
  "learning_rate": 2.4140285773463036e-05,
58
  "loss": 0.0936,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.2121212121212122,
63
+ "grad_norm": 0.2138671875,
64
  "learning_rate": 2.2182619292782524e-05,
65
  "loss": 0.0961,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.3636363636363638,
70
+ "grad_norm": 0.2041015625,
71
  "learning_rate": 2.0066637925262362e-05,
72
+ "loss": 0.0931,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.5151515151515151,
77
+ "grad_norm": 0.2177734375,
78
  "learning_rate": 1.7851291836925332e-05,
79
+ "loss": 0.0936,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 1.6666666666666665,
84
+ "grad_norm": 0.2119140625,
85
  "learning_rate": 1.559829944444086e-05,
86
  "loss": 0.0932,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 1.8181818181818183,
91
+ "grad_norm": 0.2099609375,
92
  "learning_rate": 1.3370427971388369e-05,
93
  "loss": 0.0933,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 1.9696969696969697,
98
+ "grad_norm": 0.2275390625,
99
  "learning_rate": 1.1229744785292821e-05,
100
+ "loss": 0.0924,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.121212121212121,
105
+ "grad_norm": 0.197265625,
106
  "learning_rate": 9.235888232294472e-06,
107
  "loss": 0.092,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.2727272727272725,
112
+ "grad_norm": 0.20703125,
113
  "learning_rate": 7.444406143120487e-06,
114
  "loss": 0.0904,
115
  "step": 75
116
  },
117
  {
118
  "epoch": 2.4242424242424243,
119
+ "grad_norm": 0.220703125,
120
  "learning_rate": 5.9052082987380775e-06,
121
  "loss": 0.0926,
122
  "step": 80
123
  },
124
  {
125
  "epoch": 2.5757575757575757,
126
+ "grad_norm": 0.2080078125,
127
  "learning_rate": 4.6611759692099345e-06,
128
  "loss": 0.0925,
129
  "step": 85
130
  },
131
  {
132
  "epoch": 2.7272727272727275,
133
+ "grad_norm": 0.2099609375,
134
  "learning_rate": 3.746967263293098e-06,
135
  "loss": 0.0932,
136
  "step": 90
137
  },
138
  {
139
  "epoch": 2.878787878787879,
140
+ "grad_norm": 0.2109375,
141
  "learning_rate": 3.188051571134615e-06,
142
  "loss": 0.0916,
143
  "step": 95
 
146
  "epoch": 3.0,
147
  "step": 99,
148
  "total_flos": 1.0840722057776333e+17,
149
+ "train_loss": 0.09704786779904606,
150
+ "train_runtime": 144.1296,
151
+ "train_samples_per_second": 86.859,
152
  "train_steps_per_second": 0.687
153
  }
154
  ],