adpretko committed on
Commit
2a81b46
·
verified ·
1 Parent(s): 8ad5197

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -1
  2. all_results.json +8 -0
  3. train_results.json +8 -0
  4. trainer_state.json +253 -0
  5. training_loss.png +0 -0
README.md CHANGED
@@ -4,6 +4,7 @@ license: apache-2.0
4
  base_model: Qwen/Qwen2.5-Coder-1.5B-Instruct
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: ml815-model7
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # ml815-model7
17
 
18
- This model is a fine-tuned version of [Qwen/Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct) on an unknown dataset.
19
 
20
  ## Model description
21
 
 
4
  base_model: Qwen/Qwen2.5-Coder-1.5B-Instruct
5
  tags:
6
  - llama-factory
7
+ - full
8
  - generated_from_trainer
9
  model-index:
10
  - name: ml815-model7
 
16
 
17
  # ml815-model7
18
 
19
+ This model is a fine-tuned version of [Qwen/Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct) on the ml815 dataset.
20
 
21
  ## Model description
22
 
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 3.151751154761728e+17,
4
+ "train_loss": 0.07644283028868024,
5
+ "train_runtime": 2094.5752,
6
+ "train_samples_per_second": 4.72,
7
+ "train_steps_per_second": 0.148
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 3.151751154761728e+17,
4
+ "train_loss": 0.07644283028868024,
5
+ "train_runtime": 2094.5752,
6
+ "train_samples_per_second": 4.72,
7
+ "train_steps_per_second": 0.148
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 309,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.032362459546925564,
14
+ "grad_norm": 3.9182918071746826,
15
+ "learning_rate": 5.806451612903226e-06,
16
+ "loss": 0.5978,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.06472491909385113,
21
+ "grad_norm": 1.83750581741333,
22
+ "learning_rate": 1.2258064516129034e-05,
23
+ "loss": 0.2269,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.0970873786407767,
28
+ "grad_norm": 1.252933382987976,
29
+ "learning_rate": 1.870967741935484e-05,
30
+ "loss": 0.1207,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.12944983818770225,
35
+ "grad_norm": 0.8644864559173584,
36
+ "learning_rate": 1.9959162014075553e-05,
37
+ "loss": 0.0954,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.16181229773462782,
42
+ "grad_norm": 0.6043422222137451,
43
+ "learning_rate": 1.9793829188147406e-05,
44
+ "loss": 0.079,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.1941747572815534,
49
+ "grad_norm": 0.6312791705131531,
50
+ "learning_rate": 1.9503556665478066e-05,
51
+ "loss": 0.0817,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.22653721682847897,
56
+ "grad_norm": 0.6279880404472351,
57
+ "learning_rate": 1.9092047447238775e-05,
58
+ "loss": 0.0634,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.2588996763754045,
63
+ "grad_norm": 0.5388246178627014,
64
+ "learning_rate": 1.856455114887056e-05,
65
+ "loss": 0.059,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.2912621359223301,
70
+ "grad_norm": 0.46116387844085693,
71
+ "learning_rate": 1.792779703083777e-05,
72
+ "loss": 0.066,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.32362459546925565,
77
+ "grad_norm": 0.5152782201766968,
78
+ "learning_rate": 1.7189908153577473e-05,
79
+ "loss": 0.0594,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.3559870550161812,
84
+ "grad_norm": 0.569451630115509,
85
+ "learning_rate": 1.636029775176862e-05,
86
+ "loss": 0.0589,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.3883495145631068,
91
+ "grad_norm": 0.4153822362422943,
92
+ "learning_rate": 1.544954914987238e-05,
93
+ "loss": 0.0569,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.42071197411003236,
98
+ "grad_norm": 0.3604895770549774,
99
+ "learning_rate": 1.4469280750858854e-05,
100
+ "loss": 0.0528,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.45307443365695793,
105
+ "grad_norm": 0.380843847990036,
106
+ "learning_rate": 1.3431997820456592e-05,
107
+ "loss": 0.0488,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.4854368932038835,
112
+ "grad_norm": 0.38833051919937134,
113
+ "learning_rate": 1.2350932957710322e-05,
114
+ "loss": 0.047,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.517799352750809,
119
+ "grad_norm": 0.4645017683506012,
120
+ "learning_rate": 1.1239877286961123e-05,
121
+ "loss": 0.0457,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.5501618122977346,
126
+ "grad_norm": 0.3448844850063324,
127
+ "learning_rate": 1.01130045247298e-05,
128
+ "loss": 0.0499,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.5825242718446602,
133
+ "grad_norm": 0.47179466485977173,
134
+ "learning_rate": 8.98469016587892e-06,
135
+ "loss": 0.0439,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.6148867313915858,
140
+ "grad_norm": 0.3800075352191925,
141
+ "learning_rate": 7.869328095692313e-06,
142
+ "loss": 0.0429,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.6472491909385113,
147
+ "grad_norm": 0.361077219247818,
148
+ "learning_rate": 6.781146967348283e-06,
149
+ "loss": 0.0429,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.6796116504854369,
154
+ "grad_norm": 0.3779850900173187,
155
+ "learning_rate": 5.7340286872557515e-06,
156
+ "loss": 0.0368,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.7119741100323624,
161
+ "grad_norm": 0.2989918887615204,
162
+ "learning_rate": 4.7413313238324556e-06,
163
+ "loss": 0.0422,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.7443365695792881,
168
+ "grad_norm": 0.34742051362991333,
169
+ "learning_rate": 3.815718698874672e-06,
170
+ "loss": 0.0425,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.7766990291262136,
175
+ "grad_norm": 0.3241797685623169,
176
+ "learning_rate": 2.9689988354181742e-06,
177
+ "loss": 0.0383,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.8090614886731392,
182
+ "grad_norm": 0.3083023428916931,
183
+ "learning_rate": 2.211973323008041e-06,
184
+ "loss": 0.0424,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.8414239482200647,
189
+ "grad_norm": 0.4297429621219635,
190
+ "learning_rate": 1.5542995220217961e-06,
191
+ "loss": 0.042,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.8737864077669902,
196
+ "grad_norm": 0.3360403776168823,
197
+ "learning_rate": 1.0043673649027519e-06,
198
+ "loss": 0.0382,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.9061488673139159,
203
+ "grad_norm": 0.25952842831611633,
204
+ "learning_rate": 5.691923259479093e-07,
205
+ "loss": 0.038,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.9385113268608414,
210
+ "grad_norm": 0.2979937493801117,
211
+ "learning_rate": 2.5432592503288e-07,
212
+ "loss": 0.0386,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.970873786407767,
217
+ "grad_norm": 0.3307276666164398,
218
+ "learning_rate": 6.378490697611761e-08,
219
+ "loss": 0.0407,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 1.0,
224
+ "step": 309,
225
+ "total_flos": 3.151751154761728e+17,
226
+ "train_loss": 0.07644283028868024,
227
+ "train_runtime": 2094.5752,
228
+ "train_samples_per_second": 4.72,
229
+ "train_steps_per_second": 0.148
230
+ }
231
+ ],
232
+ "logging_steps": 10,
233
+ "max_steps": 309,
234
+ "num_input_tokens_seen": 0,
235
+ "num_train_epochs": 1,
236
+ "save_steps": 100,
237
+ "stateful_callbacks": {
238
+ "TrainerControl": {
239
+ "args": {
240
+ "should_epoch_stop": false,
241
+ "should_evaluate": false,
242
+ "should_log": false,
243
+ "should_save": true,
244
+ "should_training_stop": true
245
+ },
246
+ "attributes": {}
247
+ }
248
+ },
249
+ "total_flos": 3.151751154761728e+17,
250
+ "train_batch_size": 8,
251
+ "trial_name": null,
252
+ "trial_params": null
253
+ }
training_loss.png ADDED