adpretko committed on
Commit
d773cbd
·
verified ·
1 Parent(s): b4afb49

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -1
  2. all_results.json +8 -0
  3. train_results.json +8 -0
  4. trainer_state.json +253 -0
  5. training_loss.png +0 -0
README.md CHANGED
@@ -4,6 +4,7 @@ license: apache-2.0
4
  base_model: Qwen/Qwen2.5-Coder-1.5B-Instruct
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: ml815-model9
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # ml815-model9
17
 
18
- This model is a fine-tuned version of [Qwen/Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct) on an unknown dataset.
19
 
20
  ## Model description
21
 
 
4
  base_model: Qwen/Qwen2.5-Coder-1.5B-Instruct
5
  tags:
6
  - llama-factory
7
+ - full
8
  - generated_from_trainer
9
  model-index:
10
  - name: ml815-model9
 
16
 
17
  # ml815-model9
18
 
19
+ This model is a fine-tuned version of [Qwen/Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct) on the ml815 dataset.
20
 
21
  ## Model description
22
 
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 3.151751154761728e+17,
4
+ "train_loss": 0.0763875040132251,
5
+ "train_runtime": 2448.9622,
6
+ "train_samples_per_second": 4.037,
7
+ "train_steps_per_second": 0.126
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 3.151751154761728e+17,
4
+ "train_loss": 0.0763875040132251,
5
+ "train_runtime": 2448.9622,
6
+ "train_samples_per_second": 4.037,
7
+ "train_steps_per_second": 0.126
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 309,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.032362459546925564,
14
+ "grad_norm": 3.9168577194213867,
15
+ "learning_rate": 5.806451612903226e-06,
16
+ "loss": 0.598,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.06472491909385113,
21
+ "grad_norm": 1.8480807542800903,
22
+ "learning_rate": 1.2258064516129034e-05,
23
+ "loss": 0.2239,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.0970873786407767,
28
+ "grad_norm": 0.9047099351882935,
29
+ "learning_rate": 1.870967741935484e-05,
30
+ "loss": 0.1209,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.12944983818770225,
35
+ "grad_norm": 0.9319924712181091,
36
+ "learning_rate": 1.9959162014075553e-05,
37
+ "loss": 0.0953,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.16181229773462782,
42
+ "grad_norm": 0.5594499707221985,
43
+ "learning_rate": 1.9793829188147406e-05,
44
+ "loss": 0.0801,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.1941747572815534,
49
+ "grad_norm": 0.6101599931716919,
50
+ "learning_rate": 1.9503556665478066e-05,
51
+ "loss": 0.082,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.22653721682847897,
56
+ "grad_norm": 0.6025184392929077,
57
+ "learning_rate": 1.9092047447238775e-05,
58
+ "loss": 0.0635,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.2588996763754045,
63
+ "grad_norm": 0.5912179946899414,
64
+ "learning_rate": 1.856455114887056e-05,
65
+ "loss": 0.0586,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.2912621359223301,
70
+ "grad_norm": 0.460332989692688,
71
+ "learning_rate": 1.792779703083777e-05,
72
+ "loss": 0.067,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.32362459546925565,
77
+ "grad_norm": 0.4907929003238678,
78
+ "learning_rate": 1.7189908153577473e-05,
79
+ "loss": 0.0597,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.3559870550161812,
84
+ "grad_norm": 0.522468090057373,
85
+ "learning_rate": 1.636029775176862e-05,
86
+ "loss": 0.0579,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.3883495145631068,
91
+ "grad_norm": 0.43857327103614807,
92
+ "learning_rate": 1.544954914987238e-05,
93
+ "loss": 0.0568,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.42071197411003236,
98
+ "grad_norm": 0.36459407210350037,
99
+ "learning_rate": 1.4469280750858854e-05,
100
+ "loss": 0.0527,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.45307443365695793,
105
+ "grad_norm": 0.39144057035446167,
106
+ "learning_rate": 1.3431997820456592e-05,
107
+ "loss": 0.0491,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.4854368932038835,
112
+ "grad_norm": 0.45727330446243286,
113
+ "learning_rate": 1.2350932957710322e-05,
114
+ "loss": 0.0473,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.517799352750809,
119
+ "grad_norm": 0.4593789875507355,
120
+ "learning_rate": 1.1239877286961123e-05,
121
+ "loss": 0.0463,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.5501618122977346,
126
+ "grad_norm": 0.36281996965408325,
127
+ "learning_rate": 1.01130045247298e-05,
128
+ "loss": 0.05,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.5825242718446602,
133
+ "grad_norm": 0.46488189697265625,
134
+ "learning_rate": 8.98469016587892e-06,
135
+ "loss": 0.0441,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.6148867313915858,
140
+ "grad_norm": 0.39195364713668823,
141
+ "learning_rate": 7.869328095692313e-06,
142
+ "loss": 0.0429,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.6472491909385113,
147
+ "grad_norm": 0.3537571430206299,
148
+ "learning_rate": 6.781146967348283e-06,
149
+ "loss": 0.0432,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.6796116504854369,
154
+ "grad_norm": 0.35812804102897644,
155
+ "learning_rate": 5.7340286872557515e-06,
156
+ "loss": 0.0365,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.7119741100323624,
161
+ "grad_norm": 0.29698875546455383,
162
+ "learning_rate": 4.7413313238324556e-06,
163
+ "loss": 0.0422,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.7443365695792881,
168
+ "grad_norm": 0.37994715571403503,
169
+ "learning_rate": 3.815718698874672e-06,
170
+ "loss": 0.0425,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.7766990291262136,
175
+ "grad_norm": 0.3461650013923645,
176
+ "learning_rate": 2.9689988354181742e-06,
177
+ "loss": 0.0384,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.8090614886731392,
182
+ "grad_norm": 0.30156540870666504,
183
+ "learning_rate": 2.211973323008041e-06,
184
+ "loss": 0.0422,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.8414239482200647,
189
+ "grad_norm": 0.44933223724365234,
190
+ "learning_rate": 1.5542995220217961e-06,
191
+ "loss": 0.0415,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.8737864077669902,
196
+ "grad_norm": 0.3253950774669647,
197
+ "learning_rate": 1.0043673649027519e-06,
198
+ "loss": 0.038,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.9061488673139159,
203
+ "grad_norm": 0.2673346996307373,
204
+ "learning_rate": 5.691923259479093e-07,
205
+ "loss": 0.038,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.9385113268608414,
210
+ "grad_norm": 0.2838808596134186,
211
+ "learning_rate": 2.5432592503288e-07,
212
+ "loss": 0.0382,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.970873786407767,
217
+ "grad_norm": 0.3232278823852539,
218
+ "learning_rate": 6.378490697611761e-08,
219
+ "loss": 0.0407,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 1.0,
224
+ "step": 309,
225
+ "total_flos": 3.151751154761728e+17,
226
+ "train_loss": 0.0763875040132251,
227
+ "train_runtime": 2448.9622,
228
+ "train_samples_per_second": 4.037,
229
+ "train_steps_per_second": 0.126
230
+ }
231
+ ],
232
+ "logging_steps": 10,
233
+ "max_steps": 309,
234
+ "num_input_tokens_seen": 0,
235
+ "num_train_epochs": 1,
236
+ "save_steps": 100,
237
+ "stateful_callbacks": {
238
+ "TrainerControl": {
239
+ "args": {
240
+ "should_epoch_stop": false,
241
+ "should_evaluate": false,
242
+ "should_log": false,
243
+ "should_save": true,
244
+ "should_training_stop": true
245
+ },
246
+ "attributes": {}
247
+ }
248
+ },
249
+ "total_flos": 3.151751154761728e+17,
250
+ "train_batch_size": 8,
251
+ "trial_name": null,
252
+ "trial_params": null
253
+ }
training_loss.png ADDED