abdo-Mansour committed on
Commit
fcb2d24
·
verified ·
1 Parent(s): e525abe

End of training

Browse files
README.md CHANGED
@@ -4,6 +4,7 @@ license: apache-2.0
4
  base_model: Qwen/Qwen3-0.6B
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: Extractor_Adaptor_Qwen3_QA_zero
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # Extractor_Adaptor_Qwen3_QA_zero
17
 
18
- This model is a fine-tuned version of [Qwen/Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.6387
21
 
 
4
  base_model: Qwen/Qwen3-0.6B
5
  tags:
6
  - llama-factory
7
+ - lora
8
  - generated_from_trainer
9
  model-index:
10
  - name: Extractor_Adaptor_Qwen3_QA_zero
 
16
 
17
  # Extractor_Adaptor_Qwen3_QA_zero
18
 
19
+ This model is a fine-tuned version of [Qwen/Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B) on the web_finetune_train dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.6387
22
 
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 0.6386752128601074,
4
+ "eval_runtime": 413.3513,
5
+ "eval_samples_per_second": 3.404,
6
+ "eval_steps_per_second": 0.852,
7
+ "total_flos": 9.150706299070054e+16,
8
+ "train_loss": 0.701680494077278,
9
+ "train_runtime": 17774.8464,
10
+ "train_samples_per_second": 0.712,
11
+ "train_steps_per_second": 0.022
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 0.6386752128601074,
4
+ "eval_runtime": 413.3513,
5
+ "eval_samples_per_second": 3.404,
6
+ "eval_steps_per_second": 0.852
7
+ }
runs/Dec30_23-03-20_e767f745df83/events.out.tfevents.1767154090.e767f745df83.136.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96130f402325db6626e06d022237c309bc11b6458ab476beba98f419657908a7
3
+ size 359
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 9.150706299070054e+16,
4
+ "train_loss": 0.701680494077278,
5
+ "train_runtime": 17774.8464,
6
+ "train_samples_per_second": 0.712,
7
+ "train_steps_per_second": 0.022
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 350,
3
+ "best_metric": 0.6386752128601074,
4
+ "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-350",
5
+ "epoch": 1.0,
6
+ "eval_steps": 50,
7
+ "global_step": 396,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02527646129541864,
14
+ "grad_norm": 14.924928665161133,
15
+ "learning_rate": 4.5e-06,
16
+ "loss": 1.6848,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.05055292259083728,
21
+ "grad_norm": 9.861122131347656,
22
+ "learning_rate": 9.5e-06,
23
+ "loss": 0.9777,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.07582938388625593,
28
+ "grad_norm": 8.668806076049805,
29
+ "learning_rate": 1.45e-05,
30
+ "loss": 0.8539,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.10110584518167456,
35
+ "grad_norm": 8.786724090576172,
36
+ "learning_rate": 1.95e-05,
37
+ "loss": 0.824,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.1263823064770932,
42
+ "grad_norm": 6.658519744873047,
43
+ "learning_rate": 1.996847707779778e-05,
44
+ "loss": 0.7459,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.1263823064770932,
49
+ "eval_loss": 0.7653215527534485,
50
+ "eval_runtime": 413.3579,
51
+ "eval_samples_per_second": 3.404,
52
+ "eval_steps_per_second": 0.852,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 0.15165876777251186,
57
+ "grad_norm": 7.256345748901367,
58
+ "learning_rate": 1.985976407793198e-05,
59
+ "loss": 0.7691,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 0.1769352290679305,
64
+ "grad_norm": 6.2496724128723145,
65
+ "learning_rate": 1.9674317682923535e-05,
66
+ "loss": 0.6842,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 0.20221169036334913,
71
+ "grad_norm": 6.8075408935546875,
72
+ "learning_rate": 1.941358112522644e-05,
73
+ "loss": 0.7287,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 0.22748815165876776,
78
+ "grad_norm": 6.674585819244385,
79
+ "learning_rate": 1.907958358131508e-05,
80
+ "loss": 0.7173,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 0.2527646129541864,
85
+ "grad_norm": 6.967682838439941,
86
+ "learning_rate": 1.867492437966334e-05,
87
+ "loss": 0.6676,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 0.2527646129541864,
92
+ "eval_loss": 0.7142078876495361,
93
+ "eval_runtime": 413.3775,
94
+ "eval_samples_per_second": 3.404,
95
+ "eval_steps_per_second": 0.852,
96
+ "step": 100
97
+ },
98
+ {
99
+ "epoch": 0.27804107424960506,
100
+ "grad_norm": 6.488720893859863,
101
+ "learning_rate": 1.820275277152846e-05,
102
+ "loss": 0.6861,
103
+ "step": 110
104
+ },
105
+ {
106
+ "epoch": 0.3033175355450237,
107
+ "grad_norm": 6.127718448638916,
108
+ "learning_rate": 1.7666743421972986e-05,
109
+ "loss": 0.681,
110
+ "step": 120
111
+ },
112
+ {
113
+ "epoch": 0.3285939968404423,
114
+ "grad_norm": 5.501866340637207,
115
+ "learning_rate": 1.7071067811865477e-05,
116
+ "loss": 0.6915,
117
+ "step": 130
118
+ },
119
+ {
120
+ "epoch": 0.353870458135861,
121
+ "grad_norm": 5.910053730010986,
122
+ "learning_rate": 1.6420361773423205e-05,
123
+ "loss": 0.677,
124
+ "step": 140
125
+ },
126
+ {
127
+ "epoch": 0.3791469194312796,
128
+ "grad_norm": 6.499867916107178,
129
+ "learning_rate": 1.571968941195081e-05,
130
+ "loss": 0.7094,
131
+ "step": 150
132
+ },
133
+ {
134
+ "epoch": 0.3791469194312796,
135
+ "eval_loss": 0.6856361031532288,
136
+ "eval_runtime": 413.5831,
137
+ "eval_samples_per_second": 3.402,
138
+ "eval_steps_per_second": 0.851,
139
+ "step": 150
140
+ },
141
+ {
142
+ "epoch": 0.40442338072669826,
143
+ "grad_norm": 5.411413669586182,
144
+ "learning_rate": 1.4974503694553119e-05,
145
+ "loss": 0.6963,
146
+ "step": 160
147
+ },
148
+ {
149
+ "epoch": 0.4296998420221169,
150
+ "grad_norm": 6.184033393859863,
151
+ "learning_rate": 1.4190604012539684e-05,
152
+ "loss": 0.6738,
153
+ "step": 170
154
+ },
155
+ {
156
+ "epoch": 0.4549763033175355,
157
+ "grad_norm": 6.484604358673096,
158
+ "learning_rate": 1.3374091047790585e-05,
159
+ "loss": 0.6558,
160
+ "step": 180
161
+ },
162
+ {
163
+ "epoch": 0.4802527646129542,
164
+ "grad_norm": 5.717767238616943,
165
+ "learning_rate": 1.2531319294335084e-05,
166
+ "loss": 0.612,
167
+ "step": 190
168
+ },
169
+ {
170
+ "epoch": 0.5055292259083728,
171
+ "grad_norm": 6.289947032928467,
172
+ "learning_rate": 1.1668847604642861e-05,
173
+ "loss": 0.6612,
174
+ "step": 200
175
+ },
176
+ {
177
+ "epoch": 0.5055292259083728,
178
+ "eval_loss": 0.666141152381897,
179
+ "eval_runtime": 413.6045,
180
+ "eval_samples_per_second": 3.402,
181
+ "eval_steps_per_second": 0.851,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 0.5308056872037915,
186
+ "grad_norm": 5.043293476104736,
187
+ "learning_rate": 1.0793388145500199e-05,
188
+ "loss": 0.6594,
189
+ "step": 210
190
+ },
191
+ {
192
+ "epoch": 0.5560821484992101,
193
+ "grad_norm": 5.2453203201293945,
194
+ "learning_rate": 9.911754160720924e-06,
195
+ "loss": 0.6347,
196
+ "step": 220
197
+ },
198
+ {
199
+ "epoch": 0.5813586097946287,
200
+ "grad_norm": 6.464652061462402,
201
+ "learning_rate": 9.030806947227607e-06,
202
+ "loss": 0.6543,
203
+ "step": 230
204
+ },
205
+ {
206
+ "epoch": 0.6066350710900474,
207
+ "grad_norm": 5.646970272064209,
208
+ "learning_rate": 8.157402457160539e-06,
209
+ "loss": 0.6667,
210
+ "step": 240
211
+ },
212
+ {
213
+ "epoch": 0.631911532385466,
214
+ "grad_norm": 5.6440534591674805,
215
+ "learning_rate": 7.298337941582314e-06,
216
+ "loss": 0.6349,
217
+ "step": 250
218
+ },
219
+ {
220
+ "epoch": 0.631911532385466,
221
+ "eval_loss": 0.652662992477417,
222
+ "eval_runtime": 413.1161,
223
+ "eval_samples_per_second": 3.406,
224
+ "eval_steps_per_second": 0.852,
225
+ "step": 250
226
+ },
227
+ {
228
+ "epoch": 0.6571879936808847,
229
+ "grad_norm": 5.690296173095703,
230
+ "learning_rate": 6.460299051022285e-06,
231
+ "loss": 0.6293,
232
+ "step": 260
233
+ },
234
+ {
235
+ "epoch": 0.6824644549763034,
236
+ "grad_norm": 5.673945903778076,
237
+ "learning_rate": 5.649807804549663e-06,
238
+ "loss": 0.6402,
239
+ "step": 270
240
+ },
241
+ {
242
+ "epoch": 0.707740916271722,
243
+ "grad_norm": 6.384261131286621,
244
+ "learning_rate": 4.873171832304852e-06,
245
+ "loss": 0.6626,
246
+ "step": 280
247
+ },
248
+ {
249
+ "epoch": 0.7330173775671406,
250
+ "grad_norm": 6.072003364562988,
251
+ "learning_rate": 4.13643528650785e-06,
252
+ "loss": 0.6121,
253
+ "step": 290
254
+ },
255
+ {
256
+ "epoch": 0.7582938388625592,
257
+ "grad_norm": 5.644257545471191,
258
+ "learning_rate": 3.4453318029777096e-06,
259
+ "loss": 0.6486,
260
+ "step": 300
261
+ },
262
+ {
263
+ "epoch": 0.7582938388625592,
264
+ "eval_loss": 0.6433162093162537,
265
+ "eval_runtime": 413.2942,
266
+ "eval_samples_per_second": 3.404,
267
+ "eval_steps_per_second": 0.852,
268
+ "step": 300
269
+ },
270
+ {
271
+ "epoch": 0.7835703001579779,
272
+ "grad_norm": 6.254457950592041,
273
+ "learning_rate": 2.8052398792390155e-06,
274
+ "loss": 0.6275,
275
+ "step": 310
276
+ },
277
+ {
278
+ "epoch": 0.8088467614533965,
279
+ "grad_norm": 4.991596698760986,
280
+ "learning_rate": 2.2211410164842605e-06,
281
+ "loss": 0.6194,
282
+ "step": 320
283
+ },
284
+ {
285
+ "epoch": 0.8341232227488151,
286
+ "grad_norm": 5.583685874938965,
287
+ "learning_rate": 1.6975809511513352e-06,
288
+ "loss": 0.5763,
289
+ "step": 330
290
+ },
291
+ {
292
+ "epoch": 0.8593996840442338,
293
+ "grad_norm": 5.555745601654053,
294
+ "learning_rate": 1.2386342778305993e-06,
295
+ "loss": 0.6468,
296
+ "step": 340
297
+ },
298
+ {
299
+ "epoch": 0.8846761453396524,
300
+ "grad_norm": 5.202249526977539,
301
+ "learning_rate": 8.478727388228736e-07,
302
+ "loss": 0.617,
303
+ "step": 350
304
+ },
305
+ {
306
+ "epoch": 0.8846761453396524,
307
+ "eval_loss": 0.6386752128601074,
308
+ "eval_runtime": 413.3068,
309
+ "eval_samples_per_second": 3.404,
310
+ "eval_steps_per_second": 0.852,
311
+ "step": 350
312
+ },
313
+ {
314
+ "epoch": 0.909952606635071,
315
+ "grad_norm": 5.008814334869385,
316
+ "learning_rate": 5.283374271342645e-07,
317
+ "loss": 0.6197,
318
+ "step": 360
319
+ },
320
+ {
321
+ "epoch": 0.9352290679304898,
322
+ "grad_norm": 6.082056999206543,
323
+ "learning_rate": 2.8251511923731655e-07,
324
+ "loss": 0.6289,
325
+ "step": 370
326
+ },
327
+ {
328
+ "epoch": 0.9605055292259084,
329
+ "grad_norm": 5.0787553787231445,
330
+ "learning_rate": 1.1231892178829474e-07,
331
+ "loss": 0.6276,
332
+ "step": 380
333
+ },
334
+ {
335
+ "epoch": 0.985781990521327,
336
+ "grad_norm": 5.158481121063232,
337
+ "learning_rate": 1.9073382917097482e-08,
338
+ "loss": 0.5999,
339
+ "step": 390
340
+ },
341
+ {
342
+ "epoch": 1.0,
343
+ "step": 396,
344
+ "total_flos": 9.150706299070054e+16,
345
+ "train_loss": 0.701680494077278,
346
+ "train_runtime": 17774.8464,
347
+ "train_samples_per_second": 0.712,
348
+ "train_steps_per_second": 0.022
349
+ }
350
+ ],
351
+ "logging_steps": 10,
352
+ "max_steps": 396,
353
+ "num_input_tokens_seen": 0,
354
+ "num_train_epochs": 1,
355
+ "save_steps": 50,
356
+ "stateful_callbacks": {
357
+ "TrainerControl": {
358
+ "args": {
359
+ "should_epoch_stop": false,
360
+ "should_evaluate": false,
361
+ "should_log": false,
362
+ "should_save": true,
363
+ "should_training_stop": true
364
+ },
365
+ "attributes": {}
366
+ }
367
+ },
368
+ "total_flos": 9.150706299070054e+16,
369
+ "train_batch_size": 2,
370
+ "trial_name": null,
371
+ "trial_params": null
372
+ }
training_eval_loss.png ADDED
training_loss.png ADDED