robertou2 commited on
Commit
1bdea0b
·
verified ·
1 Parent(s): f650fe5

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -23,9 +23,9 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "qkv_proj",
27
- "gate_up_proj",
28
  "down_proj",
 
 
29
  "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "down_proj",
27
+ "gate_up_proj",
28
+ "qkv_proj",
29
  "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f571970d62c9a8caf2e66a1489442f03be777c696646e79f5de2ea09da524792
3
  size 632310976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:219277153c7e6beba1374d1b5a4bae962aaa8a602522d8448a59e093c3b0d3de
3
  size 632310976
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a907cfe89e9e772a52b6a29b54df5ff87fcc123733b91bbc909a0297c1645ae0
3
  size 1264705163
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3160fe49690479d2d460f0ecedca6162eded8370072c39b917293c6283cd1106
3
  size 1264705163
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 200,
3
- "best_metric": 0.07751981914043427,
4
  "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_Qwen/Qwen2.5-3B-Instruct/checkpoint-200",
5
  "epoch": 10.526315789473685,
6
  "eval_steps": 10,
@@ -11,302 +11,302 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.5263157894736842,
14
- "grad_norm": 18.681015014648438,
15
  "learning_rate": 0.0003,
16
- "loss": 1.6766,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.5263157894736842,
21
- "eval_loss": 1.6542210578918457,
22
- "eval_runtime": 2.9595,
23
- "eval_samples_per_second": 10.137,
24
- "eval_steps_per_second": 1.352,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 1.0526315789473684,
29
- "grad_norm": 2.2868361473083496,
30
  "learning_rate": 0.0004996426526821629,
31
- "loss": 1.7116,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 1.0526315789473684,
36
- "eval_loss": 0.9941667914390564,
37
- "eval_runtime": 2.9204,
38
- "eval_samples_per_second": 10.273,
39
- "eval_steps_per_second": 1.37,
40
  "step": 20
41
  },
42
  {
43
  "epoch": 1.5789473684210527,
44
- "grad_norm": 27.153867721557617,
45
  "learning_rate": 0.000495634218306187,
46
- "loss": 1.5062,
47
  "step": 30
48
  },
49
  {
50
  "epoch": 1.5789473684210527,
51
- "eval_loss": 1.3414534330368042,
52
- "eval_runtime": 2.9658,
53
- "eval_samples_per_second": 10.115,
54
- "eval_steps_per_second": 1.349,
55
  "step": 30
56
  },
57
  {
58
  "epoch": 2.1052631578947367,
59
- "grad_norm": 7.481041431427002,
60
  "learning_rate": 0.0004872424354853545,
61
- "loss": 1.2521,
62
  "step": 40
63
  },
64
  {
65
  "epoch": 2.1052631578947367,
66
- "eval_loss": 1.2095614671707153,
67
- "eval_runtime": 2.9569,
68
- "eval_samples_per_second": 10.146,
69
- "eval_steps_per_second": 1.353,
70
  "step": 40
71
  },
72
  {
73
  "epoch": 2.6315789473684212,
74
- "grad_norm": 2.246990442276001,
75
  "learning_rate": 0.00047461705578290833,
76
- "loss": 1.0129,
77
  "step": 50
78
  },
79
  {
80
  "epoch": 2.6315789473684212,
81
- "eval_loss": 0.9087256789207458,
82
- "eval_runtime": 2.9647,
83
- "eval_samples_per_second": 10.119,
84
- "eval_steps_per_second": 1.349,
85
  "step": 50
86
  },
87
  {
88
  "epoch": 3.1578947368421053,
89
- "grad_norm": 96.21257781982422,
90
  "learning_rate": 0.00045798337939873923,
91
- "loss": 2.4625,
92
  "step": 60
93
  },
94
  {
95
  "epoch": 3.1578947368421053,
96
- "eval_loss": 4.777709484100342,
97
- "eval_runtime": 2.9487,
98
- "eval_samples_per_second": 10.174,
99
- "eval_steps_per_second": 1.357,
100
  "step": 60
101
  },
102
  {
103
  "epoch": 3.6842105263157894,
104
- "grad_norm": 9.973808288574219,
105
  "learning_rate": 0.0004376382346819819,
106
- "loss": 3.3295,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 3.6842105263157894,
111
- "eval_loss": 1.1951773166656494,
112
- "eval_runtime": 2.9632,
113
- "eval_samples_per_second": 10.124,
114
- "eval_steps_per_second": 1.35,
115
  "step": 70
116
  },
117
  {
118
  "epoch": 4.2105263157894735,
119
- "grad_norm": 2.2323639392852783,
120
  "learning_rate": 0.0004139446812220924,
121
- "loss": 1.0794,
122
  "step": 80
123
  },
124
  {
125
  "epoch": 4.2105263157894735,
126
- "eval_loss": 0.8177169561386108,
127
- "eval_runtime": 2.9594,
128
- "eval_samples_per_second": 10.137,
129
- "eval_steps_per_second": 1.352,
130
  "step": 80
131
  },
132
  {
133
  "epoch": 4.7368421052631575,
134
- "grad_norm": 3.0855352878570557,
135
  "learning_rate": 0.00038732553104187296,
136
- "loss": 1.1521,
137
  "step": 90
138
  },
139
  {
140
  "epoch": 4.7368421052631575,
141
- "eval_loss": 0.9294276833534241,
142
- "eval_runtime": 2.9579,
143
- "eval_samples_per_second": 10.142,
144
- "eval_steps_per_second": 1.352,
145
  "step": 90
146
  },
147
  {
148
  "epoch": 5.2631578947368425,
149
- "grad_norm": 2.238079071044922,
150
  "learning_rate": 0.0003582558035069091,
151
- "loss": 0.9498,
152
  "step": 100
153
  },
154
  {
155
  "epoch": 5.2631578947368425,
156
- "eval_loss": 0.811234176158905,
157
- "eval_runtime": 2.963,
158
- "eval_samples_per_second": 10.125,
159
- "eval_steps_per_second": 1.35,
160
  "step": 100
161
  },
162
  {
163
  "epoch": 5.7894736842105265,
164
- "grad_norm": 2.5768349170684814,
165
  "learning_rate": 0.00032725424859373687,
166
- "loss": 0.8017,
167
  "step": 110
168
  },
169
  {
170
  "epoch": 5.7894736842105265,
171
- "eval_loss": 0.7408804297447205,
172
- "eval_runtime": 2.9552,
173
- "eval_samples_per_second": 10.151,
174
  "eval_steps_per_second": 1.354,
175
  "step": 110
176
  },
177
  {
178
  "epoch": 6.315789473684211,
179
- "grad_norm": 1.3286046981811523,
180
  "learning_rate": 0.0002948740897842223,
181
- "loss": 0.7342,
182
  "step": 120
183
  },
184
  {
185
  "epoch": 6.315789473684211,
186
- "eval_loss": 0.6850324273109436,
187
- "eval_runtime": 2.9619,
188
- "eval_samples_per_second": 10.129,
189
- "eval_steps_per_second": 1.35,
190
  "step": 120
191
  },
192
  {
193
  "epoch": 6.842105263157895,
194
- "grad_norm": 1.8766930103302002,
195
  "learning_rate": 0.00026169315177942135,
196
- "loss": 0.6958,
197
  "step": 130
198
  },
199
  {
200
  "epoch": 6.842105263157895,
201
- "eval_loss": 0.579915463924408,
202
- "eval_runtime": 2.956,
203
- "eval_samples_per_second": 10.149,
204
- "eval_steps_per_second": 1.353,
205
  "step": 130
206
  },
207
  {
208
  "epoch": 7.368421052631579,
209
- "grad_norm": 1.2298275232315063,
210
  "learning_rate": 0.00022830354920410064,
211
- "loss": 0.6327,
212
  "step": 140
213
  },
214
  {
215
  "epoch": 7.368421052631579,
216
- "eval_loss": 0.5571870803833008,
217
- "eval_runtime": 2.9559,
218
- "eval_samples_per_second": 10.149,
219
- "eval_steps_per_second": 1.353,
220
  "step": 140
221
  },
222
  {
223
  "epoch": 7.894736842105263,
224
- "grad_norm": 1.4947859048843384,
225
  "learning_rate": 0.0001953011203072312,
226
- "loss": 0.5699,
227
  "step": 150
228
  },
229
  {
230
  "epoch": 7.894736842105263,
231
- "eval_loss": 0.47740742564201355,
232
- "eval_runtime": 2.966,
233
- "eval_samples_per_second": 10.115,
234
- "eval_steps_per_second": 1.349,
235
  "step": 150
236
  },
237
  {
238
  "epoch": 8.421052631578947,
239
- "grad_norm": 2.0249719619750977,
240
  "learning_rate": 0.00016327479421431983,
241
- "loss": 0.4651,
242
  "step": 160
243
  },
244
  {
245
  "epoch": 8.421052631578947,
246
- "eval_loss": 0.39508286118507385,
247
- "eval_runtime": 2.9527,
248
- "eval_samples_per_second": 10.16,
249
- "eval_steps_per_second": 1.355,
250
  "step": 160
251
  },
252
  {
253
  "epoch": 8.947368421052632,
254
- "grad_norm": 1.2812429666519165,
255
  "learning_rate": 0.00013279608147321223,
256
- "loss": 0.4375,
257
  "step": 170
258
  },
259
  {
260
  "epoch": 8.947368421052632,
261
- "eval_loss": 0.30384376645088196,
262
- "eval_runtime": 2.968,
263
- "eval_samples_per_second": 10.108,
264
- "eval_steps_per_second": 1.348,
265
  "step": 170
266
  },
267
  {
268
  "epoch": 9.473684210526315,
269
- "grad_norm": 1.797318935394287,
270
  "learning_rate": 0.00010440887543482746,
271
- "loss": 0.2687,
272
  "step": 180
273
  },
274
  {
275
  "epoch": 9.473684210526315,
276
- "eval_loss": 0.21915049850940704,
277
- "eval_runtime": 2.9636,
278
- "eval_samples_per_second": 10.123,
279
- "eval_steps_per_second": 1.35,
280
  "step": 180
281
  },
282
  {
283
  "epoch": 10.0,
284
- "grad_norm": 1.2623144388198853,
285
  "learning_rate": 7.861974646342596e-05,
286
- "loss": 0.2708,
287
  "step": 190
288
  },
289
  {
290
  "epoch": 10.0,
291
- "eval_loss": 0.14943605661392212,
292
- "eval_runtime": 2.9609,
293
- "eval_samples_per_second": 10.132,
294
- "eval_steps_per_second": 1.351,
295
  "step": 190
296
  },
297
  {
298
  "epoch": 10.526315789473685,
299
- "grad_norm": 1.1345982551574707,
300
  "learning_rate": 5.58889021764582e-05,
301
- "loss": 0.1275,
302
  "step": 200
303
  },
304
  {
305
  "epoch": 10.526315789473685,
306
- "eval_loss": 0.07751981914043427,
307
- "eval_runtime": 2.9635,
308
- "eval_samples_per_second": 10.123,
309
- "eval_steps_per_second": 1.35,
310
  "step": 200
311
  }
312
  ],
 
1
  {
2
  "best_global_step": 200,
3
+ "best_metric": 0.09495183825492859,
4
  "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_Qwen/Qwen2.5-3B-Instruct/checkpoint-200",
5
  "epoch": 10.526315789473685,
6
  "eval_steps": 10,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.5263157894736842,
14
+ "grad_norm": 2.6249654293060303,
15
  "learning_rate": 0.0003,
16
+ "loss": 1.6684,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.5263157894736842,
21
+ "eval_loss": 0.9868702292442322,
22
+ "eval_runtime": 2.9701,
23
+ "eval_samples_per_second": 10.101,
24
+ "eval_steps_per_second": 1.347,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 1.0526315789473684,
29
+ "grad_norm": 98.9109115600586,
30
  "learning_rate": 0.0004996426526821629,
31
+ "loss": 1.1963,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 1.0526315789473684,
36
+ "eval_loss": 1.1383196115493774,
37
+ "eval_runtime": 2.948,
38
+ "eval_samples_per_second": 10.176,
39
+ "eval_steps_per_second": 1.357,
40
  "step": 20
41
  },
42
  {
43
  "epoch": 1.5789473684210527,
44
+ "grad_norm": 2.453364849090576,
45
  "learning_rate": 0.000495634218306187,
46
+ "loss": 1.2926,
47
  "step": 30
48
  },
49
  {
50
  "epoch": 1.5789473684210527,
51
+ "eval_loss": 0.9744565486907959,
52
+ "eval_runtime": 2.9493,
53
+ "eval_samples_per_second": 10.172,
54
+ "eval_steps_per_second": 1.356,
55
  "step": 30
56
  },
57
  {
58
  "epoch": 2.1052631578947367,
59
+ "grad_norm": 4.493265628814697,
60
  "learning_rate": 0.0004872424354853545,
61
+ "loss": 1.974,
62
  "step": 40
63
  },
64
  {
65
  "epoch": 2.1052631578947367,
66
+ "eval_loss": 0.9542869329452515,
67
+ "eval_runtime": 2.9431,
68
+ "eval_samples_per_second": 10.193,
69
+ "eval_steps_per_second": 1.359,
70
  "step": 40
71
  },
72
  {
73
  "epoch": 2.6315789473684212,
74
+ "grad_norm": 13.221575736999512,
75
  "learning_rate": 0.00047461705578290833,
76
+ "loss": 1.0402,
77
  "step": 50
78
  },
79
  {
80
  "epoch": 2.6315789473684212,
81
+ "eval_loss": 1.1257095336914062,
82
+ "eval_runtime": 2.9455,
83
+ "eval_samples_per_second": 10.185,
84
+ "eval_steps_per_second": 1.358,
85
  "step": 50
86
  },
87
  {
88
  "epoch": 3.1578947368421053,
89
+ "grad_norm": 166.74429321289062,
90
  "learning_rate": 0.00045798337939873923,
91
+ "loss": 1.1376,
92
  "step": 60
93
  },
94
  {
95
  "epoch": 3.1578947368421053,
96
+ "eval_loss": 4.395293712615967,
97
+ "eval_runtime": 2.9496,
98
+ "eval_samples_per_second": 10.171,
99
+ "eval_steps_per_second": 1.356,
100
  "step": 60
101
  },
102
  {
103
  "epoch": 3.6842105263157894,
104
+ "grad_norm": 12.618789672851562,
105
  "learning_rate": 0.0004376382346819819,
106
+ "loss": 1.4838,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 3.6842105263157894,
111
+ "eval_loss": 0.8322703242301941,
112
+ "eval_runtime": 2.9453,
113
+ "eval_samples_per_second": 10.186,
114
+ "eval_steps_per_second": 1.358,
115
  "step": 70
116
  },
117
  {
118
  "epoch": 4.2105263157894735,
119
+ "grad_norm": 9.384139060974121,
120
  "learning_rate": 0.0004139446812220924,
121
+ "loss": 1.2534,
122
  "step": 80
123
  },
124
  {
125
  "epoch": 4.2105263157894735,
126
+ "eval_loss": 0.9503701329231262,
127
+ "eval_runtime": 2.9381,
128
+ "eval_samples_per_second": 10.211,
129
+ "eval_steps_per_second": 1.361,
130
  "step": 80
131
  },
132
  {
133
  "epoch": 4.7368421052631575,
134
+ "grad_norm": 1.788373589515686,
135
  "learning_rate": 0.00038732553104187296,
136
+ "loss": 0.8956,
137
  "step": 90
138
  },
139
  {
140
  "epoch": 4.7368421052631575,
141
+ "eval_loss": 0.7270878553390503,
142
+ "eval_runtime": 2.9534,
143
+ "eval_samples_per_second": 10.158,
144
+ "eval_steps_per_second": 1.354,
145
  "step": 90
146
  },
147
  {
148
  "epoch": 5.2631578947368425,
149
+ "grad_norm": 17.652860641479492,
150
  "learning_rate": 0.0003582558035069091,
151
+ "loss": 0.8018,
152
  "step": 100
153
  },
154
  {
155
  "epoch": 5.2631578947368425,
156
+ "eval_loss": 0.9296186566352844,
157
+ "eval_runtime": 2.9361,
158
+ "eval_samples_per_second": 10.218,
159
+ "eval_steps_per_second": 1.362,
160
  "step": 100
161
  },
162
  {
163
  "epoch": 5.7894736842105265,
164
+ "grad_norm": 2.3047244548797607,
165
  "learning_rate": 0.00032725424859373687,
166
+ "loss": 0.8493,
167
  "step": 110
168
  },
169
  {
170
  "epoch": 5.7894736842105265,
171
+ "eval_loss": 0.7011998891830444,
172
+ "eval_runtime": 2.9544,
173
+ "eval_samples_per_second": 10.154,
174
  "eval_steps_per_second": 1.354,
175
  "step": 110
176
  },
177
  {
178
  "epoch": 6.315789473684211,
179
+ "grad_norm": 1.649305820465088,
180
  "learning_rate": 0.0002948740897842223,
181
+ "loss": 0.6742,
182
  "step": 120
183
  },
184
  {
185
  "epoch": 6.315789473684211,
186
+ "eval_loss": 0.6724004149436951,
187
+ "eval_runtime": 2.9459,
188
+ "eval_samples_per_second": 10.184,
189
+ "eval_steps_per_second": 1.358,
190
  "step": 120
191
  },
192
  {
193
  "epoch": 6.842105263157895,
194
+ "grad_norm": 1.2425477504730225,
195
  "learning_rate": 0.00026169315177942135,
196
+ "loss": 0.6651,
197
  "step": 130
198
  },
199
  {
200
  "epoch": 6.842105263157895,
201
+ "eval_loss": 0.5668935179710388,
202
+ "eval_runtime": 2.944,
203
+ "eval_samples_per_second": 10.19,
204
+ "eval_steps_per_second": 1.359,
205
  "step": 130
206
  },
207
  {
208
  "epoch": 7.368421052631579,
209
+ "grad_norm": 1.3508877754211426,
210
  "learning_rate": 0.00022830354920410064,
211
+ "loss": 0.6002,
212
  "step": 140
213
  },
214
  {
215
  "epoch": 7.368421052631579,
216
+ "eval_loss": 0.552211344242096,
217
+ "eval_runtime": 2.9471,
218
+ "eval_samples_per_second": 10.179,
219
+ "eval_steps_per_second": 1.357,
220
  "step": 140
221
  },
222
  {
223
  "epoch": 7.894736842105263,
224
+ "grad_norm": 1.030277132987976,
225
  "learning_rate": 0.0001953011203072312,
226
+ "loss": 0.5302,
227
  "step": 150
228
  },
229
  {
230
  "epoch": 7.894736842105263,
231
+ "eval_loss": 0.43232136964797974,
232
+ "eval_runtime": 2.9451,
233
+ "eval_samples_per_second": 10.186,
234
+ "eval_steps_per_second": 1.358,
235
  "step": 150
236
  },
237
  {
238
  "epoch": 8.421052631578947,
239
+ "grad_norm": 1.2759102582931519,
240
  "learning_rate": 0.00016327479421431983,
241
+ "loss": 0.4205,
242
  "step": 160
243
  },
244
  {
245
  "epoch": 8.421052631578947,
246
+ "eval_loss": 0.3660585284233093,
247
+ "eval_runtime": 2.9446,
248
+ "eval_samples_per_second": 10.188,
249
+ "eval_steps_per_second": 1.358,
250
  "step": 160
251
  },
252
  {
253
  "epoch": 8.947368421052632,
254
+ "grad_norm": 1.2652796506881714,
255
  "learning_rate": 0.00013279608147321223,
256
+ "loss": 0.4143,
257
  "step": 170
258
  },
259
  {
260
  "epoch": 8.947368421052632,
261
+ "eval_loss": 0.2963067889213562,
262
+ "eval_runtime": 2.9476,
263
+ "eval_samples_per_second": 10.178,
264
+ "eval_steps_per_second": 1.357,
265
  "step": 170
266
  },
267
  {
268
  "epoch": 9.473684210526315,
269
+ "grad_norm": 1.2629317045211792,
270
  "learning_rate": 0.00010440887543482746,
271
+ "loss": 0.2601,
272
  "step": 180
273
  },
274
  {
275
  "epoch": 9.473684210526315,
276
+ "eval_loss": 0.21139691770076752,
277
+ "eval_runtime": 2.9514,
278
+ "eval_samples_per_second": 10.165,
279
+ "eval_steps_per_second": 1.355,
280
  "step": 180
281
  },
282
  {
283
  "epoch": 10.0,
284
+ "grad_norm": 1.5134602785110474,
285
  "learning_rate": 7.861974646342596e-05,
286
+ "loss": 0.2535,
287
  "step": 190
288
  },
289
  {
290
  "epoch": 10.0,
291
+ "eval_loss": 0.15356798470020294,
292
+ "eval_runtime": 2.945,
293
+ "eval_samples_per_second": 10.187,
294
+ "eval_steps_per_second": 1.358,
295
  "step": 190
296
  },
297
  {
298
  "epoch": 10.526315789473685,
299
+ "grad_norm": 1.9126681089401245,
300
  "learning_rate": 5.58889021764582e-05,
301
+ "loss": 0.1294,
302
  "step": 200
303
  },
304
  {
305
  "epoch": 10.526315789473685,
306
+ "eval_loss": 0.09495183825492859,
307
+ "eval_runtime": 2.9433,
308
+ "eval_samples_per_second": 10.193,
309
+ "eval_steps_per_second": 1.359,
310
  "step": 200
311
  }
312
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd47b7f55aff0ef5f2641e2589e9395de522d815e4af6c0e3613ff39bcd994a0
3
  size 6033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d501d12360f6c481d46e613473eead035956f34cee7f63a86b84918ee2ecad93
3
  size 6033