ve-nk-at committed on
Commit
3efa078
·
verified ·
1 Parent(s): 63d6d89

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -34,12 +34,12 @@
34
  "revision": null,
35
  "target_modules": [
36
  "o_proj",
37
- "k_proj",
38
- "q_proj",
39
- "gate_proj",
40
  "v_proj",
41
  "down_proj",
42
- "up_proj"
 
 
 
43
  ],
44
  "target_parameters": null,
45
  "task_type": "SEQ_CLS",
 
34
  "revision": null,
35
  "target_modules": [
36
  "o_proj",
 
 
 
37
  "v_proj",
38
  "down_proj",
39
+ "q_proj",
40
+ "k_proj",
41
+ "up_proj",
42
+ "gate_proj"
43
  ],
44
  "target_parameters": null,
45
  "task_type": "SEQ_CLS",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04c015335ffb450bad00de9274b0167e98e159bf17049bc56ea8bd0252d302a1
3
  size 664635272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2416d6550deb818d0f05c387f8565c7b47944e0b1d169a75775dbd8f88bd149f
3
  size 664635272
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff3426cd6cff7eb13a44a0d73b65d6c5a17bd38b953d0a29afa08b38b09460fc
3
  size 1329479786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc364ea3c42c9c346258ba687f8dc4e63455c63fbf573ffe6f682811a922e8f2
3
  size 1329479786
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 320,
3
- "best_metric": 0.5875147670395797,
4
  "best_model_checkpoint": "/content/gemma_lora_imb/checkpoint-320",
5
  "epoch": 1.6494845360824741,
6
  "eval_steps": 20,
@@ -11,258 +11,258 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.10309278350515463,
14
- "grad_norm": 25.1546688079834,
15
  "learning_rate": 9.510309278350516e-06,
16
- "loss": 2.0014,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.10309278350515463,
21
- "eval_f1_macro": 0.33107292753406026,
22
- "eval_loss": 1.6745504140853882,
23
- "eval_runtime": 4.0473,
24
- "eval_samples_per_second": 190.744,
25
- "eval_steps_per_second": 6.177,
26
  "step": 20
27
  },
28
  {
29
  "epoch": 0.20618556701030927,
30
- "grad_norm": 16.38060188293457,
31
  "learning_rate": 8.994845360824743e-06,
32
- "loss": 1.4915,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.20618556701030927,
37
- "eval_f1_macro": 0.3665202210167921,
38
- "eval_loss": 1.254835844039917,
39
- "eval_runtime": 3.9719,
40
- "eval_samples_per_second": 194.367,
41
- "eval_steps_per_second": 6.294,
42
  "step": 40
43
  },
44
  {
45
  "epoch": 0.30927835051546393,
46
- "grad_norm": 30.57720947265625,
47
  "learning_rate": 8.479381443298969e-06,
48
- "loss": 1.1753,
49
  "step": 60
50
  },
51
  {
52
  "epoch": 0.30927835051546393,
53
- "eval_f1_macro": 0.4159936110328978,
54
- "eval_loss": 1.0741406679153442,
55
- "eval_runtime": 4.0545,
56
- "eval_samples_per_second": 190.408,
57
- "eval_steps_per_second": 6.166,
58
  "step": 60
59
  },
60
  {
61
  "epoch": 0.41237113402061853,
62
- "grad_norm": 19.81576156616211,
63
  "learning_rate": 7.963917525773196e-06,
64
- "loss": 0.9657,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.41237113402061853,
69
- "eval_f1_macro": 0.46995424440252304,
70
- "eval_loss": 0.9967098236083984,
71
- "eval_runtime": 3.9961,
72
- "eval_samples_per_second": 193.188,
73
- "eval_steps_per_second": 6.256,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 0.5154639175257731,
78
- "grad_norm": 13.595305442810059,
79
  "learning_rate": 7.448453608247424e-06,
80
- "loss": 0.9258,
81
  "step": 100
82
  },
83
  {
84
  "epoch": 0.5154639175257731,
85
- "eval_f1_macro": 0.49848441168978574,
86
- "eval_loss": 0.8957132697105408,
87
- "eval_runtime": 3.9144,
88
- "eval_samples_per_second": 197.219,
89
- "eval_steps_per_second": 6.387,
90
  "step": 100
91
  },
92
  {
93
  "epoch": 0.6185567010309279,
94
- "grad_norm": 12.591818809509277,
95
  "learning_rate": 6.93298969072165e-06,
96
- "loss": 0.8563,
97
  "step": 120
98
  },
99
  {
100
  "epoch": 0.6185567010309279,
101
- "eval_f1_macro": 0.5165300305298831,
102
- "eval_loss": 0.8264620900154114,
103
- "eval_runtime": 4.0365,
104
- "eval_samples_per_second": 191.257,
105
- "eval_steps_per_second": 6.194,
106
  "step": 120
107
  },
108
  {
109
  "epoch": 0.7216494845360825,
110
- "grad_norm": 9.834779739379883,
111
  "learning_rate": 6.417525773195877e-06,
112
- "loss": 0.8207,
113
  "step": 140
114
  },
115
  {
116
  "epoch": 0.7216494845360825,
117
- "eval_f1_macro": 0.5491629792325976,
118
- "eval_loss": 0.7913520932197571,
119
- "eval_runtime": 3.9557,
120
- "eval_samples_per_second": 195.163,
121
- "eval_steps_per_second": 6.32,
122
  "step": 140
123
  },
124
  {
125
  "epoch": 0.8247422680412371,
126
- "grad_norm": 12.005083084106445,
127
  "learning_rate": 5.902061855670104e-06,
128
- "loss": 0.8001,
129
  "step": 160
130
  },
131
  {
132
  "epoch": 0.8247422680412371,
133
- "eval_f1_macro": 0.5534321329438001,
134
- "eval_loss": 0.7741566300392151,
135
- "eval_runtime": 3.9951,
136
- "eval_samples_per_second": 193.236,
137
- "eval_steps_per_second": 6.258,
138
  "step": 160
139
  },
140
  {
141
  "epoch": 0.9278350515463918,
142
- "grad_norm": 12.000106811523438,
143
  "learning_rate": 5.38659793814433e-06,
144
- "loss": 0.7494,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 0.9278350515463918,
149
- "eval_f1_macro": 0.5579623894625729,
150
- "eval_loss": 0.7399266958236694,
151
- "eval_runtime": 3.9949,
152
- "eval_samples_per_second": 193.246,
153
- "eval_steps_per_second": 6.258,
154
  "step": 180
155
  },
156
  {
157
  "epoch": 1.0309278350515463,
158
- "grad_norm": 13.406641006469727,
159
  "learning_rate": 4.871134020618557e-06,
160
- "loss": 0.719,
161
  "step": 200
162
  },
163
  {
164
  "epoch": 1.0309278350515463,
165
- "eval_f1_macro": 0.5670104123323871,
166
- "eval_loss": 0.7660173773765564,
167
- "eval_runtime": 3.973,
168
- "eval_samples_per_second": 194.314,
169
- "eval_steps_per_second": 6.293,
170
  "step": 200
171
  },
172
  {
173
  "epoch": 1.134020618556701,
174
- "grad_norm": 9.07872200012207,
175
  "learning_rate": 4.355670103092784e-06,
176
- "loss": 0.66,
177
  "step": 220
178
  },
179
  {
180
  "epoch": 1.134020618556701,
181
- "eval_f1_macro": 0.5826124319208055,
182
- "eval_loss": 0.7981044054031372,
183
- "eval_runtime": 3.9838,
184
- "eval_samples_per_second": 193.783,
185
- "eval_steps_per_second": 6.275,
186
  "step": 220
187
  },
188
  {
189
  "epoch": 1.2371134020618557,
190
- "grad_norm": 9.708913803100586,
191
  "learning_rate": 3.840206185567011e-06,
192
- "loss": 0.6715,
193
  "step": 240
194
  },
195
  {
196
  "epoch": 1.2371134020618557,
197
- "eval_f1_macro": 0.574267676962998,
198
- "eval_loss": 0.7557724118232727,
199
- "eval_runtime": 4.0237,
200
- "eval_samples_per_second": 191.864,
201
- "eval_steps_per_second": 6.213,
202
  "step": 240
203
  },
204
  {
205
  "epoch": 1.3402061855670104,
206
- "grad_norm": 7.926121711730957,
207
  "learning_rate": 3.324742268041237e-06,
208
- "loss": 0.6367,
209
  "step": 260
210
  },
211
  {
212
  "epoch": 1.3402061855670104,
213
- "eval_f1_macro": 0.5792622799357451,
214
- "eval_loss": 0.7489193677902222,
215
- "eval_runtime": 3.9886,
216
- "eval_samples_per_second": 193.553,
217
- "eval_steps_per_second": 6.268,
218
  "step": 260
219
  },
220
  {
221
  "epoch": 1.443298969072165,
222
- "grad_norm": 6.716864585876465,
223
  "learning_rate": 2.809278350515464e-06,
224
- "loss": 0.6164,
225
  "step": 280
226
  },
227
  {
228
  "epoch": 1.443298969072165,
229
- "eval_f1_macro": 0.580594142749172,
230
- "eval_loss": 0.7568148970603943,
231
- "eval_runtime": 3.9996,
232
- "eval_samples_per_second": 193.021,
233
- "eval_steps_per_second": 6.251,
234
  "step": 280
235
  },
236
  {
237
  "epoch": 1.5463917525773194,
238
- "grad_norm": 7.458005428314209,
239
  "learning_rate": 2.293814432989691e-06,
240
- "loss": 0.6752,
241
  "step": 300
242
  },
243
  {
244
  "epoch": 1.5463917525773194,
245
- "eval_f1_macro": 0.570129567763359,
246
- "eval_loss": 0.7318532466888428,
247
- "eval_runtime": 4.0088,
248
- "eval_samples_per_second": 192.576,
249
- "eval_steps_per_second": 6.236,
250
  "step": 300
251
  },
252
  {
253
  "epoch": 1.6494845360824741,
254
- "grad_norm": 7.686004161834717,
255
  "learning_rate": 1.7783505154639178e-06,
256
- "loss": 0.5934,
257
  "step": 320
258
  },
259
  {
260
  "epoch": 1.6494845360824741,
261
- "eval_f1_macro": 0.5875147670395797,
262
- "eval_loss": 0.7576584815979004,
263
- "eval_runtime": 4.0068,
264
- "eval_samples_per_second": 192.671,
265
- "eval_steps_per_second": 6.239,
266
  "step": 320
267
  }
268
  ],
 
1
  {
2
  "best_global_step": 320,
3
+ "best_metric": 0.5938381065356336,
4
  "best_model_checkpoint": "/content/gemma_lora_imb/checkpoint-320",
5
  "epoch": 1.6494845360824741,
6
  "eval_steps": 20,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.10309278350515463,
14
+ "grad_norm": 28.475317001342773,
15
  "learning_rate": 9.510309278350516e-06,
16
+ "loss": 1.9688,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.10309278350515463,
21
+ "eval_f1_macro": 0.3268938093426131,
22
+ "eval_loss": 1.6321439743041992,
23
+ "eval_runtime": 3.9651,
24
+ "eval_samples_per_second": 194.698,
25
+ "eval_steps_per_second": 6.305,
26
  "step": 20
27
  },
28
  {
29
  "epoch": 0.20618556701030927,
30
+ "grad_norm": 20.50006866455078,
31
  "learning_rate": 8.994845360824743e-06,
32
+ "loss": 1.3439,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.20618556701030927,
37
+ "eval_f1_macro": 0.3697218634049285,
38
+ "eval_loss": 1.2616751194000244,
39
+ "eval_runtime": 4.2784,
40
+ "eval_samples_per_second": 180.441,
41
+ "eval_steps_per_second": 5.843,
42
  "step": 40
43
  },
44
  {
45
  "epoch": 0.30927835051546393,
46
+ "grad_norm": 41.688751220703125,
47
  "learning_rate": 8.479381443298969e-06,
48
+ "loss": 1.0698,
49
  "step": 60
50
  },
51
  {
52
  "epoch": 0.30927835051546393,
53
+ "eval_f1_macro": 0.4542143805018043,
54
+ "eval_loss": 1.038082480430603,
55
+ "eval_runtime": 3.9347,
56
+ "eval_samples_per_second": 196.201,
57
+ "eval_steps_per_second": 6.354,
58
  "step": 60
59
  },
60
  {
61
  "epoch": 0.41237113402061853,
62
+ "grad_norm": 18.773658752441406,
63
  "learning_rate": 7.963917525773196e-06,
64
+ "loss": 0.8925,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.41237113402061853,
69
+ "eval_f1_macro": 0.49878452511464727,
70
+ "eval_loss": 0.9446011185646057,
71
+ "eval_runtime": 3.9284,
72
+ "eval_samples_per_second": 196.516,
73
+ "eval_steps_per_second": 6.364,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 0.5154639175257731,
78
+ "grad_norm": 15.985794067382812,
79
  "learning_rate": 7.448453608247424e-06,
80
+ "loss": 0.876,
81
  "step": 100
82
  },
83
  {
84
  "epoch": 0.5154639175257731,
85
+ "eval_f1_macro": 0.5263883153406523,
86
+ "eval_loss": 0.8541163206100464,
87
+ "eval_runtime": 3.9231,
88
+ "eval_samples_per_second": 196.785,
89
+ "eval_steps_per_second": 6.373,
90
  "step": 100
91
  },
92
  {
93
  "epoch": 0.6185567010309279,
94
+ "grad_norm": 17.182302474975586,
95
  "learning_rate": 6.93298969072165e-06,
96
+ "loss": 0.8147,
97
  "step": 120
98
  },
99
  {
100
  "epoch": 0.6185567010309279,
101
+ "eval_f1_macro": 0.5381800325973681,
102
+ "eval_loss": 0.8054100275039673,
103
+ "eval_runtime": 3.9483,
104
+ "eval_samples_per_second": 195.53,
105
+ "eval_steps_per_second": 6.332,
106
  "step": 120
107
  },
108
  {
109
  "epoch": 0.7216494845360825,
110
+ "grad_norm": 9.127381324768066,
111
  "learning_rate": 6.417525773195877e-06,
112
+ "loss": 0.8025,
113
  "step": 140
114
  },
115
  {
116
  "epoch": 0.7216494845360825,
117
+ "eval_f1_macro": 0.5465635692018774,
118
+ "eval_loss": 0.7696816921234131,
119
+ "eval_runtime": 3.9227,
120
+ "eval_samples_per_second": 196.804,
121
+ "eval_steps_per_second": 6.373,
122
  "step": 140
123
  },
124
  {
125
  "epoch": 0.8247422680412371,
126
+ "grad_norm": 13.120288848876953,
127
  "learning_rate": 5.902061855670104e-06,
128
+ "loss": 0.7859,
129
  "step": 160
130
  },
131
  {
132
  "epoch": 0.8247422680412371,
133
+ "eval_f1_macro": 0.5512889360726072,
134
+ "eval_loss": 0.7549648880958557,
135
+ "eval_runtime": 3.9339,
136
+ "eval_samples_per_second": 196.24,
137
+ "eval_steps_per_second": 6.355,
138
  "step": 160
139
  },
140
  {
141
  "epoch": 0.9278350515463918,
142
+ "grad_norm": 12.05843448638916,
143
  "learning_rate": 5.38659793814433e-06,
144
+ "loss": 0.7499,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 0.9278350515463918,
149
+ "eval_f1_macro": 0.5526664350368882,
150
+ "eval_loss": 0.7347894906997681,
151
+ "eval_runtime": 3.9585,
152
+ "eval_samples_per_second": 195.022,
153
+ "eval_steps_per_second": 6.315,
154
  "step": 180
155
  },
156
  {
157
  "epoch": 1.0309278350515463,
158
+ "grad_norm": 10.693439483642578,
159
  "learning_rate": 4.871134020618557e-06,
160
+ "loss": 0.7156,
161
  "step": 200
162
  },
163
  {
164
  "epoch": 1.0309278350515463,
165
+ "eval_f1_macro": 0.5764236536326763,
166
+ "eval_loss": 0.7346844673156738,
167
+ "eval_runtime": 3.9136,
168
+ "eval_samples_per_second": 197.261,
169
+ "eval_steps_per_second": 6.388,
170
  "step": 200
171
  },
172
  {
173
  "epoch": 1.134020618556701,
174
+ "grad_norm": 10.037001609802246,
175
  "learning_rate": 4.355670103092784e-06,
176
+ "loss": 0.628,
177
  "step": 220
178
  },
179
  {
180
  "epoch": 1.134020618556701,
181
+ "eval_f1_macro": 0.5775336754770706,
182
+ "eval_loss": 0.810948371887207,
183
+ "eval_runtime": 4.0413,
184
+ "eval_samples_per_second": 191.029,
185
+ "eval_steps_per_second": 6.186,
186
  "step": 220
187
  },
188
  {
189
  "epoch": 1.2371134020618557,
190
+ "grad_norm": 9.240849494934082,
191
  "learning_rate": 3.840206185567011e-06,
192
+ "loss": 0.6601,
193
  "step": 240
194
  },
195
  {
196
  "epoch": 1.2371134020618557,
197
+ "eval_f1_macro": 0.5821846549080626,
198
+ "eval_loss": 0.7480236291885376,
199
+ "eval_runtime": 3.9625,
200
+ "eval_samples_per_second": 194.826,
201
+ "eval_steps_per_second": 6.309,
202
  "step": 240
203
  },
204
  {
205
  "epoch": 1.3402061855670104,
206
+ "grad_norm": 7.726218223571777,
207
  "learning_rate": 3.324742268041237e-06,
208
+ "loss": 0.6286,
209
  "step": 260
210
  },
211
  {
212
  "epoch": 1.3402061855670104,
213
+ "eval_f1_macro": 0.5793924416627766,
214
+ "eval_loss": 0.7301892638206482,
215
+ "eval_runtime": 3.9159,
216
+ "eval_samples_per_second": 197.144,
217
+ "eval_steps_per_second": 6.384,
218
  "step": 260
219
  },
220
  {
221
  "epoch": 1.443298969072165,
222
+ "grad_norm": 8.408136367797852,
223
  "learning_rate": 2.809278350515464e-06,
224
+ "loss": 0.6048,
225
  "step": 280
226
  },
227
  {
228
  "epoch": 1.443298969072165,
229
+ "eval_f1_macro": 0.5846632256257394,
230
+ "eval_loss": 0.7338100075721741,
231
+ "eval_runtime": 3.9641,
232
+ "eval_samples_per_second": 194.749,
233
+ "eval_steps_per_second": 6.307,
234
  "step": 280
235
  },
236
  {
237
  "epoch": 1.5463917525773194,
238
+ "grad_norm": 8.853802680969238,
239
  "learning_rate": 2.293814432989691e-06,
240
+ "loss": 0.6399,
241
  "step": 300
242
  },
243
  {
244
  "epoch": 1.5463917525773194,
245
+ "eval_f1_macro": 0.5798440723934543,
246
+ "eval_loss": 0.7210143208503723,
247
+ "eval_runtime": 4.4541,
248
+ "eval_samples_per_second": 173.322,
249
+ "eval_steps_per_second": 5.613,
250
  "step": 300
251
  },
252
  {
253
  "epoch": 1.6494845360824741,
254
+ "grad_norm": 7.352003574371338,
255
  "learning_rate": 1.7783505154639178e-06,
256
+ "loss": 0.5882,
257
  "step": 320
258
  },
259
  {
260
  "epoch": 1.6494845360824741,
261
+ "eval_f1_macro": 0.5938381065356336,
262
+ "eval_loss": 0.7336726188659668,
263
+ "eval_runtime": 3.9872,
264
+ "eval_samples_per_second": 193.619,
265
+ "eval_steps_per_second": 6.27,
266
  "step": 320
267
  }
268
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a7fe80d89e3247b409545d8b84d73e41e961e2968dc1e02dd1e3d282557d941
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:697c137f0af9ca57111ce6bb1fdc70aa1d95192c2c6fdff6a83fa4dc1f74828b
3
  size 5905