Mandour-101 commited on
Commit
f7f8e29
·
verified ·
1 Parent(s): 9bb39bf

Upload folder using huggingface_hub

Browse files
checkpoint-24012/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MMoEViT"
4
+ ],
5
+ "torch_dtype": "float32",
6
+ "transformers_version": "4.47.0"
7
+ }
checkpoint-24012/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71f4cc6af02598af91d8f81fcfc3c54835d9aae0fde639f7c44b8742de4c97eb
3
+ size 359190616
checkpoint-24012/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ddf47d0d1e33cabbcd1c9aedb3564e96a4dd0a9983336fccdc0ccc2d279afb3
3
+ size 718509882
checkpoint-24012/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1918ecd20c1878b0401c6166770f4b5e553e5ae2878e7703adb3ee5f10c831
3
+ size 14244
checkpoint-24012/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:547be801b540007f8911172aaba1ef1e48aba1e11b2f98058477f0eaa860f4e1
3
+ size 1064
checkpoint-24012/trainer_state.json ADDED
@@ -0,0 +1,642 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9661077074670175,
3
+ "best_model_checkpoint": "/kaggle/working/mmoe_vit_results/checkpoint-24012",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 24012,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.12493753123438281,
13
+ "grad_norm": 2.205951452255249,
14
+ "learning_rate": 5e-05,
15
+ "loss": 0.7943,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.24987506246876562,
20
+ "grad_norm": 2.1307880878448486,
21
+ "learning_rate": 4.9091371665334015e-05,
22
+ "loss": 0.474,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.3748125937031484,
27
+ "grad_norm": 1.7951563596725464,
28
+ "learning_rate": 4.818274333066802e-05,
29
+ "loss": 0.4167,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.49975012493753124,
34
+ "grad_norm": 1.9876887798309326,
35
+ "learning_rate": 4.727411499600204e-05,
36
+ "loss": 0.3916,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.624687656171914,
41
+ "grad_norm": 3.0330495834350586,
42
+ "learning_rate": 4.636548666133605e-05,
43
+ "loss": 0.3704,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.7496251874062968,
48
+ "grad_norm": 2.0157809257507324,
49
+ "learning_rate": 4.545685832667006e-05,
50
+ "loss": 0.3577,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.8745627186406797,
55
+ "grad_norm": 1.7754454612731934,
56
+ "learning_rate": 4.4548229992004074e-05,
57
+ "loss": 0.3447,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.9995002498750625,
62
+ "grad_norm": 1.7371801137924194,
63
+ "learning_rate": 4.3639601657338087e-05,
64
+ "loss": 0.3348,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "eval_category_macro_f1": 0.6952318859438807,
70
+ "eval_category_precision": 0.956608825785308,
71
+ "eval_category_recall": 0.9564824339910845,
72
+ "eval_category_weighted_f1": 0.9548015657704888,
73
+ "eval_color_macro_f1": 0.26886041835616176,
74
+ "eval_color_precision": 0.742503302340083,
75
+ "eval_color_recall": 0.7711898749961034,
76
+ "eval_color_weighted_f1": 0.7494002787544956,
77
+ "eval_gender_macro_f1": 0.5551422720580631,
78
+ "eval_gender_precision": 0.9135315056899179,
79
+ "eval_gender_recall": 0.9142429626858692,
80
+ "eval_gender_weighted_f1": 0.9133261341390547,
81
+ "eval_loss": 0.3139565885066986,
82
+ "eval_material_macro_f1": 0.18175933111933457,
83
+ "eval_material_precision": 0.5510795574472717,
84
+ "eval_material_recall": 0.5989276473705539,
85
+ "eval_material_weighted_f1": 0.5328149786359672,
86
+ "eval_neck_macro_f1": 0.17476784999699163,
87
+ "eval_neck_precision": 0.770231966081002,
88
+ "eval_neck_recall": 0.767043860469466,
89
+ "eval_neck_weighted_f1": 0.7547329696141571,
90
+ "eval_pattern_macro_f1": 0.07495456820600468,
91
+ "eval_pattern_precision": 0.500470399822932,
92
+ "eval_pattern_recall": 0.5943763833037189,
93
+ "eval_pattern_weighted_f1": 0.5043529662664364,
94
+ "eval_product_type_macro_f1": 0.5286677539064947,
95
+ "eval_product_type_precision": 0.6574438114334396,
96
+ "eval_product_type_recall": 0.6384238910190467,
97
+ "eval_product_type_weighted_f1": 0.6249330379628022,
98
+ "eval_runtime": 307.6972,
99
+ "eval_samples_per_second": 104.255,
100
+ "eval_sleeve_macro_f1": 0.3129635925848871,
101
+ "eval_sleeve_precision": 0.8408246980460057,
102
+ "eval_sleeve_recall": 0.8423579288631192,
103
+ "eval_sleeve_weighted_f1": 0.830038616651521,
104
+ "eval_steps_per_second": 1.631,
105
+ "eval_style_macro_f1": 0.14293232343466503,
106
+ "eval_style_precision": 0.5728463487218707,
107
+ "eval_style_recall": 0.6262975778546713,
108
+ "eval_style_weighted_f1": 0.5804908017903951,
109
+ "step": 4002
110
+ },
111
+ {
112
+ "epoch": 1.1244377811094453,
113
+ "grad_norm": 1.4916481971740723,
114
+ "learning_rate": 4.27309733226721e-05,
115
+ "loss": 0.3134,
116
+ "step": 4500
117
+ },
118
+ {
119
+ "epoch": 1.249375312343828,
120
+ "grad_norm": 1.7547612190246582,
121
+ "learning_rate": 4.1822344988006106e-05,
122
+ "loss": 0.3112,
123
+ "step": 5000
124
+ },
125
+ {
126
+ "epoch": 1.3743128435782108,
127
+ "grad_norm": 1.4588732719421387,
128
+ "learning_rate": 4.091371665334012e-05,
129
+ "loss": 0.3051,
130
+ "step": 5500
131
+ },
132
+ {
133
+ "epoch": 1.4992503748125938,
134
+ "grad_norm": 1.2639210224151611,
135
+ "learning_rate": 4.000508831867413e-05,
136
+ "loss": 0.3029,
137
+ "step": 6000
138
+ },
139
+ {
140
+ "epoch": 1.6241879060469766,
141
+ "grad_norm": 1.4789787530899048,
142
+ "learning_rate": 3.9096459984008145e-05,
143
+ "loss": 0.2961,
144
+ "step": 6500
145
+ },
146
+ {
147
+ "epoch": 1.7491254372813594,
148
+ "grad_norm": 1.6470601558685303,
149
+ "learning_rate": 3.818783164934215e-05,
150
+ "loss": 0.2873,
151
+ "step": 7000
152
+ },
153
+ {
154
+ "epoch": 1.8740629685157422,
155
+ "grad_norm": 2.157299280166626,
156
+ "learning_rate": 3.7279203314676164e-05,
157
+ "loss": 0.2847,
158
+ "step": 7500
159
+ },
160
+ {
161
+ "epoch": 1.999000499750125,
162
+ "grad_norm": 1.4557589292526245,
163
+ "learning_rate": 3.637057498001018e-05,
164
+ "loss": 0.284,
165
+ "step": 8000
166
+ },
167
+ {
168
+ "epoch": 2.0,
169
+ "eval_category_macro_f1": 0.7731459035999274,
170
+ "eval_category_precision": 0.9621854954403526,
171
+ "eval_category_recall": 0.9620935814707441,
172
+ "eval_category_weighted_f1": 0.9611122266129147,
173
+ "eval_color_macro_f1": 0.33282064622069346,
174
+ "eval_color_precision": 0.7628507936505963,
175
+ "eval_color_recall": 0.7826927273294055,
176
+ "eval_color_weighted_f1": 0.7666531131477731,
177
+ "eval_gender_macro_f1": 0.6529045193901017,
178
+ "eval_gender_precision": 0.9310067709590051,
179
+ "eval_gender_recall": 0.9307023286262041,
180
+ "eval_gender_weighted_f1": 0.9300238040883521,
181
+ "eval_loss": 0.27808678150177,
182
+ "eval_material_macro_f1": 0.2672650569326831,
183
+ "eval_material_precision": 0.5829820463502581,
184
+ "eval_material_recall": 0.6170703575547867,
185
+ "eval_material_weighted_f1": 0.5634078038324131,
186
+ "eval_neck_macro_f1": 0.20473481727652498,
187
+ "eval_neck_precision": 0.7723201257575838,
188
+ "eval_neck_recall": 0.7876180678948845,
189
+ "eval_neck_weighted_f1": 0.768304127766941,
190
+ "eval_pattern_macro_f1": 0.11918967355596528,
191
+ "eval_pattern_precision": 0.5371433805351696,
192
+ "eval_pattern_recall": 0.6123320552386297,
193
+ "eval_pattern_weighted_f1": 0.5328905424758658,
194
+ "eval_product_type_macro_f1": 0.6181561032086245,
195
+ "eval_product_type_precision": 0.6896032942264115,
196
+ "eval_product_type_recall": 0.6778577885844321,
197
+ "eval_product_type_weighted_f1": 0.67171279677715,
198
+ "eval_runtime": 302.2512,
199
+ "eval_samples_per_second": 106.134,
200
+ "eval_sleeve_macro_f1": 0.34627852813600124,
201
+ "eval_sleeve_precision": 0.8389224468125812,
202
+ "eval_sleeve_recall": 0.8489042675893888,
203
+ "eval_sleeve_weighted_f1": 0.8375757784906263,
204
+ "eval_steps_per_second": 1.661,
205
+ "eval_style_macro_f1": 0.18292409377977412,
206
+ "eval_style_precision": 0.5948748921293634,
207
+ "eval_style_recall": 0.6488356868979707,
208
+ "eval_style_weighted_f1": 0.6017236316787142,
209
+ "step": 8004
210
+ },
211
+ {
212
+ "epoch": 2.1239380309845077,
213
+ "grad_norm": 1.367946743965149,
214
+ "learning_rate": 3.546194664534419e-05,
215
+ "loss": 0.2605,
216
+ "step": 8500
217
+ },
218
+ {
219
+ "epoch": 2.2488755622188905,
220
+ "grad_norm": 1.5504860877990723,
221
+ "learning_rate": 3.45533183106782e-05,
222
+ "loss": 0.2633,
223
+ "step": 9000
224
+ },
225
+ {
226
+ "epoch": 2.3738130934532733,
227
+ "grad_norm": 1.5913927555084229,
228
+ "learning_rate": 3.364468997601221e-05,
229
+ "loss": 0.2561,
230
+ "step": 9500
231
+ },
232
+ {
233
+ "epoch": 2.498750624687656,
234
+ "grad_norm": 1.7354265451431274,
235
+ "learning_rate": 3.273606164134623e-05,
236
+ "loss": 0.2571,
237
+ "step": 10000
238
+ },
239
+ {
240
+ "epoch": 2.623688155922039,
241
+ "grad_norm": 1.5740700960159302,
242
+ "learning_rate": 3.1827433306680235e-05,
243
+ "loss": 0.2533,
244
+ "step": 10500
245
+ },
246
+ {
247
+ "epoch": 2.7486256871564216,
248
+ "grad_norm": 1.3308007717132568,
249
+ "learning_rate": 3.091880497201425e-05,
250
+ "loss": 0.2539,
251
+ "step": 11000
252
+ },
253
+ {
254
+ "epoch": 2.873563218390805,
255
+ "grad_norm": 1.4897780418395996,
256
+ "learning_rate": 3.0010176637348258e-05,
257
+ "loss": 0.2471,
258
+ "step": 11500
259
+ },
260
+ {
261
+ "epoch": 2.9985007496251876,
262
+ "grad_norm": 1.9843637943267822,
263
+ "learning_rate": 2.9101548302682274e-05,
264
+ "loss": 0.2483,
265
+ "step": 12000
266
+ },
267
+ {
268
+ "epoch": 3.0,
269
+ "eval_category_macro_f1": 0.7903733270490069,
270
+ "eval_category_precision": 0.9645144186683499,
271
+ "eval_category_recall": 0.9642756943795006,
272
+ "eval_category_weighted_f1": 0.963310209667099,
273
+ "eval_color_macro_f1": 0.3621249155624305,
274
+ "eval_color_precision": 0.7802552212263318,
275
+ "eval_color_recall": 0.7986533246048817,
276
+ "eval_color_weighted_f1": 0.7810869424554918,
277
+ "eval_gender_macro_f1": 0.682238598199015,
278
+ "eval_gender_precision": 0.9267564337966526,
279
+ "eval_gender_recall": 0.9252782193958664,
280
+ "eval_gender_weighted_f1": 0.9249918474296774,
281
+ "eval_loss": 0.25919318199157715,
282
+ "eval_material_macro_f1": 0.3398667069927169,
283
+ "eval_material_precision": 0.5978318966832341,
284
+ "eval_material_recall": 0.6357741824869853,
285
+ "eval_material_weighted_f1": 0.5944051235584699,
286
+ "eval_neck_macro_f1": 0.23924719225254232,
287
+ "eval_neck_precision": 0.7806692719164586,
288
+ "eval_neck_recall": 0.7925122354188098,
289
+ "eval_neck_weighted_f1": 0.7790870348414296,
290
+ "eval_pattern_macro_f1": 0.1641511847711344,
291
+ "eval_pattern_precision": 0.5819393078338299,
292
+ "eval_pattern_recall": 0.6225879859097853,
293
+ "eval_pattern_weighted_f1": 0.5502139317041554,
294
+ "eval_product_type_macro_f1": 0.6583201290239673,
295
+ "eval_product_type_precision": 0.7128711001410035,
296
+ "eval_product_type_recall": 0.6994918794226753,
297
+ "eval_product_type_weighted_f1": 0.6929448155349042,
298
+ "eval_runtime": 301.2799,
299
+ "eval_samples_per_second": 106.476,
300
+ "eval_sleeve_macro_f1": 0.3563735801589765,
301
+ "eval_sleeve_precision": 0.8406013881476055,
302
+ "eval_sleeve_recall": 0.8499018049191059,
303
+ "eval_sleeve_weighted_f1": 0.840146351199092,
304
+ "eval_steps_per_second": 1.666,
305
+ "eval_style_macro_f1": 0.22006801367696224,
306
+ "eval_style_precision": 0.6224502543435393,
307
+ "eval_style_recall": 0.6560678325384208,
308
+ "eval_style_weighted_f1": 0.622977679977427,
309
+ "step": 12006
310
+ },
311
+ {
312
+ "epoch": 3.1234382808595704,
313
+ "grad_norm": 2.0150625705718994,
314
+ "learning_rate": 2.8192919968016284e-05,
315
+ "loss": 0.2251,
316
+ "step": 12500
317
+ },
318
+ {
319
+ "epoch": 3.248375812093953,
320
+ "grad_norm": 1.2645131349563599,
321
+ "learning_rate": 2.7284291633350297e-05,
322
+ "loss": 0.2251,
323
+ "step": 13000
324
+ },
325
+ {
326
+ "epoch": 3.373313343328336,
327
+ "grad_norm": 1.7234032154083252,
328
+ "learning_rate": 2.6375663298684306e-05,
329
+ "loss": 0.2239,
330
+ "step": 13500
331
+ },
332
+ {
333
+ "epoch": 3.4982508745627188,
334
+ "grad_norm": 1.676547646522522,
335
+ "learning_rate": 2.5467034964018323e-05,
336
+ "loss": 0.2254,
337
+ "step": 14000
338
+ },
339
+ {
340
+ "epoch": 3.6231884057971016,
341
+ "grad_norm": 1.3010063171386719,
342
+ "learning_rate": 2.4558406629352332e-05,
343
+ "loss": 0.2241,
344
+ "step": 14500
345
+ },
346
+ {
347
+ "epoch": 3.7481259370314843,
348
+ "grad_norm": 1.72947359085083,
349
+ "learning_rate": 2.3649778294686342e-05,
350
+ "loss": 0.2222,
351
+ "step": 15000
352
+ },
353
+ {
354
+ "epoch": 3.873063468265867,
355
+ "grad_norm": 1.47541081905365,
356
+ "learning_rate": 2.2741149960020355e-05,
357
+ "loss": 0.2177,
358
+ "step": 15500
359
+ },
360
+ {
361
+ "epoch": 3.99800099950025,
362
+ "grad_norm": 1.4984639883041382,
363
+ "learning_rate": 2.1832521625354364e-05,
364
+ "loss": 0.2146,
365
+ "step": 16000
366
+ },
367
+ {
368
+ "epoch": 4.0,
369
+ "eval_category_macro_f1": 0.8070428225617673,
370
+ "eval_category_precision": 0.9639301959345535,
371
+ "eval_category_recall": 0.9639951370055176,
372
+ "eval_category_weighted_f1": 0.9633956909548311,
373
+ "eval_color_macro_f1": 0.4160977179764728,
374
+ "eval_color_precision": 0.79551435648077,
375
+ "eval_color_recall": 0.80382804950279,
376
+ "eval_color_weighted_f1": 0.7932115174135331,
377
+ "eval_gender_macro_f1": 0.7138550749176814,
378
+ "eval_gender_precision": 0.9418271715431166,
379
+ "eval_gender_recall": 0.9409894323389133,
380
+ "eval_gender_weighted_f1": 0.9408785911526341,
381
+ "eval_loss": 0.24342668056488037,
382
+ "eval_material_macro_f1": 0.3788597805248959,
383
+ "eval_material_precision": 0.6203377475947315,
384
+ "eval_material_recall": 0.6495214938121513,
385
+ "eval_material_weighted_f1": 0.6137247593163535,
386
+ "eval_neck_macro_f1": 0.26417595340931804,
387
+ "eval_neck_precision": 0.7888860680739177,
388
+ "eval_neck_recall": 0.8017394557186944,
389
+ "eval_neck_weighted_f1": 0.7912357278702308,
390
+ "eval_pattern_macro_f1": 0.21137706069820827,
391
+ "eval_pattern_precision": 0.5896656959683353,
392
+ "eval_pattern_recall": 0.63402849215998,
393
+ "eval_pattern_weighted_f1": 0.5805939336450403,
394
+ "eval_product_type_macro_f1": 0.7027375784160153,
395
+ "eval_product_type_precision": 0.7257145790424636,
396
+ "eval_product_type_recall": 0.7205025094298451,
397
+ "eval_product_type_weighted_f1": 0.7160893066264747,
398
+ "eval_runtime": 301.2031,
399
+ "eval_samples_per_second": 106.503,
400
+ "eval_sleeve_macro_f1": 0.3797203601033849,
401
+ "eval_sleeve_precision": 0.8450184667180418,
402
+ "eval_sleeve_recall": 0.8539543003210823,
403
+ "eval_sleeve_weighted_f1": 0.8447728073486299,
404
+ "eval_steps_per_second": 1.667,
405
+ "eval_style_macro_f1": 0.25776208112470533,
406
+ "eval_style_precision": 0.636768112985809,
407
+ "eval_style_recall": 0.6687552604507622,
408
+ "eval_style_weighted_f1": 0.6370942948799759,
409
+ "step": 16008
410
+ },
411
+ {
412
+ "epoch": 4.122938530734633,
413
+ "grad_norm": 1.4649860858917236,
414
+ "learning_rate": 2.0923893290688377e-05,
415
+ "loss": 0.1968,
416
+ "step": 16500
417
+ },
418
+ {
419
+ "epoch": 4.2478760619690155,
420
+ "grad_norm": 1.7522532939910889,
421
+ "learning_rate": 2.001526495602239e-05,
422
+ "loss": 0.1952,
423
+ "step": 17000
424
+ },
425
+ {
426
+ "epoch": 4.372813593203398,
427
+ "grad_norm": 1.5114365816116333,
428
+ "learning_rate": 1.9106636621356403e-05,
429
+ "loss": 0.198,
430
+ "step": 17500
431
+ },
432
+ {
433
+ "epoch": 4.497751124437781,
434
+ "grad_norm": 1.7840216159820557,
435
+ "learning_rate": 1.8198008286690413e-05,
436
+ "loss": 0.1954,
437
+ "step": 18000
438
+ },
439
+ {
440
+ "epoch": 4.622688655672164,
441
+ "grad_norm": 1.5770485401153564,
442
+ "learning_rate": 1.7289379952024426e-05,
443
+ "loss": 0.1927,
444
+ "step": 18500
445
+ },
446
+ {
447
+ "epoch": 4.747626186906547,
448
+ "grad_norm": 1.5942214727401733,
449
+ "learning_rate": 1.6380751617358436e-05,
450
+ "loss": 0.1936,
451
+ "step": 19000
452
+ },
453
+ {
454
+ "epoch": 4.872563718140929,
455
+ "grad_norm": 1.1230299472808838,
456
+ "learning_rate": 1.547212328269245e-05,
457
+ "loss": 0.1907,
458
+ "step": 19500
459
+ },
460
+ {
461
+ "epoch": 4.997501249375312,
462
+ "grad_norm": 1.336006999015808,
463
+ "learning_rate": 1.456349494802646e-05,
464
+ "loss": 0.1921,
465
+ "step": 20000
466
+ },
467
+ {
468
+ "epoch": 5.0,
469
+ "eval_category_macro_f1": 0.814575381252187,
470
+ "eval_category_precision": 0.9663094811768377,
471
+ "eval_category_recall": 0.9665824994544717,
472
+ "eval_category_weighted_f1": 0.965923844387987,
473
+ "eval_color_macro_f1": 0.4369174781614063,
474
+ "eval_color_precision": 0.8028535608404194,
475
+ "eval_color_recall": 0.8148321331712335,
476
+ "eval_color_weighted_f1": 0.8028476456898314,
477
+ "eval_gender_macro_f1": 0.7053936478562223,
478
+ "eval_gender_precision": 0.9416777760492416,
479
+ "eval_gender_recall": 0.9407712210480377,
480
+ "eval_gender_weighted_f1": 0.9403681152060941,
481
+ "eval_loss": 0.23490393161773682,
482
+ "eval_material_macro_f1": 0.40133288032222664,
483
+ "eval_material_precision": 0.6313613296699556,
484
+ "eval_material_recall": 0.6625206521400293,
485
+ "eval_material_weighted_f1": 0.6270470446515838,
486
+ "eval_neck_macro_f1": 0.2814408126866265,
487
+ "eval_neck_precision": 0.8008124167021559,
488
+ "eval_neck_recall": 0.8062907197855295,
489
+ "eval_neck_weighted_f1": 0.7975722055581634,
490
+ "eval_pattern_macro_f1": 0.22699854595816307,
491
+ "eval_pattern_precision": 0.6042854492310323,
492
+ "eval_pattern_recall": 0.6455001714517286,
493
+ "eval_pattern_weighted_f1": 0.5909107309557631,
494
+ "eval_product_type_macro_f1": 0.7113036346548861,
495
+ "eval_product_type_precision": 0.7388496345969355,
496
+ "eval_product_type_recall": 0.7295426914804077,
497
+ "eval_product_type_weighted_f1": 0.726666644027167,
498
+ "eval_runtime": 301.3794,
499
+ "eval_samples_per_second": 106.441,
500
+ "eval_sleeve_macro_f1": 0.38500573083937306,
501
+ "eval_sleeve_precision": 0.8532874153559135,
502
+ "eval_sleeve_recall": 0.8566040088531438,
503
+ "eval_sleeve_weighted_f1": 0.8483550688145379,
504
+ "eval_steps_per_second": 1.666,
505
+ "eval_style_macro_f1": 0.2759097194358481,
506
+ "eval_style_precision": 0.636595640254917,
507
+ "eval_style_recall": 0.6738676392655631,
508
+ "eval_style_weighted_f1": 0.6414452136845251,
509
+ "step": 20010
510
+ },
511
+ {
512
+ "epoch": 5.122438780609695,
513
+ "grad_norm": 1.5047273635864258,
514
+ "learning_rate": 1.3654866613360473e-05,
515
+ "loss": 0.1717,
516
+ "step": 20500
517
+ },
518
+ {
519
+ "epoch": 5.247376311844078,
520
+ "grad_norm": 1.4382556676864624,
521
+ "learning_rate": 1.2746238278694484e-05,
522
+ "loss": 0.1693,
523
+ "step": 21000
524
+ },
525
+ {
526
+ "epoch": 5.3723138430784605,
527
+ "grad_norm": 1.5730925798416138,
528
+ "learning_rate": 1.1837609944028495e-05,
529
+ "loss": 0.1724,
530
+ "step": 21500
531
+ },
532
+ {
533
+ "epoch": 5.497251374312843,
534
+ "grad_norm": 1.8472751379013062,
535
+ "learning_rate": 1.0928981609362507e-05,
536
+ "loss": 0.1721,
537
+ "step": 22000
538
+ },
539
+ {
540
+ "epoch": 5.622188905547226,
541
+ "grad_norm": 1.3081119060516357,
542
+ "learning_rate": 1.0020353274696518e-05,
543
+ "loss": 0.1694,
544
+ "step": 22500
545
+ },
546
+ {
547
+ "epoch": 5.747126436781609,
548
+ "grad_norm": 1.9150363206863403,
549
+ "learning_rate": 9.111724940030531e-06,
550
+ "loss": 0.1701,
551
+ "step": 23000
552
+ },
553
+ {
554
+ "epoch": 5.872063968015992,
555
+ "grad_norm": 1.6012868881225586,
556
+ "learning_rate": 8.203096605364542e-06,
557
+ "loss": 0.1692,
558
+ "step": 23500
559
+ },
560
+ {
561
+ "epoch": 5.997001499250375,
562
+ "grad_norm": 2.1426620483398438,
563
+ "learning_rate": 7.294468270698554e-06,
564
+ "loss": 0.1663,
565
+ "step": 24000
566
+ },
567
+ {
568
+ "epoch": 6.0,
569
+ "eval_category_macro_f1": 0.8135808160841039,
570
+ "eval_category_precision": 0.9662816427280644,
571
+ "eval_category_recall": 0.9666136724960255,
572
+ "eval_category_weighted_f1": 0.9661077074670175,
573
+ "eval_color_macro_f1": 0.4600635003318152,
574
+ "eval_color_precision": 0.8112357172847174,
575
+ "eval_color_recall": 0.8206926649833224,
576
+ "eval_color_weighted_f1": 0.809040612878886,
577
+ "eval_gender_macro_f1": 0.7077574180619189,
578
+ "eval_gender_precision": 0.9483675936419935,
579
+ "eval_gender_recall": 0.9483150971040244,
580
+ "eval_gender_weighted_f1": 0.9479706364362375,
581
+ "eval_loss": 0.22788158059120178,
582
+ "eval_material_macro_f1": 0.41327594900590064,
583
+ "eval_material_precision": 0.6451838446086702,
584
+ "eval_material_recall": 0.6689422987000841,
585
+ "eval_material_weighted_f1": 0.6426236021542818,
586
+ "eval_neck_macro_f1": 0.29755762158064153,
587
+ "eval_neck_precision": 0.8025765460608351,
588
+ "eval_neck_recall": 0.8098444465226472,
589
+ "eval_neck_weighted_f1": 0.8028234665625918,
590
+ "eval_pattern_macro_f1": 0.25250915316096467,
591
+ "eval_pattern_precision": 0.6176590799884946,
592
+ "eval_pattern_recall": 0.6519218180117834,
593
+ "eval_pattern_weighted_f1": 0.601571471109787,
594
+ "eval_product_type_macro_f1": 0.7308082280687357,
595
+ "eval_product_type_precision": 0.7450374918864295,
596
+ "eval_product_type_recall": 0.7416690046447831,
597
+ "eval_product_type_weighted_f1": 0.7391305618836747,
598
+ "eval_runtime": 301.8571,
599
+ "eval_samples_per_second": 106.272,
600
+ "eval_sleeve_macro_f1": 0.40274936834626424,
601
+ "eval_sleeve_precision": 0.855686478933816,
602
+ "eval_sleeve_recall": 0.8593472365098662,
603
+ "eval_sleeve_weighted_f1": 0.851732539861904,
604
+ "eval_steps_per_second": 1.663,
605
+ "eval_style_macro_f1": 0.2996676235011164,
606
+ "eval_style_precision": 0.6484348800461253,
607
+ "eval_style_recall": 0.6805386701580474,
608
+ "eval_style_weighted_f1": 0.6505079281930336,
609
+ "step": 24012
610
+ }
611
+ ],
612
+ "logging_steps": 500,
613
+ "max_steps": 28014,
614
+ "num_input_tokens_seen": 0,
615
+ "num_train_epochs": 7,
616
+ "save_steps": 500,
617
+ "stateful_callbacks": {
618
+ "EarlyStoppingCallback": {
619
+ "args": {
620
+ "early_stopping_patience": 2,
621
+ "early_stopping_threshold": 0.0
622
+ },
623
+ "attributes": {
624
+ "early_stopping_patience_counter": 0
625
+ }
626
+ },
627
+ "TrainerControl": {
628
+ "args": {
629
+ "should_epoch_stop": false,
630
+ "should_evaluate": false,
631
+ "should_log": false,
632
+ "should_save": true,
633
+ "should_training_stop": false
634
+ },
635
+ "attributes": {}
636
+ }
637
+ },
638
+ "total_flos": 0.0,
639
+ "train_batch_size": 32,
640
+ "trial_name": null,
641
+ "trial_params": null
642
+ }
checkpoint-24012/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd16e73b326474644ca8ee6974df45fabbdd7162dc44cee9d339f980bcea75f
3
+ size 5368