darklorddad commited on
Commit
7c29739
·
verified ·
1 Parent(s): 72bcc55

Upload 5 files

Browse files
checkpoint-1073/config.json ADDED
@@ -0,0 +1,464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/swinv2-tiny-patch4-window16-256",
3
+ "_num_labels": 200,
4
+ "architectures": [
5
+ "Swinv2ForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 6,
12
+ 2
13
+ ],
14
+ "drop_path_rate": 0.1,
15
+ "embed_dim": 96,
16
+ "encoder_stride": 32,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.0,
19
+ "hidden_size": 768,
20
+ "id2label": {
21
+ "0": "acadian_flycatcher",
22
+ "1": "american_crow",
23
+ "2": "american_goldfinch",
24
+ "3": "american_pipit",
25
+ "4": "american_redstart",
26
+ "5": "american_three_toed_woodpecker",
27
+ "6": "anna_hummingbird",
28
+ "7": "artic_tern",
29
+ "8": "baird_sparrow",
30
+ "9": "baltimore_oriole",
31
+ "10": "bank_swallow",
32
+ "11": "barn_swallow",
33
+ "12": "bay_breasted_warbler",
34
+ "13": "belted_kingfisher",
35
+ "14": "bewick_wren",
36
+ "15": "black_and_white_warbler",
37
+ "16": "black_billed_cuckoo",
38
+ "17": "black_capped_vireo",
39
+ "18": "black_footed_albatross",
40
+ "19": "black_tern",
41
+ "20": "black_throated_blue_warbler",
42
+ "21": "black_throated_sparrow",
43
+ "22": "blue_grosbeak",
44
+ "23": "blue_headed_vireo",
45
+ "24": "blue_jay",
46
+ "25": "blue_winged_warbler",
47
+ "26": "boat_tailed_grackle",
48
+ "27": "bobolink",
49
+ "28": "bohemian_waxwing",
50
+ "29": "brandt_cormorant",
51
+ "30": "brewer_blackbird",
52
+ "31": "brewer_sparrow",
53
+ "32": "bronzed_cowbird",
54
+ "33": "brown_creeper",
55
+ "34": "brown_pelican",
56
+ "35": "brown_thrasher",
57
+ "36": "cactus_wren",
58
+ "37": "california_gull",
59
+ "38": "canada_warbler",
60
+ "39": "cape_glossy_starling",
61
+ "40": "cape_may_warbler",
62
+ "41": "cardinal",
63
+ "42": "carolina_wren",
64
+ "43": "caspian_tern",
65
+ "44": "cedar_waxwing",
66
+ "45": "cerulean_warbler",
67
+ "46": "chestnut_sided_warbler",
68
+ "47": "chipping_sparrow",
69
+ "48": "chuck_will_widow",
70
+ "49": "clark_nutcracker",
71
+ "50": "clay_colored_sparrow",
72
+ "51": "cliff_swallow",
73
+ "52": "common_raven",
74
+ "53": "common_tern",
75
+ "54": "common_yellowthroat",
76
+ "55": "crested_auklet",
77
+ "56": "dark_eyed_junco",
78
+ "57": "downy_woodpecker",
79
+ "58": "eared_grebe",
80
+ "59": "eastern_towhee",
81
+ "60": "elegant_tern",
82
+ "61": "european_goldfinch",
83
+ "62": "evening_grosbeak",
84
+ "63": "field_sparrow",
85
+ "64": "fish_crow",
86
+ "65": "florida_jay",
87
+ "66": "forsters_tern",
88
+ "67": "fox_sparrow",
89
+ "68": "frigatebird",
90
+ "69": "gadwall",
91
+ "70": "geococcyx",
92
+ "71": "glaucous_winged_gull",
93
+ "72": "golden_winged_warbler",
94
+ "73": "grasshopper_sparrow",
95
+ "74": "gray_catbird",
96
+ "75": "gray_crowned_rosy_finch",
97
+ "76": "gray_kingbird",
98
+ "77": "great_crested_flycatcher",
99
+ "78": "great_grey_shrike",
100
+ "79": "green_jay",
101
+ "80": "green_kingfisher",
102
+ "81": "green_tailed_towhee",
103
+ "82": "green_violetear",
104
+ "83": "groove_billed_ani",
105
+ "84": "harris_sparrow",
106
+ "85": "heermann_gull",
107
+ "86": "henslow_sparrow",
108
+ "87": "herring_gull",
109
+ "88": "hooded_merganser",
110
+ "89": "hooded_oriole",
111
+ "90": "hooded_warbler",
112
+ "91": "horned_grebe",
113
+ "92": "horned_lark",
114
+ "93": "horned_puffin",
115
+ "94": "house_sparrow",
116
+ "95": "house_wren",
117
+ "96": "indigo_bunting",
118
+ "97": "ivory_gull",
119
+ "98": "kentucky_warbler",
120
+ "99": "laysan_albatross",
121
+ "100": "lazuli_bunting",
122
+ "101": "le_conte_sparrow",
123
+ "102": "least_auklet",
124
+ "103": "least_flycatcher",
125
+ "104": "least_tern",
126
+ "105": "lincoln_sparrow",
127
+ "106": "loggerhead_shrike",
128
+ "107": "long_tailed_jaeger",
129
+ "108": "louisiana_waterthrush",
130
+ "109": "magnolia_warbler",
131
+ "110": "mallard",
132
+ "111": "mangrove_cuckoo",
133
+ "112": "marsh_wren",
134
+ "113": "mockingbird",
135
+ "114": "mourning_warbler",
136
+ "115": "myrtle_warbler",
137
+ "116": "nashville_warbler",
138
+ "117": "nelson_sharp_tailed_sparrow",
139
+ "118": "nighthawk",
140
+ "119": "northern_flicker",
141
+ "120": "northern_fulmar",
142
+ "121": "northern_waterthrush",
143
+ "122": "olive_sided_flycatcher",
144
+ "123": "orange_crowned_warbler",
145
+ "124": "orchard_oriole",
146
+ "125": "ovenbird",
147
+ "126": "pacific_loon",
148
+ "127": "painted_bunting",
149
+ "128": "palm_warbler",
150
+ "129": "parakeet_auklet",
151
+ "130": "pelagic_cormorant",
152
+ "131": "philadelphia_vireo",
153
+ "132": "pied_billed_grebe",
154
+ "133": "pied_kingfisher",
155
+ "134": "pigeon_guillemot",
156
+ "135": "pileated_woodpecker",
157
+ "136": "pine_grosbeak",
158
+ "137": "pine_warbler",
159
+ "138": "pomarine_jaeger",
160
+ "139": "prairie_warbler",
161
+ "140": "prothonotary_warbler",
162
+ "141": "purple_finch",
163
+ "142": "red_bellied_woodpecker",
164
+ "143": "red_breasted_merganser",
165
+ "144": "red_cockaded_woodpecker",
166
+ "145": "red_eyed_vireo",
167
+ "146": "red_faced_cormorant",
168
+ "147": "red_headed_woodpecker",
169
+ "148": "red_legged_kittiwake",
170
+ "149": "red_winged_blackbird",
171
+ "150": "rhinoceros_auklet",
172
+ "151": "ring_billed_gull",
173
+ "152": "ringed_kingfisher",
174
+ "153": "rock_wren",
175
+ "154": "rose_breasted_grosbeak",
176
+ "155": "ruby_throated_hummingbird",
177
+ "156": "rufous_hummingbird",
178
+ "157": "rusty_blackbird",
179
+ "158": "sage_thrasher",
180
+ "159": "savannah_sparrow",
181
+ "160": "sayornis",
182
+ "161": "scarlet_tanager",
183
+ "162": "scissor_tailed_flycatcher",
184
+ "163": "scott_oriole",
185
+ "164": "seaside_sparrow",
186
+ "165": "shiny_cowbird",
187
+ "166": "slaty_backed_gull",
188
+ "167": "song_sparrow",
189
+ "168": "sooty_albatross",
190
+ "169": "spotted_catbird",
191
+ "170": "summer_tanager",
192
+ "171": "swainson_warbler",
193
+ "172": "tennessee_warbler",
194
+ "173": "tree_sparrow",
195
+ "174": "tree_swallow",
196
+ "175": "tropical_kingbird",
197
+ "176": "vermilion_flycatcher",
198
+ "177": "vesper_sparrow",
199
+ "178": "warbling_vireo",
200
+ "179": "western_grebe",
201
+ "180": "western_gull",
202
+ "181": "western_meadowlark",
203
+ "182": "western_wood_pewee",
204
+ "183": "whip_poor_will",
205
+ "184": "white_breasted_kingfisher",
206
+ "185": "white_breasted_nuthatch",
207
+ "186": "white_crowned_sparrow",
208
+ "187": "white_eyed_vireo",
209
+ "188": "white_necked_raven",
210
+ "189": "white_pelican",
211
+ "190": "white_throated_sparrow",
212
+ "191": "wilson_warbler",
213
+ "192": "winter_wren",
214
+ "193": "worm_eating_warbler",
215
+ "194": "yellow_bellied_flycatcher",
216
+ "195": "yellow_billed_cuckoo",
217
+ "196": "yellow_breasted_chat",
218
+ "197": "yellow_headed_blackbird",
219
+ "198": "yellow_throated_vireo",
220
+ "199": "yellow_warbler"
221
+ },
222
+ "image_size": 256,
223
+ "initializer_range": 0.02,
224
+ "label2id": {
225
+ "acadian_flycatcher": 0,
226
+ "american_crow": 1,
227
+ "american_goldfinch": 2,
228
+ "american_pipit": 3,
229
+ "american_redstart": 4,
230
+ "american_three_toed_woodpecker": 5,
231
+ "anna_hummingbird": 6,
232
+ "artic_tern": 7,
233
+ "baird_sparrow": 8,
234
+ "baltimore_oriole": 9,
235
+ "bank_swallow": 10,
236
+ "barn_swallow": 11,
237
+ "bay_breasted_warbler": 12,
238
+ "belted_kingfisher": 13,
239
+ "bewick_wren": 14,
240
+ "black_and_white_warbler": 15,
241
+ "black_billed_cuckoo": 16,
242
+ "black_capped_vireo": 17,
243
+ "black_footed_albatross": 18,
244
+ "black_tern": 19,
245
+ "black_throated_blue_warbler": 20,
246
+ "black_throated_sparrow": 21,
247
+ "blue_grosbeak": 22,
248
+ "blue_headed_vireo": 23,
249
+ "blue_jay": 24,
250
+ "blue_winged_warbler": 25,
251
+ "boat_tailed_grackle": 26,
252
+ "bobolink": 27,
253
+ "bohemian_waxwing": 28,
254
+ "brandt_cormorant": 29,
255
+ "brewer_blackbird": 30,
256
+ "brewer_sparrow": 31,
257
+ "bronzed_cowbird": 32,
258
+ "brown_creeper": 33,
259
+ "brown_pelican": 34,
260
+ "brown_thrasher": 35,
261
+ "cactus_wren": 36,
262
+ "california_gull": 37,
263
+ "canada_warbler": 38,
264
+ "cape_glossy_starling": 39,
265
+ "cape_may_warbler": 40,
266
+ "cardinal": 41,
267
+ "carolina_wren": 42,
268
+ "caspian_tern": 43,
269
+ "cedar_waxwing": 44,
270
+ "cerulean_warbler": 45,
271
+ "chestnut_sided_warbler": 46,
272
+ "chipping_sparrow": 47,
273
+ "chuck_will_widow": 48,
274
+ "clark_nutcracker": 49,
275
+ "clay_colored_sparrow": 50,
276
+ "cliff_swallow": 51,
277
+ "common_raven": 52,
278
+ "common_tern": 53,
279
+ "common_yellowthroat": 54,
280
+ "crested_auklet": 55,
281
+ "dark_eyed_junco": 56,
282
+ "downy_woodpecker": 57,
283
+ "eared_grebe": 58,
284
+ "eastern_towhee": 59,
285
+ "elegant_tern": 60,
286
+ "european_goldfinch": 61,
287
+ "evening_grosbeak": 62,
288
+ "field_sparrow": 63,
289
+ "fish_crow": 64,
290
+ "florida_jay": 65,
291
+ "forsters_tern": 66,
292
+ "fox_sparrow": 67,
293
+ "frigatebird": 68,
294
+ "gadwall": 69,
295
+ "geococcyx": 70,
296
+ "glaucous_winged_gull": 71,
297
+ "golden_winged_warbler": 72,
298
+ "grasshopper_sparrow": 73,
299
+ "gray_catbird": 74,
300
+ "gray_crowned_rosy_finch": 75,
301
+ "gray_kingbird": 76,
302
+ "great_crested_flycatcher": 77,
303
+ "great_grey_shrike": 78,
304
+ "green_jay": 79,
305
+ "green_kingfisher": 80,
306
+ "green_tailed_towhee": 81,
307
+ "green_violetear": 82,
308
+ "groove_billed_ani": 83,
309
+ "harris_sparrow": 84,
310
+ "heermann_gull": 85,
311
+ "henslow_sparrow": 86,
312
+ "herring_gull": 87,
313
+ "hooded_merganser": 88,
314
+ "hooded_oriole": 89,
315
+ "hooded_warbler": 90,
316
+ "horned_grebe": 91,
317
+ "horned_lark": 92,
318
+ "horned_puffin": 93,
319
+ "house_sparrow": 94,
320
+ "house_wren": 95,
321
+ "indigo_bunting": 96,
322
+ "ivory_gull": 97,
323
+ "kentucky_warbler": 98,
324
+ "laysan_albatross": 99,
325
+ "lazuli_bunting": 100,
326
+ "le_conte_sparrow": 101,
327
+ "least_auklet": 102,
328
+ "least_flycatcher": 103,
329
+ "least_tern": 104,
330
+ "lincoln_sparrow": 105,
331
+ "loggerhead_shrike": 106,
332
+ "long_tailed_jaeger": 107,
333
+ "louisiana_waterthrush": 108,
334
+ "magnolia_warbler": 109,
335
+ "mallard": 110,
336
+ "mangrove_cuckoo": 111,
337
+ "marsh_wren": 112,
338
+ "mockingbird": 113,
339
+ "mourning_warbler": 114,
340
+ "myrtle_warbler": 115,
341
+ "nashville_warbler": 116,
342
+ "nelson_sharp_tailed_sparrow": 117,
343
+ "nighthawk": 118,
344
+ "northern_flicker": 119,
345
+ "northern_fulmar": 120,
346
+ "northern_waterthrush": 121,
347
+ "olive_sided_flycatcher": 122,
348
+ "orange_crowned_warbler": 123,
349
+ "orchard_oriole": 124,
350
+ "ovenbird": 125,
351
+ "pacific_loon": 126,
352
+ "painted_bunting": 127,
353
+ "palm_warbler": 128,
354
+ "parakeet_auklet": 129,
355
+ "pelagic_cormorant": 130,
356
+ "philadelphia_vireo": 131,
357
+ "pied_billed_grebe": 132,
358
+ "pied_kingfisher": 133,
359
+ "pigeon_guillemot": 134,
360
+ "pileated_woodpecker": 135,
361
+ "pine_grosbeak": 136,
362
+ "pine_warbler": 137,
363
+ "pomarine_jaeger": 138,
364
+ "prairie_warbler": 139,
365
+ "prothonotary_warbler": 140,
366
+ "purple_finch": 141,
367
+ "red_bellied_woodpecker": 142,
368
+ "red_breasted_merganser": 143,
369
+ "red_cockaded_woodpecker": 144,
370
+ "red_eyed_vireo": 145,
371
+ "red_faced_cormorant": 146,
372
+ "red_headed_woodpecker": 147,
373
+ "red_legged_kittiwake": 148,
374
+ "red_winged_blackbird": 149,
375
+ "rhinoceros_auklet": 150,
376
+ "ring_billed_gull": 151,
377
+ "ringed_kingfisher": 152,
378
+ "rock_wren": 153,
379
+ "rose_breasted_grosbeak": 154,
380
+ "ruby_throated_hummingbird": 155,
381
+ "rufous_hummingbird": 156,
382
+ "rusty_blackbird": 157,
383
+ "sage_thrasher": 158,
384
+ "savannah_sparrow": 159,
385
+ "sayornis": 160,
386
+ "scarlet_tanager": 161,
387
+ "scissor_tailed_flycatcher": 162,
388
+ "scott_oriole": 163,
389
+ "seaside_sparrow": 164,
390
+ "shiny_cowbird": 165,
391
+ "slaty_backed_gull": 166,
392
+ "song_sparrow": 167,
393
+ "sooty_albatross": 168,
394
+ "spotted_catbird": 169,
395
+ "summer_tanager": 170,
396
+ "swainson_warbler": 171,
397
+ "tennessee_warbler": 172,
398
+ "tree_sparrow": 173,
399
+ "tree_swallow": 174,
400
+ "tropical_kingbird": 175,
401
+ "vermilion_flycatcher": 176,
402
+ "vesper_sparrow": 177,
403
+ "warbling_vireo": 178,
404
+ "western_grebe": 179,
405
+ "western_gull": 180,
406
+ "western_meadowlark": 181,
407
+ "western_wood_pewee": 182,
408
+ "whip_poor_will": 183,
409
+ "white_breasted_kingfisher": 184,
410
+ "white_breasted_nuthatch": 185,
411
+ "white_crowned_sparrow": 186,
412
+ "white_eyed_vireo": 187,
413
+ "white_necked_raven": 188,
414
+ "white_pelican": 189,
415
+ "white_throated_sparrow": 190,
416
+ "wilson_warbler": 191,
417
+ "winter_wren": 192,
418
+ "worm_eating_warbler": 193,
419
+ "yellow_bellied_flycatcher": 194,
420
+ "yellow_billed_cuckoo": 195,
421
+ "yellow_breasted_chat": 196,
422
+ "yellow_headed_blackbird": 197,
423
+ "yellow_throated_vireo": 198,
424
+ "yellow_warbler": 199
425
+ },
426
+ "layer_norm_eps": 1e-05,
427
+ "mlp_ratio": 4.0,
428
+ "model_type": "swinv2",
429
+ "num_channels": 3,
430
+ "num_heads": [
431
+ 3,
432
+ 6,
433
+ 12,
434
+ 24
435
+ ],
436
+ "num_layers": 4,
437
+ "out_features": [
438
+ "stage4"
439
+ ],
440
+ "out_indices": [
441
+ 4
442
+ ],
443
+ "patch_size": 4,
444
+ "path_norm": true,
445
+ "pretrained_window_sizes": [
446
+ 0,
447
+ 0,
448
+ 0,
449
+ 0
450
+ ],
451
+ "problem_type": "single_label_classification",
452
+ "qkv_bias": true,
453
+ "stage_names": [
454
+ "stem",
455
+ "stage1",
456
+ "stage2",
457
+ "stage3",
458
+ "stage4"
459
+ ],
460
+ "torch_dtype": "float32",
461
+ "transformers_version": "4.48.0",
462
+ "use_absolute_embeddings": false,
463
+ "window_size": 16
464
+ }
checkpoint-1073/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf6032f9bf2e544ad0fa1c23268b7718baca4cfd7a23359d0be576a4a12ccd5d
3
+ size 14244
checkpoint-1073/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4912bd4aeceb08125a644060a027fa0b78f1334d81957eeaf0166b5262c59f5
3
+ size 1064
checkpoint-1073/trainer_state.json ADDED
@@ -0,0 +1,1635 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5218645334243774,
3
+ "best_model_checkpoint": "Model-SwinV2-Tiny-\\checkpoint-1073",
4
+ "epoch": 28.980132450331126,
5
+ "eval_steps": 7,
6
+ "global_step": 1073,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.18543046357615894,
13
+ "grad_norm": 2.4086875915527344,
14
+ "learning_rate": 9.45945945945946e-07,
15
+ "loss": 5.3322,
16
+ "step": 7
17
+ },
18
+ {
19
+ "epoch": 0.3708609271523179,
20
+ "grad_norm": 2.1939220428466797,
21
+ "learning_rate": 1.891891891891892e-06,
22
+ "loss": 5.3175,
23
+ "step": 14
24
+ },
25
+ {
26
+ "epoch": 0.5562913907284768,
27
+ "grad_norm": 2.2086760997772217,
28
+ "learning_rate": 2.837837837837838e-06,
29
+ "loss": 5.3225,
30
+ "step": 21
31
+ },
32
+ {
33
+ "epoch": 0.7417218543046358,
34
+ "grad_norm": 2.065668821334839,
35
+ "learning_rate": 3.783783783783784e-06,
36
+ "loss": 5.3296,
37
+ "step": 28
38
+ },
39
+ {
40
+ "epoch": 0.9271523178807947,
41
+ "grad_norm": 2.3158862590789795,
42
+ "learning_rate": 4.72972972972973e-06,
43
+ "loss": 5.322,
44
+ "step": 35
45
+ },
46
+ {
47
+ "epoch": 0.9801324503311258,
48
+ "eval_accuracy": 0.003278688524590164,
49
+ "eval_f1_macro": 0.0016666666666666666,
50
+ "eval_f1_micro": 0.003278688524590164,
51
+ "eval_f1_weighted": 0.0013661202185792348,
52
+ "eval_loss": 5.303685188293457,
53
+ "eval_precision_macro": 0.0010971403137865806,
54
+ "eval_precision_micro": 0.003278688524590164,
55
+ "eval_precision_weighted": 0.0008992953391693285,
56
+ "eval_recall_macro": 0.004,
57
+ "eval_recall_micro": 0.003278688524590164,
58
+ "eval_recall_weighted": 0.003278688524590164,
59
+ "eval_runtime": 38.1558,
60
+ "eval_samples_per_second": 31.974,
61
+ "eval_steps_per_second": 0.524,
62
+ "step": 37
63
+ },
64
+ {
65
+ "epoch": 1.1324503311258278,
66
+ "grad_norm": 2.483227252960205,
67
+ "learning_rate": 5.675675675675676e-06,
68
+ "loss": 5.5043,
69
+ "step": 42
70
+ },
71
+ {
72
+ "epoch": 1.3178807947019868,
73
+ "grad_norm": 2.05220627784729,
74
+ "learning_rate": 6.621621621621622e-06,
75
+ "loss": 5.3074,
76
+ "step": 49
77
+ },
78
+ {
79
+ "epoch": 1.5033112582781456,
80
+ "grad_norm": 2.017322301864624,
81
+ "learning_rate": 7.567567567567568e-06,
82
+ "loss": 5.3025,
83
+ "step": 56
84
+ },
85
+ {
86
+ "epoch": 1.6887417218543046,
87
+ "grad_norm": 2.014375925064087,
88
+ "learning_rate": 8.513513513513514e-06,
89
+ "loss": 5.2928,
90
+ "step": 63
91
+ },
92
+ {
93
+ "epoch": 1.8741721854304636,
94
+ "grad_norm": 2.1785197257995605,
95
+ "learning_rate": 9.45945945945946e-06,
96
+ "loss": 5.2972,
97
+ "step": 70
98
+ },
99
+ {
100
+ "epoch": 1.980132450331126,
101
+ "eval_accuracy": 0.00819672131147541,
102
+ "eval_f1_macro": 0.004857079414838036,
103
+ "eval_f1_micro": 0.00819672131147541,
104
+ "eval_f1_weighted": 0.004755061925073231,
105
+ "eval_loss": 5.265906810760498,
106
+ "eval_precision_macro": 0.003902958152958153,
107
+ "eval_precision_micro": 0.00819672131147541,
108
+ "eval_precision_weighted": 0.003934248811297992,
109
+ "eval_recall_macro": 0.008630952380952382,
110
+ "eval_recall_micro": 0.00819672131147541,
111
+ "eval_recall_weighted": 0.00819672131147541,
112
+ "eval_runtime": 38.2173,
113
+ "eval_samples_per_second": 31.923,
114
+ "eval_steps_per_second": 0.523,
115
+ "step": 74
116
+ },
117
+ {
118
+ "epoch": 2.0794701986754967,
119
+ "grad_norm": 2.0930473804473877,
120
+ "learning_rate": 1.0405405405405407e-05,
121
+ "loss": 5.472,
122
+ "step": 77
123
+ },
124
+ {
125
+ "epoch": 2.2649006622516556,
126
+ "grad_norm": 2.3473684787750244,
127
+ "learning_rate": 1.1351351351351352e-05,
128
+ "loss": 5.2621,
129
+ "step": 84
130
+ },
131
+ {
132
+ "epoch": 2.4503311258278146,
133
+ "grad_norm": 2.268230676651001,
134
+ "learning_rate": 1.2297297297297299e-05,
135
+ "loss": 5.2555,
136
+ "step": 91
137
+ },
138
+ {
139
+ "epoch": 2.6357615894039736,
140
+ "grad_norm": 2.3846263885498047,
141
+ "learning_rate": 1.3243243243243244e-05,
142
+ "loss": 5.2442,
143
+ "step": 98
144
+ },
145
+ {
146
+ "epoch": 2.821192052980132,
147
+ "grad_norm": 2.452064037322998,
148
+ "learning_rate": 1.4189189189189189e-05,
149
+ "loss": 5.2228,
150
+ "step": 105
151
+ },
152
+ {
153
+ "epoch": 2.980132450331126,
154
+ "eval_accuracy": 0.030327868852459017,
155
+ "eval_f1_macro": 0.01780828862483212,
156
+ "eval_f1_micro": 0.030327868852459017,
157
+ "eval_f1_weighted": 0.018862280549754456,
158
+ "eval_loss": 5.171996593475342,
159
+ "eval_precision_macro": 0.01519444224935711,
160
+ "eval_precision_micro": 0.030327868852459017,
161
+ "eval_precision_weighted": 0.01609051329492379,
162
+ "eval_recall_macro": 0.028630952380952382,
163
+ "eval_recall_micro": 0.030327868852459017,
164
+ "eval_recall_weighted": 0.030327868852459017,
165
+ "eval_runtime": 40.0831,
166
+ "eval_samples_per_second": 30.437,
167
+ "eval_steps_per_second": 0.499,
168
+ "step": 111
169
+ },
170
+ {
171
+ "epoch": 3.0264900662251657,
172
+ "grad_norm": 3.297703981399536,
173
+ "learning_rate": 1.5135135135135136e-05,
174
+ "loss": 5.4031,
175
+ "step": 112
176
+ },
177
+ {
178
+ "epoch": 3.2119205298013247,
179
+ "grad_norm": 3.164050817489624,
180
+ "learning_rate": 1.6081081081081083e-05,
181
+ "loss": 5.1527,
182
+ "step": 119
183
+ },
184
+ {
185
+ "epoch": 3.3973509933774833,
186
+ "grad_norm": 3.7540459632873535,
187
+ "learning_rate": 1.7027027027027028e-05,
188
+ "loss": 5.1426,
189
+ "step": 126
190
+ },
191
+ {
192
+ "epoch": 3.5827814569536423,
193
+ "grad_norm": 4.779909610748291,
194
+ "learning_rate": 1.7972972972972973e-05,
195
+ "loss": 5.0713,
196
+ "step": 133
197
+ },
198
+ {
199
+ "epoch": 3.7682119205298013,
200
+ "grad_norm": 5.369205951690674,
201
+ "learning_rate": 1.891891891891892e-05,
202
+ "loss": 5.0499,
203
+ "step": 140
204
+ },
205
+ {
206
+ "epoch": 3.9536423841059603,
207
+ "grad_norm": 5.1240763664245605,
208
+ "learning_rate": 1.9864864864864866e-05,
209
+ "loss": 4.9613,
210
+ "step": 147
211
+ },
212
+ {
213
+ "epoch": 3.980132450331126,
214
+ "eval_accuracy": 0.08524590163934426,
215
+ "eval_f1_macro": 0.051001666629235665,
216
+ "eval_f1_micro": 0.08524590163934426,
217
+ "eval_f1_weighted": 0.054366012035255526,
218
+ "eval_loss": 4.813209056854248,
219
+ "eval_precision_macro": 0.05762040187775336,
220
+ "eval_precision_micro": 0.08524590163934426,
221
+ "eval_precision_weighted": 0.06070733267814678,
222
+ "eval_recall_macro": 0.07859523809523809,
223
+ "eval_recall_micro": 0.08524590163934426,
224
+ "eval_recall_weighted": 0.08524590163934426,
225
+ "eval_runtime": 58.6633,
226
+ "eval_samples_per_second": 20.797,
227
+ "eval_steps_per_second": 0.341,
228
+ "step": 148
229
+ },
230
+ {
231
+ "epoch": 4.158940397350993,
232
+ "grad_norm": 5.685731887817383,
233
+ "learning_rate": 2.0810810810810815e-05,
234
+ "loss": 5.0257,
235
+ "step": 154
236
+ },
237
+ {
238
+ "epoch": 4.344370860927152,
239
+ "grad_norm": 6.418041706085205,
240
+ "learning_rate": 2.1756756756756756e-05,
241
+ "loss": 4.7466,
242
+ "step": 161
243
+ },
244
+ {
245
+ "epoch": 4.529801324503311,
246
+ "grad_norm": 7.51582670211792,
247
+ "learning_rate": 2.2702702702702705e-05,
248
+ "loss": 4.6731,
249
+ "step": 168
250
+ },
251
+ {
252
+ "epoch": 4.71523178807947,
253
+ "grad_norm": 9.745444297790527,
254
+ "learning_rate": 2.364864864864865e-05,
255
+ "loss": 4.504,
256
+ "step": 175
257
+ },
258
+ {
259
+ "epoch": 4.900662251655629,
260
+ "grad_norm": 9.422097206115723,
261
+ "learning_rate": 2.4594594594594598e-05,
262
+ "loss": 4.3707,
263
+ "step": 182
264
+ },
265
+ {
266
+ "epoch": 4.9801324503311255,
267
+ "eval_accuracy": 0.21065573770491802,
268
+ "eval_f1_macro": 0.1669333623208855,
269
+ "eval_f1_micro": 0.21065573770491802,
270
+ "eval_f1_weighted": 0.1759151675061853,
271
+ "eval_loss": 3.8381166458129883,
272
+ "eval_precision_macro": 0.20365623267847385,
273
+ "eval_precision_micro": 0.21065573770491802,
274
+ "eval_precision_weighted": 0.20953591971339453,
275
+ "eval_recall_macro": 0.19770833333333332,
276
+ "eval_recall_micro": 0.21065573770491802,
277
+ "eval_recall_weighted": 0.21065573770491802,
278
+ "eval_runtime": 44.1331,
279
+ "eval_samples_per_second": 27.644,
280
+ "eval_steps_per_second": 0.453,
281
+ "step": 185
282
+ },
283
+ {
284
+ "epoch": 5.105960264900662,
285
+ "grad_norm": 9.648882865905762,
286
+ "learning_rate": 2.5540540540540543e-05,
287
+ "loss": 4.2878,
288
+ "step": 189
289
+ },
290
+ {
291
+ "epoch": 5.291390728476821,
292
+ "grad_norm": 11.289594650268555,
293
+ "learning_rate": 2.6486486486486488e-05,
294
+ "loss": 3.8794,
295
+ "step": 196
296
+ },
297
+ {
298
+ "epoch": 5.47682119205298,
299
+ "grad_norm": 10.757856369018555,
300
+ "learning_rate": 2.7297297297297298e-05,
301
+ "loss": 3.7505,
302
+ "step": 203
303
+ },
304
+ {
305
+ "epoch": 5.662251655629139,
306
+ "grad_norm": 16.969234466552734,
307
+ "learning_rate": 2.8243243243243246e-05,
308
+ "loss": 3.7323,
309
+ "step": 210
310
+ },
311
+ {
312
+ "epoch": 5.847682119205298,
313
+ "grad_norm": 11.33627700805664,
314
+ "learning_rate": 2.918918918918919e-05,
315
+ "loss": 3.5056,
316
+ "step": 217
317
+ },
318
+ {
319
+ "epoch": 5.9801324503311255,
320
+ "eval_accuracy": 0.37868852459016394,
321
+ "eval_f1_macro": 0.32624656613429687,
322
+ "eval_f1_micro": 0.37868852459016394,
323
+ "eval_f1_weighted": 0.3377910002445998,
324
+ "eval_loss": 2.865111827850342,
325
+ "eval_precision_macro": 0.37830690588089,
326
+ "eval_precision_micro": 0.37868852459016394,
327
+ "eval_precision_weighted": 0.38336785853552136,
328
+ "eval_recall_macro": 0.3615535714285714,
329
+ "eval_recall_micro": 0.37868852459016394,
330
+ "eval_recall_weighted": 0.37868852459016394,
331
+ "eval_runtime": 48.2126,
332
+ "eval_samples_per_second": 25.305,
333
+ "eval_steps_per_second": 0.415,
334
+ "step": 222
335
+ },
336
+ {
337
+ "epoch": 6.052980132450331,
338
+ "grad_norm": 11.259369850158691,
339
+ "learning_rate": 3e-05,
340
+ "loss": 3.6028,
341
+ "step": 224
342
+ },
343
+ {
344
+ "epoch": 6.23841059602649,
345
+ "grad_norm": 13.352005004882812,
346
+ "learning_rate": 3.0945945945945946e-05,
347
+ "loss": 3.1603,
348
+ "step": 231
349
+ },
350
+ {
351
+ "epoch": 6.423841059602649,
352
+ "grad_norm": 12.193765640258789,
353
+ "learning_rate": 3.1891891891891894e-05,
354
+ "loss": 3.0664,
355
+ "step": 238
356
+ },
357
+ {
358
+ "epoch": 6.609271523178808,
359
+ "grad_norm": 12.009160995483398,
360
+ "learning_rate": 3.2837837837837836e-05,
361
+ "loss": 2.8827,
362
+ "step": 245
363
+ },
364
+ {
365
+ "epoch": 6.7947019867549665,
366
+ "grad_norm": 11.383013725280762,
367
+ "learning_rate": 3.3783783783783784e-05,
368
+ "loss": 2.8152,
369
+ "step": 252
370
+ },
371
+ {
372
+ "epoch": 6.9801324503311255,
373
+ "grad_norm": 14.499237060546875,
374
+ "learning_rate": 3.472972972972973e-05,
375
+ "loss": 2.8677,
376
+ "step": 259
377
+ },
378
+ {
379
+ "epoch": 6.9801324503311255,
380
+ "eval_accuracy": 0.5237704918032787,
381
+ "eval_f1_macro": 0.4860099386723001,
382
+ "eval_f1_micro": 0.5237704918032787,
383
+ "eval_f1_weighted": 0.497629485231393,
384
+ "eval_loss": 2.090391159057617,
385
+ "eval_precision_macro": 0.5416790735494296,
386
+ "eval_precision_micro": 0.5237704918032787,
387
+ "eval_precision_weighted": 0.5489188118847942,
388
+ "eval_recall_macro": 0.5090833333333333,
389
+ "eval_recall_micro": 0.5237704918032787,
390
+ "eval_recall_weighted": 0.5237704918032787,
391
+ "eval_runtime": 45.6118,
392
+ "eval_samples_per_second": 26.747,
393
+ "eval_steps_per_second": 0.438,
394
+ "step": 259
395
+ },
396
+ {
397
+ "epoch": 7.185430463576159,
398
+ "grad_norm": 12.168959617614746,
399
+ "learning_rate": 3.567567567567568e-05,
400
+ "loss": 2.7083,
401
+ "step": 266
402
+ },
403
+ {
404
+ "epoch": 7.370860927152318,
405
+ "grad_norm": 11.984298706054688,
406
+ "learning_rate": 3.662162162162162e-05,
407
+ "loss": 2.552,
408
+ "step": 273
409
+ },
410
+ {
411
+ "epoch": 7.556291390728477,
412
+ "grad_norm": 12.778799057006836,
413
+ "learning_rate": 3.756756756756757e-05,
414
+ "loss": 2.4453,
415
+ "step": 280
416
+ },
417
+ {
418
+ "epoch": 7.741721854304636,
419
+ "grad_norm": 12.07343864440918,
420
+ "learning_rate": 3.851351351351351e-05,
421
+ "loss": 2.3233,
422
+ "step": 287
423
+ },
424
+ {
425
+ "epoch": 7.927152317880795,
426
+ "grad_norm": 11.423672676086426,
427
+ "learning_rate": 3.945945945945946e-05,
428
+ "loss": 2.2578,
429
+ "step": 294
430
+ },
431
+ {
432
+ "epoch": 7.9801324503311255,
433
+ "eval_accuracy": 0.6139344262295082,
434
+ "eval_f1_macro": 0.5881810404081942,
435
+ "eval_f1_micro": 0.6139344262295082,
436
+ "eval_f1_weighted": 0.596493094032159,
437
+ "eval_loss": 1.5912988185882568,
438
+ "eval_precision_macro": 0.6479628814812638,
439
+ "eval_precision_micro": 0.6139344262295082,
440
+ "eval_precision_weighted": 0.6507304591369202,
441
+ "eval_recall_macro": 0.6029464285714285,
442
+ "eval_recall_micro": 0.6139344262295082,
443
+ "eval_recall_weighted": 0.6139344262295082,
444
+ "eval_runtime": 43.3133,
445
+ "eval_samples_per_second": 28.167,
446
+ "eval_steps_per_second": 0.462,
447
+ "step": 296
448
+ },
449
+ {
450
+ "epoch": 8.132450331125828,
451
+ "grad_norm": 11.603616714477539,
452
+ "learning_rate": 4.04054054054054e-05,
453
+ "loss": 2.1519,
454
+ "step": 301
455
+ },
456
+ {
457
+ "epoch": 8.317880794701987,
458
+ "grad_norm": 13.292801856994629,
459
+ "learning_rate": 4.1216216216216216e-05,
460
+ "loss": 2.1126,
461
+ "step": 308
462
+ },
463
+ {
464
+ "epoch": 8.503311258278146,
465
+ "grad_norm": 10.869479179382324,
466
+ "learning_rate": 4.2162162162162164e-05,
467
+ "loss": 2.0236,
468
+ "step": 315
469
+ },
470
+ {
471
+ "epoch": 8.688741721854305,
472
+ "grad_norm": 12.716577529907227,
473
+ "learning_rate": 4.3108108108108106e-05,
474
+ "loss": 1.887,
475
+ "step": 322
476
+ },
477
+ {
478
+ "epoch": 8.874172185430464,
479
+ "grad_norm": 11.295136451721191,
480
+ "learning_rate": 4.4054054054054054e-05,
481
+ "loss": 1.876,
482
+ "step": 329
483
+ },
484
+ {
485
+ "epoch": 8.980132450331126,
486
+ "eval_accuracy": 0.6721311475409836,
487
+ "eval_f1_macro": 0.6580130012428606,
488
+ "eval_f1_micro": 0.6721311475409836,
489
+ "eval_f1_weighted": 0.6639418969334906,
490
+ "eval_loss": 1.2565070390701294,
491
+ "eval_precision_macro": 0.7232300878945616,
492
+ "eval_precision_micro": 0.6721311475409836,
493
+ "eval_precision_weighted": 0.7223922360953206,
494
+ "eval_recall_macro": 0.6628571428571429,
495
+ "eval_recall_micro": 0.6721311475409836,
496
+ "eval_recall_weighted": 0.6721311475409836,
497
+ "eval_runtime": 40.0516,
498
+ "eval_samples_per_second": 30.461,
499
+ "eval_steps_per_second": 0.499,
500
+ "step": 333
501
+ },
502
+ {
503
+ "epoch": 9.079470198675496,
504
+ "grad_norm": 11.199146270751953,
505
+ "learning_rate": 4.5e-05,
506
+ "loss": 1.8298,
507
+ "step": 336
508
+ },
509
+ {
510
+ "epoch": 9.264900662251655,
511
+ "grad_norm": 11.203076362609863,
512
+ "learning_rate": 4.594594594594595e-05,
513
+ "loss": 1.709,
514
+ "step": 343
515
+ },
516
+ {
517
+ "epoch": 9.450331125827814,
518
+ "grad_norm": 10.677241325378418,
519
+ "learning_rate": 4.689189189189189e-05,
520
+ "loss": 1.6401,
521
+ "step": 350
522
+ },
523
+ {
524
+ "epoch": 9.635761589403973,
525
+ "grad_norm": 10.3362398147583,
526
+ "learning_rate": 4.783783783783784e-05,
527
+ "loss": 1.6023,
528
+ "step": 357
529
+ },
530
+ {
531
+ "epoch": 9.821192052980132,
532
+ "grad_norm": 10.059170722961426,
533
+ "learning_rate": 4.878378378378379e-05,
534
+ "loss": 1.6111,
535
+ "step": 364
536
+ },
537
+ {
538
+ "epoch": 9.980132450331126,
539
+ "eval_accuracy": 0.7163934426229508,
540
+ "eval_f1_macro": 0.6956014124577357,
541
+ "eval_f1_micro": 0.7163934426229508,
542
+ "eval_f1_weighted": 0.7006354276645078,
543
+ "eval_loss": 1.0965081453323364,
544
+ "eval_precision_macro": 0.7575524683649684,
545
+ "eval_precision_micro": 0.7163934426229508,
546
+ "eval_precision_weighted": 0.7569573720905689,
547
+ "eval_recall_macro": 0.7082083333333332,
548
+ "eval_recall_micro": 0.7163934426229508,
549
+ "eval_recall_weighted": 0.7163934426229508,
550
+ "eval_runtime": 46.3112,
551
+ "eval_samples_per_second": 26.344,
552
+ "eval_steps_per_second": 0.432,
553
+ "step": 370
554
+ },
555
+ {
556
+ "epoch": 10.026490066225165,
557
+ "grad_norm": 10.692099571228027,
558
+ "learning_rate": 4.972972972972974e-05,
559
+ "loss": 1.5381,
560
+ "step": 371
561
+ },
562
+ {
563
+ "epoch": 10.211920529801324,
564
+ "grad_norm": 9.687626838684082,
565
+ "learning_rate": 4.9924924924924924e-05,
566
+ "loss": 1.4227,
567
+ "step": 378
568
+ },
569
+ {
570
+ "epoch": 10.397350993377483,
571
+ "grad_norm": 9.885163307189941,
572
+ "learning_rate": 4.9819819819819824e-05,
573
+ "loss": 1.4277,
574
+ "step": 385
575
+ },
576
+ {
577
+ "epoch": 10.582781456953642,
578
+ "grad_norm": 19.606826782226562,
579
+ "learning_rate": 4.971471471471472e-05,
580
+ "loss": 1.3542,
581
+ "step": 392
582
+ },
583
+ {
584
+ "epoch": 10.7682119205298,
585
+ "grad_norm": 10.894911766052246,
586
+ "learning_rate": 4.960960960960962e-05,
587
+ "loss": 1.4136,
588
+ "step": 399
589
+ },
590
+ {
591
+ "epoch": 10.95364238410596,
592
+ "grad_norm": 11.28426456451416,
593
+ "learning_rate": 4.95045045045045e-05,
594
+ "loss": 1.4096,
595
+ "step": 406
596
+ },
597
+ {
598
+ "epoch": 10.980132450331126,
599
+ "eval_accuracy": 0.7442622950819672,
600
+ "eval_f1_macro": 0.7309345112923151,
601
+ "eval_f1_micro": 0.7442622950819672,
602
+ "eval_f1_weighted": 0.7361964062069708,
603
+ "eval_loss": 0.9353717565536499,
604
+ "eval_precision_macro": 0.785672709235209,
605
+ "eval_precision_micro": 0.7442622950819672,
606
+ "eval_precision_weighted": 0.7851744168854825,
607
+ "eval_recall_macro": 0.735404761904762,
608
+ "eval_recall_micro": 0.7442622950819672,
609
+ "eval_recall_weighted": 0.7442622950819672,
610
+ "eval_runtime": 43.3522,
611
+ "eval_samples_per_second": 28.142,
612
+ "eval_steps_per_second": 0.461,
613
+ "step": 407
614
+ },
615
+ {
616
+ "epoch": 11.158940397350994,
617
+ "grad_norm": 8.755209922790527,
618
+ "learning_rate": 4.93993993993994e-05,
619
+ "loss": 1.2942,
620
+ "step": 413
621
+ },
622
+ {
623
+ "epoch": 11.344370860927153,
624
+ "grad_norm": 9.342432975769043,
625
+ "learning_rate": 4.9294294294294296e-05,
626
+ "loss": 1.221,
627
+ "step": 420
628
+ },
629
+ {
630
+ "epoch": 11.52980132450331,
631
+ "grad_norm": 10.622260093688965,
632
+ "learning_rate": 4.9189189189189196e-05,
633
+ "loss": 1.2387,
634
+ "step": 427
635
+ },
636
+ {
637
+ "epoch": 11.71523178807947,
638
+ "grad_norm": 9.346344947814941,
639
+ "learning_rate": 4.908408408408409e-05,
640
+ "loss": 1.2545,
641
+ "step": 434
642
+ },
643
+ {
644
+ "epoch": 11.900662251655628,
645
+ "grad_norm": 9.809884071350098,
646
+ "learning_rate": 4.8978978978978976e-05,
647
+ "loss": 1.2697,
648
+ "step": 441
649
+ },
650
+ {
651
+ "epoch": 11.980132450331126,
652
+ "eval_accuracy": 0.7836065573770492,
653
+ "eval_f1_macro": 0.7754873092164115,
654
+ "eval_f1_micro": 0.7836065573770492,
655
+ "eval_f1_weighted": 0.7788164207329435,
656
+ "eval_loss": 0.8078306913375854,
657
+ "eval_precision_macro": 0.8157236513486512,
658
+ "eval_precision_micro": 0.7836065573770492,
659
+ "eval_precision_weighted": 0.8160415404267863,
660
+ "eval_recall_macro": 0.7780952380952381,
661
+ "eval_recall_micro": 0.7836065573770492,
662
+ "eval_recall_weighted": 0.7836065573770492,
663
+ "eval_runtime": 53.3885,
664
+ "eval_samples_per_second": 22.851,
665
+ "eval_steps_per_second": 0.375,
666
+ "step": 444
667
+ },
668
+ {
669
+ "epoch": 12.105960264900663,
670
+ "grad_norm": 9.783758163452148,
671
+ "learning_rate": 4.8873873873873876e-05,
672
+ "loss": 1.2426,
673
+ "step": 448
674
+ },
675
+ {
676
+ "epoch": 12.291390728476822,
677
+ "grad_norm": 8.787826538085938,
678
+ "learning_rate": 4.876876876876877e-05,
679
+ "loss": 1.1275,
680
+ "step": 455
681
+ },
682
+ {
683
+ "epoch": 12.47682119205298,
684
+ "grad_norm": 10.826144218444824,
685
+ "learning_rate": 4.866366366366367e-05,
686
+ "loss": 1.1154,
687
+ "step": 462
688
+ },
689
+ {
690
+ "epoch": 12.66225165562914,
691
+ "grad_norm": 9.54553508758545,
692
+ "learning_rate": 4.855855855855856e-05,
693
+ "loss": 1.089,
694
+ "step": 469
695
+ },
696
+ {
697
+ "epoch": 12.847682119205299,
698
+ "grad_norm": 10.958932876586914,
699
+ "learning_rate": 4.8453453453453455e-05,
700
+ "loss": 1.0683,
701
+ "step": 476
702
+ },
703
+ {
704
+ "epoch": 12.980132450331126,
705
+ "eval_accuracy": 0.7877049180327869,
706
+ "eval_f1_macro": 0.7809517814057289,
707
+ "eval_f1_micro": 0.7877049180327869,
708
+ "eval_f1_weighted": 0.7842625439420977,
709
+ "eval_loss": 0.7565202116966248,
710
+ "eval_precision_macro": 0.8255538701494585,
711
+ "eval_precision_micro": 0.7877049180327869,
712
+ "eval_precision_weighted": 0.8263731434910316,
713
+ "eval_recall_macro": 0.7832797619047619,
714
+ "eval_recall_micro": 0.7877049180327869,
715
+ "eval_recall_weighted": 0.7877049180327869,
716
+ "eval_runtime": 44.7029,
717
+ "eval_samples_per_second": 27.291,
718
+ "eval_steps_per_second": 0.447,
719
+ "step": 481
720
+ },
721
+ {
722
+ "epoch": 13.052980132450331,
723
+ "grad_norm": 10.9450044631958,
724
+ "learning_rate": 4.834834834834835e-05,
725
+ "loss": 1.1861,
726
+ "step": 483
727
+ },
728
+ {
729
+ "epoch": 13.23841059602649,
730
+ "grad_norm": 10.095162391662598,
731
+ "learning_rate": 4.824324324324325e-05,
732
+ "loss": 1.013,
733
+ "step": 490
734
+ },
735
+ {
736
+ "epoch": 13.42384105960265,
737
+ "grad_norm": 9.601603507995605,
738
+ "learning_rate": 4.813813813813814e-05,
739
+ "loss": 0.9864,
740
+ "step": 497
741
+ },
742
+ {
743
+ "epoch": 13.609271523178808,
744
+ "grad_norm": 8.540664672851562,
745
+ "learning_rate": 4.8033033033033034e-05,
746
+ "loss": 0.8722,
747
+ "step": 504
748
+ },
749
+ {
750
+ "epoch": 13.794701986754967,
751
+ "grad_norm": 10.721132278442383,
752
+ "learning_rate": 4.792792792792793e-05,
753
+ "loss": 1.0975,
754
+ "step": 511
755
+ },
756
+ {
757
+ "epoch": 13.980132450331126,
758
+ "grad_norm": 11.830833435058594,
759
+ "learning_rate": 4.782282282282283e-05,
760
+ "loss": 1.0312,
761
+ "step": 518
762
+ },
763
+ {
764
+ "epoch": 13.980132450331126,
765
+ "eval_accuracy": 0.7975409836065573,
766
+ "eval_f1_macro": 0.7928224644292756,
767
+ "eval_f1_micro": 0.7975409836065573,
768
+ "eval_f1_weighted": 0.7953112733063121,
769
+ "eval_loss": 0.6965898275375366,
770
+ "eval_precision_macro": 0.8293739316239317,
771
+ "eval_precision_micro": 0.7975409836065573,
772
+ "eval_precision_weighted": 0.8288230894788272,
773
+ "eval_recall_macro": 0.7924821428571428,
774
+ "eval_recall_micro": 0.7975409836065573,
775
+ "eval_recall_weighted": 0.7975409836065573,
776
+ "eval_runtime": 40.1775,
777
+ "eval_samples_per_second": 30.365,
778
+ "eval_steps_per_second": 0.498,
779
+ "step": 518
780
+ },
781
+ {
782
+ "epoch": 14.185430463576159,
783
+ "grad_norm": 10.47096061706543,
784
+ "learning_rate": 4.771771771771772e-05,
785
+ "loss": 1.0098,
786
+ "step": 525
787
+ },
788
+ {
789
+ "epoch": 14.370860927152318,
790
+ "grad_norm": 9.135430335998535,
791
+ "learning_rate": 4.761261261261262e-05,
792
+ "loss": 1.0335,
793
+ "step": 532
794
+ },
795
+ {
796
+ "epoch": 14.556291390728477,
797
+ "grad_norm": 9.590068817138672,
798
+ "learning_rate": 4.7507507507507506e-05,
799
+ "loss": 0.8974,
800
+ "step": 539
801
+ },
802
+ {
803
+ "epoch": 14.741721854304636,
804
+ "grad_norm": 11.321711540222168,
805
+ "learning_rate": 4.7402402402402406e-05,
806
+ "loss": 0.9296,
807
+ "step": 546
808
+ },
809
+ {
810
+ "epoch": 14.927152317880795,
811
+ "grad_norm": 11.742105484008789,
812
+ "learning_rate": 4.72972972972973e-05,
813
+ "loss": 0.9903,
814
+ "step": 553
815
+ },
816
+ {
817
+ "epoch": 14.980132450331126,
818
+ "eval_accuracy": 0.8016393442622951,
819
+ "eval_f1_macro": 0.7979719305748716,
820
+ "eval_f1_micro": 0.8016393442622951,
821
+ "eval_f1_weighted": 0.8006367112297681,
822
+ "eval_loss": 0.6716923117637634,
823
+ "eval_precision_macro": 0.8302031995781997,
824
+ "eval_precision_micro": 0.8016393442622951,
825
+ "eval_precision_weighted": 0.8311227843195056,
826
+ "eval_recall_macro": 0.797970238095238,
827
+ "eval_recall_micro": 0.8016393442622951,
828
+ "eval_recall_weighted": 0.8016393442622951,
829
+ "eval_runtime": 63.6088,
830
+ "eval_samples_per_second": 19.18,
831
+ "eval_steps_per_second": 0.314,
832
+ "step": 555
833
+ },
834
+ {
835
+ "epoch": 15.132450331125828,
836
+ "grad_norm": 9.648506164550781,
837
+ "learning_rate": 4.71921921921922e-05,
838
+ "loss": 0.9712,
839
+ "step": 560
840
+ },
841
+ {
842
+ "epoch": 15.317880794701987,
843
+ "grad_norm": 8.26517391204834,
844
+ "learning_rate": 4.708708708708709e-05,
845
+ "loss": 0.8787,
846
+ "step": 567
847
+ },
848
+ {
849
+ "epoch": 15.503311258278146,
850
+ "grad_norm": 8.81796646118164,
851
+ "learning_rate": 4.698198198198198e-05,
852
+ "loss": 0.9198,
853
+ "step": 574
854
+ },
855
+ {
856
+ "epoch": 15.688741721854305,
857
+ "grad_norm": 8.624659538269043,
858
+ "learning_rate": 4.687687687687688e-05,
859
+ "loss": 0.8703,
860
+ "step": 581
861
+ },
862
+ {
863
+ "epoch": 15.874172185430464,
864
+ "grad_norm": 8.055384635925293,
865
+ "learning_rate": 4.677177177177177e-05,
866
+ "loss": 0.9453,
867
+ "step": 588
868
+ },
869
+ {
870
+ "epoch": 15.980132450331126,
871
+ "eval_accuracy": 0.8213114754098361,
872
+ "eval_f1_macro": 0.8200054048548631,
873
+ "eval_f1_micro": 0.8213114754098361,
874
+ "eval_f1_weighted": 0.8235741781663722,
875
+ "eval_loss": 0.65358966588974,
876
+ "eval_precision_macro": 0.8519878316128316,
877
+ "eval_precision_micro": 0.8213114754098361,
878
+ "eval_precision_weighted": 0.8550255686936015,
879
+ "eval_recall_macro": 0.8179345238095237,
880
+ "eval_recall_micro": 0.8213114754098361,
881
+ "eval_recall_weighted": 0.8213114754098361,
882
+ "eval_runtime": 61.357,
883
+ "eval_samples_per_second": 19.884,
884
+ "eval_steps_per_second": 0.326,
885
+ "step": 592
886
+ },
887
+ {
888
+ "epoch": 16.079470198675498,
889
+ "grad_norm": 7.529909133911133,
890
+ "learning_rate": 4.666666666666667e-05,
891
+ "loss": 0.981,
892
+ "step": 595
893
+ },
894
+ {
895
+ "epoch": 16.264900662251655,
896
+ "grad_norm": 7.517402172088623,
897
+ "learning_rate": 4.6561561561561565e-05,
898
+ "loss": 0.7995,
899
+ "step": 602
900
+ },
901
+ {
902
+ "epoch": 16.450331125827816,
903
+ "grad_norm": 8.271308898925781,
904
+ "learning_rate": 4.645645645645646e-05,
905
+ "loss": 0.7973,
906
+ "step": 609
907
+ },
908
+ {
909
+ "epoch": 16.635761589403973,
910
+ "grad_norm": 8.204742431640625,
911
+ "learning_rate": 4.635135135135135e-05,
912
+ "loss": 0.841,
913
+ "step": 616
914
+ },
915
+ {
916
+ "epoch": 16.821192052980134,
917
+ "grad_norm": 8.602618217468262,
918
+ "learning_rate": 4.624624624624625e-05,
919
+ "loss": 0.939,
920
+ "step": 623
921
+ },
922
+ {
923
+ "epoch": 16.980132450331126,
924
+ "eval_accuracy": 0.8221311475409836,
925
+ "eval_f1_macro": 0.8163740850652615,
926
+ "eval_f1_micro": 0.8221311475409836,
927
+ "eval_f1_weighted": 0.8187325264470876,
928
+ "eval_loss": 0.6268297433853149,
929
+ "eval_precision_macro": 0.844480339105339,
930
+ "eval_precision_micro": 0.8221311475409836,
931
+ "eval_precision_weighted": 0.8438404832872046,
932
+ "eval_recall_macro": 0.8187619047619047,
933
+ "eval_recall_micro": 0.8221311475409836,
934
+ "eval_recall_weighted": 0.8221311475409836,
935
+ "eval_runtime": 61.1021,
936
+ "eval_samples_per_second": 19.967,
937
+ "eval_steps_per_second": 0.327,
938
+ "step": 629
939
+ },
940
+ {
941
+ "epoch": 17.026490066225165,
942
+ "grad_norm": 8.355627059936523,
943
+ "learning_rate": 4.6141141141141144e-05,
944
+ "loss": 0.7965,
945
+ "step": 630
946
+ },
947
+ {
948
+ "epoch": 17.211920529801326,
949
+ "grad_norm": 8.196239471435547,
950
+ "learning_rate": 4.603603603603604e-05,
951
+ "loss": 0.7299,
952
+ "step": 637
953
+ },
954
+ {
955
+ "epoch": 17.397350993377483,
956
+ "grad_norm": 5.987618923187256,
957
+ "learning_rate": 4.593093093093093e-05,
958
+ "loss": 0.7109,
959
+ "step": 644
960
+ },
961
+ {
962
+ "epoch": 17.582781456953644,
963
+ "grad_norm": 7.704870223999023,
964
+ "learning_rate": 4.582582582582583e-05,
965
+ "loss": 0.7835,
966
+ "step": 651
967
+ },
968
+ {
969
+ "epoch": 17.7682119205298,
970
+ "grad_norm": 9.14775276184082,
971
+ "learning_rate": 4.572072072072072e-05,
972
+ "loss": 0.7746,
973
+ "step": 658
974
+ },
975
+ {
976
+ "epoch": 17.95364238410596,
977
+ "grad_norm": 9.578638076782227,
978
+ "learning_rate": 4.561561561561562e-05,
979
+ "loss": 0.7787,
980
+ "step": 665
981
+ },
982
+ {
983
+ "epoch": 17.980132450331126,
984
+ "eval_accuracy": 0.8254098360655737,
985
+ "eval_f1_macro": 0.8200774953460711,
986
+ "eval_f1_micro": 0.8254098360655737,
987
+ "eval_f1_weighted": 0.8212272647936633,
988
+ "eval_loss": 0.6105802655220032,
989
+ "eval_precision_macro": 0.8466414141414141,
990
+ "eval_precision_micro": 0.8254098360655737,
991
+ "eval_precision_weighted": 0.846929452605682,
992
+ "eval_recall_macro": 0.824232142857143,
993
+ "eval_recall_micro": 0.8254098360655737,
994
+ "eval_recall_weighted": 0.8254098360655737,
995
+ "eval_runtime": 61.552,
996
+ "eval_samples_per_second": 19.821,
997
+ "eval_steps_per_second": 0.325,
998
+ "step": 666
999
+ },
1000
+ {
1001
+ "epoch": 18.158940397350992,
1002
+ "grad_norm": 10.692727088928223,
1003
+ "learning_rate": 4.551051051051051e-05,
1004
+ "loss": 0.7673,
1005
+ "step": 672
1006
+ },
1007
+ {
1008
+ "epoch": 18.344370860927153,
1009
+ "grad_norm": 8.184776306152344,
1010
+ "learning_rate": 4.540540540540541e-05,
1011
+ "loss": 0.6994,
1012
+ "step": 679
1013
+ },
1014
+ {
1015
+ "epoch": 18.52980132450331,
1016
+ "grad_norm": 6.153806686401367,
1017
+ "learning_rate": 4.53003003003003e-05,
1018
+ "loss": 0.7311,
1019
+ "step": 686
1020
+ },
1021
+ {
1022
+ "epoch": 18.71523178807947,
1023
+ "grad_norm": 8.01244831085205,
1024
+ "learning_rate": 4.5195195195195196e-05,
1025
+ "loss": 0.69,
1026
+ "step": 693
1027
+ },
1028
+ {
1029
+ "epoch": 18.90066225165563,
1030
+ "grad_norm": 9.361739158630371,
1031
+ "learning_rate": 4.5090090090090095e-05,
1032
+ "loss": 0.7366,
1033
+ "step": 700
1034
+ },
1035
+ {
1036
+ "epoch": 18.980132450331126,
1037
+ "eval_accuracy": 0.830327868852459,
1038
+ "eval_f1_macro": 0.8230471515069039,
1039
+ "eval_f1_micro": 0.830327868852459,
1040
+ "eval_f1_weighted": 0.8255601272328421,
1041
+ "eval_loss": 0.5895594358444214,
1042
+ "eval_precision_macro": 0.8522619047619048,
1043
+ "eval_precision_micro": 0.830327868852459,
1044
+ "eval_precision_weighted": 0.8527185792349726,
1045
+ "eval_recall_macro": 0.8273750000000001,
1046
+ "eval_recall_micro": 0.830327868852459,
1047
+ "eval_recall_weighted": 0.830327868852459,
1048
+ "eval_runtime": 47.5144,
1049
+ "eval_samples_per_second": 25.676,
1050
+ "eval_steps_per_second": 0.421,
1051
+ "step": 703
1052
+ },
1053
+ {
1054
+ "epoch": 19.105960264900663,
1055
+ "grad_norm": 9.264578819274902,
1056
+ "learning_rate": 4.498498498498498e-05,
1057
+ "loss": 0.6839,
1058
+ "step": 707
1059
+ },
1060
+ {
1061
+ "epoch": 19.29139072847682,
1062
+ "grad_norm": 11.78283405303955,
1063
+ "learning_rate": 4.487987987987988e-05,
1064
+ "loss": 0.7013,
1065
+ "step": 714
1066
+ },
1067
+ {
1068
+ "epoch": 19.47682119205298,
1069
+ "grad_norm": 7.552002906799316,
1070
+ "learning_rate": 4.4774774774774775e-05,
1071
+ "loss": 0.7158,
1072
+ "step": 721
1073
+ },
1074
+ {
1075
+ "epoch": 19.662251655629138,
1076
+ "grad_norm": 8.90753173828125,
1077
+ "learning_rate": 4.4669669669669675e-05,
1078
+ "loss": 0.7336,
1079
+ "step": 728
1080
+ },
1081
+ {
1082
+ "epoch": 19.8476821192053,
1083
+ "grad_norm": 9.122941970825195,
1084
+ "learning_rate": 4.456456456456457e-05,
1085
+ "loss": 0.7401,
1086
+ "step": 735
1087
+ },
1088
+ {
1089
+ "epoch": 19.980132450331126,
1090
+ "eval_accuracy": 0.8245901639344262,
1091
+ "eval_f1_macro": 0.8191573312652259,
1092
+ "eval_f1_micro": 0.8245901639344262,
1093
+ "eval_f1_weighted": 0.8222225393243513,
1094
+ "eval_loss": 0.6023704409599304,
1095
+ "eval_precision_macro": 0.8524332611832612,
1096
+ "eval_precision_micro": 0.8245901639344262,
1097
+ "eval_precision_weighted": 0.8533740212428738,
1098
+ "eval_recall_macro": 0.8203273809523809,
1099
+ "eval_recall_micro": 0.8245901639344262,
1100
+ "eval_recall_weighted": 0.8245901639344262,
1101
+ "eval_runtime": 43.4261,
1102
+ "eval_samples_per_second": 28.094,
1103
+ "eval_steps_per_second": 0.461,
1104
+ "step": 740
1105
+ },
1106
+ {
1107
+ "epoch": 20.05298013245033,
1108
+ "grad_norm": 7.7251386642456055,
1109
+ "learning_rate": 4.445945945945946e-05,
1110
+ "loss": 0.8092,
1111
+ "step": 742
1112
+ },
1113
+ {
1114
+ "epoch": 20.23841059602649,
1115
+ "grad_norm": 7.6273112297058105,
1116
+ "learning_rate": 4.4354354354354354e-05,
1117
+ "loss": 0.7029,
1118
+ "step": 749
1119
+ },
1120
+ {
1121
+ "epoch": 20.423841059602648,
1122
+ "grad_norm": 8.434123039245605,
1123
+ "learning_rate": 4.4249249249249254e-05,
1124
+ "loss": 0.6458,
1125
+ "step": 756
1126
+ },
1127
+ {
1128
+ "epoch": 20.60927152317881,
1129
+ "grad_norm": 8.381632804870605,
1130
+ "learning_rate": 4.414414414414415e-05,
1131
+ "loss": 0.6677,
1132
+ "step": 763
1133
+ },
1134
+ {
1135
+ "epoch": 20.794701986754966,
1136
+ "grad_norm": 7.361513614654541,
1137
+ "learning_rate": 4.403903903903904e-05,
1138
+ "loss": 0.6974,
1139
+ "step": 770
1140
+ },
1141
+ {
1142
+ "epoch": 20.980132450331126,
1143
+ "grad_norm": 6.694199085235596,
1144
+ "learning_rate": 4.393393393393393e-05,
1145
+ "loss": 0.6855,
1146
+ "step": 777
1147
+ },
1148
+ {
1149
+ "epoch": 20.980132450331126,
1150
+ "eval_accuracy": 0.8352459016393443,
1151
+ "eval_f1_macro": 0.8301821885974363,
1152
+ "eval_f1_micro": 0.8352459016393443,
1153
+ "eval_f1_weighted": 0.8337427464021765,
1154
+ "eval_loss": 0.5506462454795837,
1155
+ "eval_precision_macro": 0.8520505050505051,
1156
+ "eval_precision_micro": 0.8352459016393443,
1157
+ "eval_precision_weighted": 0.8550262283254086,
1158
+ "eval_recall_macro": 0.8311190476190475,
1159
+ "eval_recall_micro": 0.8352459016393443,
1160
+ "eval_recall_weighted": 0.8352459016393443,
1161
+ "eval_runtime": 42.4103,
1162
+ "eval_samples_per_second": 28.767,
1163
+ "eval_steps_per_second": 0.472,
1164
+ "step": 777
1165
+ },
1166
+ {
1167
+ "epoch": 21.185430463576157,
1168
+ "grad_norm": 7.033413887023926,
1169
+ "learning_rate": 4.382882882882883e-05,
1170
+ "loss": 0.6146,
1171
+ "step": 784
1172
+ },
1173
+ {
1174
+ "epoch": 21.370860927152318,
1175
+ "grad_norm": 6.893985271453857,
1176
+ "learning_rate": 4.3723723723723726e-05,
1177
+ "loss": 0.6297,
1178
+ "step": 791
1179
+ },
1180
+ {
1181
+ "epoch": 21.556291390728475,
1182
+ "grad_norm": 8.514236450195312,
1183
+ "learning_rate": 4.3618618618618626e-05,
1184
+ "loss": 0.5797,
1185
+ "step": 798
1186
+ },
1187
+ {
1188
+ "epoch": 21.741721854304636,
1189
+ "grad_norm": 6.57477331161499,
1190
+ "learning_rate": 4.351351351351351e-05,
1191
+ "loss": 0.7013,
1192
+ "step": 805
1193
+ },
1194
+ {
1195
+ "epoch": 21.927152317880793,
1196
+ "grad_norm": 8.258438110351562,
1197
+ "learning_rate": 4.340840840840841e-05,
1198
+ "loss": 0.6179,
1199
+ "step": 812
1200
+ },
1201
+ {
1202
+ "epoch": 21.980132450331126,
1203
+ "eval_accuracy": 0.8434426229508196,
1204
+ "eval_f1_macro": 0.8351991206677739,
1205
+ "eval_f1_micro": 0.8434426229508196,
1206
+ "eval_f1_weighted": 0.839283469657334,
1207
+ "eval_loss": 0.5583724975585938,
1208
+ "eval_precision_macro": 0.8586598401598402,
1209
+ "eval_precision_micro": 0.8434426229508196,
1210
+ "eval_precision_weighted": 0.8618578962021584,
1211
+ "eval_recall_macro": 0.8398928571428571,
1212
+ "eval_recall_micro": 0.8434426229508196,
1213
+ "eval_recall_weighted": 0.8434426229508196,
1214
+ "eval_runtime": 42.4207,
1215
+ "eval_samples_per_second": 28.76,
1216
+ "eval_steps_per_second": 0.471,
1217
+ "step": 814
1218
+ },
1219
+ {
1220
+ "epoch": 22.132450331125828,
1221
+ "grad_norm": 6.932776927947998,
1222
+ "learning_rate": 4.3303303303303305e-05,
1223
+ "loss": 0.6852,
1224
+ "step": 819
1225
+ },
1226
+ {
1227
+ "epoch": 22.31788079470199,
1228
+ "grad_norm": 7.310856342315674,
1229
+ "learning_rate": 4.31981981981982e-05,
1230
+ "loss": 0.6273,
1231
+ "step": 826
1232
+ },
1233
+ {
1234
+ "epoch": 22.503311258278146,
1235
+ "grad_norm": 7.708891868591309,
1236
+ "learning_rate": 4.30930930930931e-05,
1237
+ "loss": 0.6736,
1238
+ "step": 833
1239
+ },
1240
+ {
1241
+ "epoch": 22.688741721854306,
1242
+ "grad_norm": 7.841245651245117,
1243
+ "learning_rate": 4.2987987987987985e-05,
1244
+ "loss": 0.6196,
1245
+ "step": 840
1246
+ },
1247
+ {
1248
+ "epoch": 22.874172185430464,
1249
+ "grad_norm": 8.23265266418457,
1250
+ "learning_rate": 4.2882882882882885e-05,
1251
+ "loss": 0.7047,
1252
+ "step": 847
1253
+ },
1254
+ {
1255
+ "epoch": 22.980132450331126,
1256
+ "eval_accuracy": 0.8377049180327869,
1257
+ "eval_f1_macro": 0.8308334231128349,
1258
+ "eval_f1_micro": 0.8377049180327869,
1259
+ "eval_f1_weighted": 0.8344787582769744,
1260
+ "eval_loss": 0.5581976175308228,
1261
+ "eval_precision_macro": 0.8549415584415584,
1262
+ "eval_precision_micro": 0.8377049180327869,
1263
+ "eval_precision_weighted": 0.8558833652686111,
1264
+ "eval_recall_macro": 0.832327380952381,
1265
+ "eval_recall_micro": 0.8377049180327869,
1266
+ "eval_recall_weighted": 0.8377049180327869,
1267
+ "eval_runtime": 40.4504,
1268
+ "eval_samples_per_second": 30.16,
1269
+ "eval_steps_per_second": 0.494,
1270
+ "step": 851
1271
+ },
1272
+ {
1273
+ "epoch": 23.079470198675498,
1274
+ "grad_norm": 6.447187900543213,
1275
+ "learning_rate": 4.277777777777778e-05,
1276
+ "loss": 0.6207,
1277
+ "step": 854
1278
+ },
1279
+ {
1280
+ "epoch": 23.264900662251655,
1281
+ "grad_norm": 7.463189601898193,
1282
+ "learning_rate": 4.267267267267268e-05,
1283
+ "loss": 0.5505,
1284
+ "step": 861
1285
+ },
1286
+ {
1287
+ "epoch": 23.450331125827816,
1288
+ "grad_norm": 6.670192718505859,
1289
+ "learning_rate": 4.256756756756757e-05,
1290
+ "loss": 0.5429,
1291
+ "step": 868
1292
+ },
1293
+ {
1294
+ "epoch": 23.635761589403973,
1295
+ "grad_norm": 7.076443672180176,
1296
+ "learning_rate": 4.2462462462462464e-05,
1297
+ "loss": 0.562,
1298
+ "step": 875
1299
+ },
1300
+ {
1301
+ "epoch": 23.821192052980134,
1302
+ "grad_norm": 7.10822057723999,
1303
+ "learning_rate": 4.235735735735736e-05,
1304
+ "loss": 0.6327,
1305
+ "step": 882
1306
+ },
1307
+ {
1308
+ "epoch": 23.980132450331126,
1309
+ "eval_accuracy": 0.8475409836065574,
1310
+ "eval_f1_macro": 0.8420048204719258,
1311
+ "eval_f1_micro": 0.8475409836065574,
1312
+ "eval_f1_weighted": 0.8443291816274991,
1313
+ "eval_loss": 0.536198079586029,
1314
+ "eval_precision_macro": 0.86352886002886,
1315
+ "eval_precision_micro": 0.8475409836065574,
1316
+ "eval_precision_weighted": 0.8657139013081634,
1317
+ "eval_recall_macro": 0.8454404761904761,
1318
+ "eval_recall_micro": 0.8475409836065574,
1319
+ "eval_recall_weighted": 0.8475409836065574,
1320
+ "eval_runtime": 41.4502,
1321
+ "eval_samples_per_second": 29.433,
1322
+ "eval_steps_per_second": 0.483,
1323
+ "step": 888
1324
+ },
1325
+ {
1326
+ "epoch": 24.026490066225165,
1327
+ "grad_norm": 7.926750183105469,
1328
+ "learning_rate": 4.225225225225226e-05,
1329
+ "loss": 0.6482,
1330
+ "step": 889
1331
+ },
1332
+ {
1333
+ "epoch": 24.211920529801326,
1334
+ "grad_norm": 6.154850006103516,
1335
+ "learning_rate": 4.214714714714715e-05,
1336
+ "loss": 0.5294,
1337
+ "step": 896
1338
+ },
1339
+ {
1340
+ "epoch": 24.397350993377483,
1341
+ "grad_norm": 7.406444549560547,
1342
+ "learning_rate": 4.204204204204204e-05,
1343
+ "loss": 0.5871,
1344
+ "step": 903
1345
+ },
1346
+ {
1347
+ "epoch": 24.582781456953644,
1348
+ "grad_norm": 7.258051872253418,
1349
+ "learning_rate": 4.1936936936936936e-05,
1350
+ "loss": 0.582,
1351
+ "step": 910
1352
+ },
1353
+ {
1354
+ "epoch": 24.7682119205298,
1355
+ "grad_norm": 7.2824859619140625,
1356
+ "learning_rate": 4.1831831831831836e-05,
1357
+ "loss": 0.569,
1358
+ "step": 917
1359
+ },
1360
+ {
1361
+ "epoch": 24.95364238410596,
1362
+ "grad_norm": 7.095022678375244,
1363
+ "learning_rate": 4.172672672672673e-05,
1364
+ "loss": 0.6194,
1365
+ "step": 924
1366
+ },
1367
+ {
1368
+ "epoch": 24.980132450331126,
1369
+ "eval_accuracy": 0.8418032786885246,
1370
+ "eval_f1_macro": 0.8358616677440207,
1371
+ "eval_f1_micro": 0.8418032786885246,
1372
+ "eval_f1_weighted": 0.8394348088694761,
1373
+ "eval_loss": 0.53657066822052,
1374
+ "eval_precision_macro": 0.8616504606504606,
1375
+ "eval_precision_micro": 0.8418032786885246,
1376
+ "eval_precision_weighted": 0.8629244162031047,
1377
+ "eval_recall_macro": 0.8374583333333332,
1378
+ "eval_recall_micro": 0.8418032786885246,
1379
+ "eval_recall_weighted": 0.8418032786885246,
1380
+ "eval_runtime": 40.6599,
1381
+ "eval_samples_per_second": 30.005,
1382
+ "eval_steps_per_second": 0.492,
1383
+ "step": 925
1384
+ },
1385
+ {
1386
+ "epoch": 25.158940397350992,
1387
+ "grad_norm": 8.208196640014648,
1388
+ "learning_rate": 4.162162162162163e-05,
1389
+ "loss": 0.5924,
1390
+ "step": 931
1391
+ },
1392
+ {
1393
+ "epoch": 25.344370860927153,
1394
+ "grad_norm": 5.883511066436768,
1395
+ "learning_rate": 4.1516516516516515e-05,
1396
+ "loss": 0.5908,
1397
+ "step": 938
1398
+ },
1399
+ {
1400
+ "epoch": 25.52980132450331,
1401
+ "grad_norm": 7.456212997436523,
1402
+ "learning_rate": 4.1411411411411415e-05,
1403
+ "loss": 0.5531,
1404
+ "step": 945
1405
+ },
1406
+ {
1407
+ "epoch": 25.71523178807947,
1408
+ "grad_norm": 6.946765422821045,
1409
+ "learning_rate": 4.130630630630631e-05,
1410
+ "loss": 0.6191,
1411
+ "step": 952
1412
+ },
1413
+ {
1414
+ "epoch": 25.90066225165563,
1415
+ "grad_norm": 5.55208158493042,
1416
+ "learning_rate": 4.12012012012012e-05,
1417
+ "loss": 0.5062,
1418
+ "step": 959
1419
+ },
1420
+ {
1421
+ "epoch": 25.980132450331126,
1422
+ "eval_accuracy": 0.839344262295082,
1423
+ "eval_f1_macro": 0.8345011108172872,
1424
+ "eval_f1_micro": 0.839344262295082,
1425
+ "eval_f1_weighted": 0.8372446574640403,
1426
+ "eval_loss": 0.5442374348640442,
1427
+ "eval_precision_macro": 0.8636957070707072,
1428
+ "eval_precision_micro": 0.839344262295082,
1429
+ "eval_precision_weighted": 0.8666759219832991,
1430
+ "eval_recall_macro": 0.8376964285714286,
1431
+ "eval_recall_micro": 0.839344262295082,
1432
+ "eval_recall_weighted": 0.839344262295082,
1433
+ "eval_runtime": 40.5992,
1434
+ "eval_samples_per_second": 30.05,
1435
+ "eval_steps_per_second": 0.493,
1436
+ "step": 962
1437
+ },
1438
+ {
1439
+ "epoch": 26.105960264900663,
1440
+ "grad_norm": 6.749058723449707,
1441
+ "learning_rate": 4.10960960960961e-05,
1442
+ "loss": 0.5717,
1443
+ "step": 966
1444
+ },
1445
+ {
1446
+ "epoch": 26.29139072847682,
1447
+ "grad_norm": 7.851437568664551,
1448
+ "learning_rate": 4.099099099099099e-05,
1449
+ "loss": 0.594,
1450
+ "step": 973
1451
+ },
1452
+ {
1453
+ "epoch": 26.47682119205298,
1454
+ "grad_norm": 5.15859842300415,
1455
+ "learning_rate": 4.088588588588589e-05,
1456
+ "loss": 0.5236,
1457
+ "step": 980
1458
+ },
1459
+ {
1460
+ "epoch": 26.662251655629138,
1461
+ "grad_norm": 6.7099103927612305,
1462
+ "learning_rate": 4.078078078078078e-05,
1463
+ "loss": 0.5225,
1464
+ "step": 987
1465
+ },
1466
+ {
1467
+ "epoch": 26.8476821192053,
1468
+ "grad_norm": 8.952303886413574,
1469
+ "learning_rate": 4.067567567567568e-05,
1470
+ "loss": 0.536,
1471
+ "step": 994
1472
+ },
1473
+ {
1474
+ "epoch": 26.980132450331126,
1475
+ "eval_accuracy": 0.8540983606557377,
1476
+ "eval_f1_macro": 0.8494095251154075,
1477
+ "eval_f1_micro": 0.8540983606557377,
1478
+ "eval_f1_weighted": 0.8529669682080483,
1479
+ "eval_loss": 0.5279183983802795,
1480
+ "eval_precision_macro": 0.8678869047619048,
1481
+ "eval_precision_micro": 0.8540983606557377,
1482
+ "eval_precision_weighted": 0.8713430262815508,
1483
+ "eval_recall_macro": 0.851375,
1484
+ "eval_recall_micro": 0.8540983606557377,
1485
+ "eval_recall_weighted": 0.8540983606557377,
1486
+ "eval_runtime": 50.6859,
1487
+ "eval_samples_per_second": 24.07,
1488
+ "eval_steps_per_second": 0.395,
1489
+ "step": 999
1490
+ },
1491
+ {
1492
+ "epoch": 27.05298013245033,
1493
+ "grad_norm": 6.7562713623046875,
1494
+ "learning_rate": 4.0570570570570574e-05,
1495
+ "loss": 0.556,
1496
+ "step": 1001
1497
+ },
1498
+ {
1499
+ "epoch": 27.23841059602649,
1500
+ "grad_norm": 8.072733879089355,
1501
+ "learning_rate": 4.046546546546547e-05,
1502
+ "loss": 0.5502,
1503
+ "step": 1008
1504
+ },
1505
+ {
1506
+ "epoch": 27.423841059602648,
1507
+ "grad_norm": 7.506572246551514,
1508
+ "learning_rate": 4.036036036036036e-05,
1509
+ "loss": 0.5333,
1510
+ "step": 1015
1511
+ },
1512
+ {
1513
+ "epoch": 27.60927152317881,
1514
+ "grad_norm": 6.526910305023193,
1515
+ "learning_rate": 4.025525525525526e-05,
1516
+ "loss": 0.5411,
1517
+ "step": 1022
1518
+ },
1519
+ {
1520
+ "epoch": 27.794701986754966,
1521
+ "grad_norm": 7.3132219314575195,
1522
+ "learning_rate": 4.015015015015015e-05,
1523
+ "loss": 0.4913,
1524
+ "step": 1029
1525
+ },
1526
+ {
1527
+ "epoch": 27.980132450331126,
1528
+ "grad_norm": 6.7607293128967285,
1529
+ "learning_rate": 4.0045045045045046e-05,
1530
+ "loss": 0.4581,
1531
+ "step": 1036
1532
+ },
1533
+ {
1534
+ "epoch": 27.980132450331126,
1535
+ "eval_accuracy": 0.8450819672131148,
1536
+ "eval_f1_macro": 0.8403489754999043,
1537
+ "eval_f1_micro": 0.8450819672131148,
1538
+ "eval_f1_weighted": 0.8432557118100563,
1539
+ "eval_loss": 0.5285335183143616,
1540
+ "eval_precision_macro": 0.8622770562770563,
1541
+ "eval_precision_micro": 0.8450819672131148,
1542
+ "eval_precision_weighted": 0.8648690653608687,
1543
+ "eval_recall_macro": 0.8421666666666667,
1544
+ "eval_recall_micro": 0.8450819672131148,
1545
+ "eval_recall_weighted": 0.8450819672131148,
1546
+ "eval_runtime": 47.0435,
1547
+ "eval_samples_per_second": 25.933,
1548
+ "eval_steps_per_second": 0.425,
1549
+ "step": 1036
1550
+ },
1551
+ {
1552
+ "epoch": 28.185430463576157,
1553
+ "grad_norm": 5.862381935119629,
1554
+ "learning_rate": 3.993993993993994e-05,
1555
+ "loss": 0.463,
1556
+ "step": 1043
1557
+ },
1558
+ {
1559
+ "epoch": 28.370860927152318,
1560
+ "grad_norm": 7.303929328918457,
1561
+ "learning_rate": 3.983483483483484e-05,
1562
+ "loss": 0.4783,
1563
+ "step": 1050
1564
+ },
1565
+ {
1566
+ "epoch": 28.556291390728475,
1567
+ "grad_norm": 6.46702766418457,
1568
+ "learning_rate": 3.972972972972973e-05,
1569
+ "loss": 0.4826,
1570
+ "step": 1057
1571
+ },
1572
+ {
1573
+ "epoch": 28.741721854304636,
1574
+ "grad_norm": 7.615893363952637,
1575
+ "learning_rate": 3.962462462462463e-05,
1576
+ "loss": 0.5393,
1577
+ "step": 1064
1578
+ },
1579
+ {
1580
+ "epoch": 28.927152317880793,
1581
+ "grad_norm": 6.58096170425415,
1582
+ "learning_rate": 3.951951951951952e-05,
1583
+ "loss": 0.5844,
1584
+ "step": 1071
1585
+ },
1586
+ {
1587
+ "epoch": 28.980132450331126,
1588
+ "eval_accuracy": 0.8377049180327869,
1589
+ "eval_f1_macro": 0.8312956210456212,
1590
+ "eval_f1_micro": 0.8377049180327869,
1591
+ "eval_f1_weighted": 0.8337990174670502,
1592
+ "eval_loss": 0.5218645334243774,
1593
+ "eval_precision_macro": 0.8636145382395383,
1594
+ "eval_precision_micro": 0.8377049180327869,
1595
+ "eval_precision_weighted": 0.8647297329264542,
1596
+ "eval_recall_macro": 0.8355833333333333,
1597
+ "eval_recall_micro": 0.8377049180327869,
1598
+ "eval_recall_weighted": 0.8377049180327869,
1599
+ "eval_runtime": 53.5985,
1600
+ "eval_samples_per_second": 22.762,
1601
+ "eval_steps_per_second": 0.373,
1602
+ "step": 1073
1603
+ }
1604
+ ],
1605
+ "logging_steps": 7,
1606
+ "max_steps": 3700,
1607
+ "num_input_tokens_seen": 0,
1608
+ "num_train_epochs": 100,
1609
+ "save_steps": 7,
1610
+ "stateful_callbacks": {
1611
+ "EarlyStoppingCallback": {
1612
+ "args": {
1613
+ "early_stopping_patience": 5,
1614
+ "early_stopping_threshold": 0.01
1615
+ },
1616
+ "attributes": {
1617
+ "early_stopping_patience_counter": 5
1618
+ }
1619
+ },
1620
+ "TrainerControl": {
1621
+ "args": {
1622
+ "should_epoch_stop": false,
1623
+ "should_evaluate": false,
1624
+ "should_log": false,
1625
+ "should_save": true,
1626
+ "should_training_stop": true
1627
+ },
1628
+ "attributes": {}
1629
+ }
1630
+ },
1631
+ "total_flos": 4.523422474750132e+18,
1632
+ "train_batch_size": 32,
1633
+ "trial_name": null,
1634
+ "trial_params": null
1635
+ }
checkpoint-1073/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da0df3ee160ed78a94d79268add1dd382f9eb019c94de90bf1adfdeaace04bb
3
+ size 5304