darklorddad commited on
Commit
d64ef1b
·
verified ·
1 Parent(s): 3975de8

Upload 5 files

Browse files
checkpoint-1275/config.json ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "XinWenMonash/swin_transformer",
3
+ "_num_labels": 200,
4
+ "architectures": [
5
+ "SwinForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 18,
12
+ 2
13
+ ],
14
+ "drop_path_rate": 0.1,
15
+ "dtype": "float32",
16
+ "embed_dim": 128,
17
+ "encoder_stride": 32,
18
+ "hidden_act": "gelu",
19
+ "hidden_dropout_prob": 0.0,
20
+ "hidden_size": 1024,
21
+ "id2label": {
22
+ "0": "acadian_flycatcher",
23
+ "1": "american_crow",
24
+ "2": "american_goldfinch",
25
+ "3": "american_pipit",
26
+ "4": "american_redstart",
27
+ "5": "american_three_toed_woodpecker",
28
+ "6": "anna_hummingbird",
29
+ "7": "artic_tern",
30
+ "8": "baird_sparrow",
31
+ "9": "baltimore_oriole",
32
+ "10": "bank_swallow",
33
+ "11": "barn_swallow",
34
+ "12": "bay_breasted_warbler",
35
+ "13": "belted_kingfisher",
36
+ "14": "bewick_wren",
37
+ "15": "black_and_white_warbler",
38
+ "16": "black_billed_cuckoo",
39
+ "17": "black_capped_vireo",
40
+ "18": "black_footed_albatross",
41
+ "19": "black_tern",
42
+ "20": "black_throated_blue_warbler",
43
+ "21": "black_throated_sparrow",
44
+ "22": "blue_grosbeak",
45
+ "23": "blue_headed_vireo",
46
+ "24": "blue_jay",
47
+ "25": "blue_winged_warbler",
48
+ "26": "boat_tailed_grackle",
49
+ "27": "bobolink",
50
+ "28": "bohemian_waxwing",
51
+ "29": "brandt_cormorant",
52
+ "30": "brewer_blackbird",
53
+ "31": "brewer_sparrow",
54
+ "32": "bronzed_cowbird",
55
+ "33": "brown_creeper",
56
+ "34": "brown_pelican",
57
+ "35": "brown_thrasher",
58
+ "36": "cactus_wren",
59
+ "37": "california_gull",
60
+ "38": "canada_warbler",
61
+ "39": "cape_glossy_starling",
62
+ "40": "cape_may_warbler",
63
+ "41": "cardinal",
64
+ "42": "carolina_wren",
65
+ "43": "caspian_tern",
66
+ "44": "cedar_waxwing",
67
+ "45": "cerulean_warbler",
68
+ "46": "chestnut_sided_warbler",
69
+ "47": "chipping_sparrow",
70
+ "48": "chuck_will_widow",
71
+ "49": "clark_nutcracker",
72
+ "50": "clay_colored_sparrow",
73
+ "51": "cliff_swallow",
74
+ "52": "common_raven",
75
+ "53": "common_tern",
76
+ "54": "common_yellowthroat",
77
+ "55": "crested_auklet",
78
+ "56": "dark_eyed_junco",
79
+ "57": "downy_woodpecker",
80
+ "58": "eared_grebe",
81
+ "59": "eastern_towhee",
82
+ "60": "elegant_tern",
83
+ "61": "european_goldfinch",
84
+ "62": "evening_grosbeak",
85
+ "63": "field_sparrow",
86
+ "64": "fish_crow",
87
+ "65": "florida_jay",
88
+ "66": "forsters_tern",
89
+ "67": "fox_sparrow",
90
+ "68": "frigatebird",
91
+ "69": "gadwall",
92
+ "70": "geococcyx",
93
+ "71": "glaucous_winged_gull",
94
+ "72": "golden_winged_warbler",
95
+ "73": "grasshopper_sparrow",
96
+ "74": "gray_catbird",
97
+ "75": "gray_crowned_rosy_finch",
98
+ "76": "gray_kingbird",
99
+ "77": "great_crested_flycatcher",
100
+ "78": "great_grey_shrike",
101
+ "79": "green_jay",
102
+ "80": "green_kingfisher",
103
+ "81": "green_tailed_towhee",
104
+ "82": "green_violetear",
105
+ "83": "groove_billed_ani",
106
+ "84": "harris_sparrow",
107
+ "85": "heermann_gull",
108
+ "86": "henslow_sparrow",
109
+ "87": "herring_gull",
110
+ "88": "hooded_merganser",
111
+ "89": "hooded_oriole",
112
+ "90": "hooded_warbler",
113
+ "91": "horned_grebe",
114
+ "92": "horned_lark",
115
+ "93": "horned_puffin",
116
+ "94": "house_sparrow",
117
+ "95": "house_wren",
118
+ "96": "indigo_bunting",
119
+ "97": "ivory_gull",
120
+ "98": "kentucky_warbler",
121
+ "99": "laysan_albatross",
122
+ "100": "lazuli_bunting",
123
+ "101": "le_conte_sparrow",
124
+ "102": "least_auklet",
125
+ "103": "least_flycatcher",
126
+ "104": "least_tern",
127
+ "105": "lincoln_sparrow",
128
+ "106": "loggerhead_shrike",
129
+ "107": "long_tailed_jaeger",
130
+ "108": "louisiana_waterthrush",
131
+ "109": "magnolia_warbler",
132
+ "110": "mallard",
133
+ "111": "mangrove_cuckoo",
134
+ "112": "marsh_wren",
135
+ "113": "mockingbird",
136
+ "114": "mourning_warbler",
137
+ "115": "myrtle_warbler",
138
+ "116": "nashville_warbler",
139
+ "117": "nelson_sharp_tailed_sparrow",
140
+ "118": "nighthawk",
141
+ "119": "northern_flicker",
142
+ "120": "northern_fulmar",
143
+ "121": "northern_waterthrush",
144
+ "122": "olive_sided_flycatcher",
145
+ "123": "orange_crowned_warbler",
146
+ "124": "orchard_oriole",
147
+ "125": "ovenbird",
148
+ "126": "pacific_loon",
149
+ "127": "painted_bunting",
150
+ "128": "palm_warbler",
151
+ "129": "parakeet_auklet",
152
+ "130": "pelagic_cormorant",
153
+ "131": "philadelphia_vireo",
154
+ "132": "pied_billed_grebe",
155
+ "133": "pied_kingfisher",
156
+ "134": "pigeon_guillemot",
157
+ "135": "pileated_woodpecker",
158
+ "136": "pine_grosbeak",
159
+ "137": "pine_warbler",
160
+ "138": "pomarine_jaeger",
161
+ "139": "prairie_warbler",
162
+ "140": "prothonotary_warbler",
163
+ "141": "purple_finch",
164
+ "142": "red_bellied_woodpecker",
165
+ "143": "red_breasted_merganser",
166
+ "144": "red_cockaded_woodpecker",
167
+ "145": "red_eyed_vireo",
168
+ "146": "red_faced_cormorant",
169
+ "147": "red_headed_woodpecker",
170
+ "148": "red_legged_kittiwake",
171
+ "149": "red_winged_blackbird",
172
+ "150": "rhinoceros_auklet",
173
+ "151": "ring_billed_gull",
174
+ "152": "ringed_kingfisher",
175
+ "153": "rock_wren",
176
+ "154": "rose_breasted_grosbeak",
177
+ "155": "ruby_throated_hummingbird",
178
+ "156": "rufous_hummingbird",
179
+ "157": "rusty_blackbird",
180
+ "158": "sage_thrasher",
181
+ "159": "savannah_sparrow",
182
+ "160": "sayornis",
183
+ "161": "scarlet_tanager",
184
+ "162": "scissor_tailed_flycatcher",
185
+ "163": "scott_oriole",
186
+ "164": "seaside_sparrow",
187
+ "165": "shiny_cowbird",
188
+ "166": "slaty_backed_gull",
189
+ "167": "song_sparrow",
190
+ "168": "sooty_albatross",
191
+ "169": "spotted_catbird",
192
+ "170": "summer_tanager",
193
+ "171": "swainson_warbler",
194
+ "172": "tennessee_warbler",
195
+ "173": "tree_sparrow",
196
+ "174": "tree_swallow",
197
+ "175": "tropical_kingbird",
198
+ "176": "vermilion_flycatcher",
199
+ "177": "vesper_sparrow",
200
+ "178": "warbling_vireo",
201
+ "179": "western_grebe",
202
+ "180": "western_gull",
203
+ "181": "western_meadowlark",
204
+ "182": "western_wood_pewee",
205
+ "183": "whip_poor_will",
206
+ "184": "white_breasted_kingfisher",
207
+ "185": "white_breasted_nuthatch",
208
+ "186": "white_crowned_sparrow",
209
+ "187": "white_eyed_vireo",
210
+ "188": "white_necked_raven",
211
+ "189": "white_pelican",
212
+ "190": "white_throated_sparrow",
213
+ "191": "wilson_warbler",
214
+ "192": "winter_wren",
215
+ "193": "worm_eating_warbler",
216
+ "194": "yellow_bellied_flycatcher",
217
+ "195": "yellow_billed_cuckoo",
218
+ "196": "yellow_breasted_chat",
219
+ "197": "yellow_headed_blackbird",
220
+ "198": "yellow_throated_vireo",
221
+ "199": "yellow_warbler"
222
+ },
223
+ "image_size": 224,
224
+ "initializer_range": 0.02,
225
+ "label2id": {
226
+ "acadian_flycatcher": 0,
227
+ "american_crow": 1,
228
+ "american_goldfinch": 2,
229
+ "american_pipit": 3,
230
+ "american_redstart": 4,
231
+ "american_three_toed_woodpecker": 5,
232
+ "anna_hummingbird": 6,
233
+ "artic_tern": 7,
234
+ "baird_sparrow": 8,
235
+ "baltimore_oriole": 9,
236
+ "bank_swallow": 10,
237
+ "barn_swallow": 11,
238
+ "bay_breasted_warbler": 12,
239
+ "belted_kingfisher": 13,
240
+ "bewick_wren": 14,
241
+ "black_and_white_warbler": 15,
242
+ "black_billed_cuckoo": 16,
243
+ "black_capped_vireo": 17,
244
+ "black_footed_albatross": 18,
245
+ "black_tern": 19,
246
+ "black_throated_blue_warbler": 20,
247
+ "black_throated_sparrow": 21,
248
+ "blue_grosbeak": 22,
249
+ "blue_headed_vireo": 23,
250
+ "blue_jay": 24,
251
+ "blue_winged_warbler": 25,
252
+ "boat_tailed_grackle": 26,
253
+ "bobolink": 27,
254
+ "bohemian_waxwing": 28,
255
+ "brandt_cormorant": 29,
256
+ "brewer_blackbird": 30,
257
+ "brewer_sparrow": 31,
258
+ "bronzed_cowbird": 32,
259
+ "brown_creeper": 33,
260
+ "brown_pelican": 34,
261
+ "brown_thrasher": 35,
262
+ "cactus_wren": 36,
263
+ "california_gull": 37,
264
+ "canada_warbler": 38,
265
+ "cape_glossy_starling": 39,
266
+ "cape_may_warbler": 40,
267
+ "cardinal": 41,
268
+ "carolina_wren": 42,
269
+ "caspian_tern": 43,
270
+ "cedar_waxwing": 44,
271
+ "cerulean_warbler": 45,
272
+ "chestnut_sided_warbler": 46,
273
+ "chipping_sparrow": 47,
274
+ "chuck_will_widow": 48,
275
+ "clark_nutcracker": 49,
276
+ "clay_colored_sparrow": 50,
277
+ "cliff_swallow": 51,
278
+ "common_raven": 52,
279
+ "common_tern": 53,
280
+ "common_yellowthroat": 54,
281
+ "crested_auklet": 55,
282
+ "dark_eyed_junco": 56,
283
+ "downy_woodpecker": 57,
284
+ "eared_grebe": 58,
285
+ "eastern_towhee": 59,
286
+ "elegant_tern": 60,
287
+ "european_goldfinch": 61,
288
+ "evening_grosbeak": 62,
289
+ "field_sparrow": 63,
290
+ "fish_crow": 64,
291
+ "florida_jay": 65,
292
+ "forsters_tern": 66,
293
+ "fox_sparrow": 67,
294
+ "frigatebird": 68,
295
+ "gadwall": 69,
296
+ "geococcyx": 70,
297
+ "glaucous_winged_gull": 71,
298
+ "golden_winged_warbler": 72,
299
+ "grasshopper_sparrow": 73,
300
+ "gray_catbird": 74,
301
+ "gray_crowned_rosy_finch": 75,
302
+ "gray_kingbird": 76,
303
+ "great_crested_flycatcher": 77,
304
+ "great_grey_shrike": 78,
305
+ "green_jay": 79,
306
+ "green_kingfisher": 80,
307
+ "green_tailed_towhee": 81,
308
+ "green_violetear": 82,
309
+ "groove_billed_ani": 83,
310
+ "harris_sparrow": 84,
311
+ "heermann_gull": 85,
312
+ "henslow_sparrow": 86,
313
+ "herring_gull": 87,
314
+ "hooded_merganser": 88,
315
+ "hooded_oriole": 89,
316
+ "hooded_warbler": 90,
317
+ "horned_grebe": 91,
318
+ "horned_lark": 92,
319
+ "horned_puffin": 93,
320
+ "house_sparrow": 94,
321
+ "house_wren": 95,
322
+ "indigo_bunting": 96,
323
+ "ivory_gull": 97,
324
+ "kentucky_warbler": 98,
325
+ "laysan_albatross": 99,
326
+ "lazuli_bunting": 100,
327
+ "le_conte_sparrow": 101,
328
+ "least_auklet": 102,
329
+ "least_flycatcher": 103,
330
+ "least_tern": 104,
331
+ "lincoln_sparrow": 105,
332
+ "loggerhead_shrike": 106,
333
+ "long_tailed_jaeger": 107,
334
+ "louisiana_waterthrush": 108,
335
+ "magnolia_warbler": 109,
336
+ "mallard": 110,
337
+ "mangrove_cuckoo": 111,
338
+ "marsh_wren": 112,
339
+ "mockingbird": 113,
340
+ "mourning_warbler": 114,
341
+ "myrtle_warbler": 115,
342
+ "nashville_warbler": 116,
343
+ "nelson_sharp_tailed_sparrow": 117,
344
+ "nighthawk": 118,
345
+ "northern_flicker": 119,
346
+ "northern_fulmar": 120,
347
+ "northern_waterthrush": 121,
348
+ "olive_sided_flycatcher": 122,
349
+ "orange_crowned_warbler": 123,
350
+ "orchard_oriole": 124,
351
+ "ovenbird": 125,
352
+ "pacific_loon": 126,
353
+ "painted_bunting": 127,
354
+ "palm_warbler": 128,
355
+ "parakeet_auklet": 129,
356
+ "pelagic_cormorant": 130,
357
+ "philadelphia_vireo": 131,
358
+ "pied_billed_grebe": 132,
359
+ "pied_kingfisher": 133,
360
+ "pigeon_guillemot": 134,
361
+ "pileated_woodpecker": 135,
362
+ "pine_grosbeak": 136,
363
+ "pine_warbler": 137,
364
+ "pomarine_jaeger": 138,
365
+ "prairie_warbler": 139,
366
+ "prothonotary_warbler": 140,
367
+ "purple_finch": 141,
368
+ "red_bellied_woodpecker": 142,
369
+ "red_breasted_merganser": 143,
370
+ "red_cockaded_woodpecker": 144,
371
+ "red_eyed_vireo": 145,
372
+ "red_faced_cormorant": 146,
373
+ "red_headed_woodpecker": 147,
374
+ "red_legged_kittiwake": 148,
375
+ "red_winged_blackbird": 149,
376
+ "rhinoceros_auklet": 150,
377
+ "ring_billed_gull": 151,
378
+ "ringed_kingfisher": 152,
379
+ "rock_wren": 153,
380
+ "rose_breasted_grosbeak": 154,
381
+ "ruby_throated_hummingbird": 155,
382
+ "rufous_hummingbird": 156,
383
+ "rusty_blackbird": 157,
384
+ "sage_thrasher": 158,
385
+ "savannah_sparrow": 159,
386
+ "sayornis": 160,
387
+ "scarlet_tanager": 161,
388
+ "scissor_tailed_flycatcher": 162,
389
+ "scott_oriole": 163,
390
+ "seaside_sparrow": 164,
391
+ "shiny_cowbird": 165,
392
+ "slaty_backed_gull": 166,
393
+ "song_sparrow": 167,
394
+ "sooty_albatross": 168,
395
+ "spotted_catbird": 169,
396
+ "summer_tanager": 170,
397
+ "swainson_warbler": 171,
398
+ "tennessee_warbler": 172,
399
+ "tree_sparrow": 173,
400
+ "tree_swallow": 174,
401
+ "tropical_kingbird": 175,
402
+ "vermilion_flycatcher": 176,
403
+ "vesper_sparrow": 177,
404
+ "warbling_vireo": 178,
405
+ "western_grebe": 179,
406
+ "western_gull": 180,
407
+ "western_meadowlark": 181,
408
+ "western_wood_pewee": 182,
409
+ "whip_poor_will": 183,
410
+ "white_breasted_kingfisher": 184,
411
+ "white_breasted_nuthatch": 185,
412
+ "white_crowned_sparrow": 186,
413
+ "white_eyed_vireo": 187,
414
+ "white_necked_raven": 188,
415
+ "white_pelican": 189,
416
+ "white_throated_sparrow": 190,
417
+ "wilson_warbler": 191,
418
+ "winter_wren": 192,
419
+ "worm_eating_warbler": 193,
420
+ "yellow_bellied_flycatcher": 194,
421
+ "yellow_billed_cuckoo": 195,
422
+ "yellow_breasted_chat": 196,
423
+ "yellow_headed_blackbird": 197,
424
+ "yellow_throated_vireo": 198,
425
+ "yellow_warbler": 199
426
+ },
427
+ "layer_norm_eps": 1e-05,
428
+ "mlp_ratio": 4.0,
429
+ "model_type": "swin",
430
+ "num_channels": 3,
431
+ "num_heads": [
432
+ 4,
433
+ 8,
434
+ 16,
435
+ 32
436
+ ],
437
+ "num_layers": 4,
438
+ "out_features": [
439
+ "stage4"
440
+ ],
441
+ "out_indices": [
442
+ 4
443
+ ],
444
+ "patch_size": 4,
445
+ "path_norm": true,
446
+ "problem_type": "single_label_classification",
447
+ "qkv_bias": true,
448
+ "stage_names": [
449
+ "stem",
450
+ "stage1",
451
+ "stage2",
452
+ "stage3",
453
+ "stage4"
454
+ ],
455
+ "torch_dtype": "float32",
456
+ "transformers_version": "4.48.0",
457
+ "use_absolute_embeddings": false,
458
+ "window_size": 7
459
+ }
checkpoint-1275/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:145e6aaae71e94de31b985ff1f2edcf9ca2d96f63bd4758700b5bfe07bceb9b5
3
+ size 14244
checkpoint-1275/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abfb41d8963e26d42b1b2b5d933e991567e940c3a7bfa1df8fa6831c0d990e47
3
+ size 1064
checkpoint-1275/trainer_state.json ADDED
@@ -0,0 +1,1766 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3975684940814972,
3
+ "best_model_checkpoint": "Model-Swin-Transformer-\\checkpoint-1275",
4
+ "epoch": 25.0,
5
+ "eval_steps": 7,
6
+ "global_step": 1275,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1390728476821192,
13
+ "grad_norm": 3.756645917892456,
14
+ "learning_rate": 7.000000000000001e-07,
15
+ "loss": 5.3644,
16
+ "step": 7
17
+ },
18
+ {
19
+ "epoch": 0.2781456953642384,
20
+ "grad_norm": 3.558716297149658,
21
+ "learning_rate": 1.4000000000000001e-06,
22
+ "loss": 5.3658,
23
+ "step": 14
24
+ },
25
+ {
26
+ "epoch": 0.41721854304635764,
27
+ "grad_norm": 3.7727837562561035,
28
+ "learning_rate": 2.1000000000000002e-06,
29
+ "loss": 5.3463,
30
+ "step": 21
31
+ },
32
+ {
33
+ "epoch": 0.5562913907284768,
34
+ "grad_norm": 3.529733896255493,
35
+ "learning_rate": 2.8000000000000003e-06,
36
+ "loss": 5.3343,
37
+ "step": 28
38
+ },
39
+ {
40
+ "epoch": 0.695364238410596,
41
+ "grad_norm": 4.100412368774414,
42
+ "learning_rate": 3.5000000000000004e-06,
43
+ "loss": 5.3713,
44
+ "step": 35
45
+ },
46
+ {
47
+ "epoch": 0.8344370860927153,
48
+ "grad_norm": 4.024387836456299,
49
+ "learning_rate": 4.2000000000000004e-06,
50
+ "loss": 5.3528,
51
+ "step": 42
52
+ },
53
+ {
54
+ "epoch": 0.9735099337748344,
55
+ "grad_norm": 3.3524274826049805,
56
+ "learning_rate": 4.9000000000000005e-06,
57
+ "loss": 5.3086,
58
+ "step": 49
59
+ },
60
+ {
61
+ "epoch": 1.0,
62
+ "eval_accuracy": 0.00819672131147541,
63
+ "eval_f1_macro": 0.004708259390712807,
64
+ "eval_f1_micro": 0.00819672131147541,
65
+ "eval_f1_weighted": 0.005191171978413603,
66
+ "eval_loss": 5.3028178215026855,
67
+ "eval_precision_macro": 0.004008928571428571,
68
+ "eval_precision_micro": 0.00819672131147541,
69
+ "eval_precision_weighted": 0.004471604215456675,
70
+ "eval_recall_macro": 0.007458333333333333,
71
+ "eval_recall_micro": 0.00819672131147541,
72
+ "eval_recall_weighted": 0.00819672131147541,
73
+ "eval_runtime": 18.3225,
74
+ "eval_samples_per_second": 66.585,
75
+ "eval_steps_per_second": 1.092,
76
+ "step": 51
77
+ },
78
+ {
79
+ "epoch": 1.099337748344371,
80
+ "grad_norm": 3.333894729614258,
81
+ "learning_rate": 5.600000000000001e-06,
82
+ "loss": 4.7989,
83
+ "step": 56
84
+ },
85
+ {
86
+ "epoch": 1.23841059602649,
87
+ "grad_norm": 3.226445198059082,
88
+ "learning_rate": 6.300000000000001e-06,
89
+ "loss": 5.2748,
90
+ "step": 63
91
+ },
92
+ {
93
+ "epoch": 1.3774834437086092,
94
+ "grad_norm": 2.960792303085327,
95
+ "learning_rate": 7.000000000000001e-06,
96
+ "loss": 5.2799,
97
+ "step": 70
98
+ },
99
+ {
100
+ "epoch": 1.5165562913907285,
101
+ "grad_norm": 2.967210054397583,
102
+ "learning_rate": 7.7e-06,
103
+ "loss": 5.2765,
104
+ "step": 77
105
+ },
106
+ {
107
+ "epoch": 1.6556291390728477,
108
+ "grad_norm": 2.8297080993652344,
109
+ "learning_rate": 8.400000000000001e-06,
110
+ "loss": 5.2309,
111
+ "step": 84
112
+ },
113
+ {
114
+ "epoch": 1.794701986754967,
115
+ "grad_norm": 3.001478433609009,
116
+ "learning_rate": 9.100000000000001e-06,
117
+ "loss": 5.2076,
118
+ "step": 91
119
+ },
120
+ {
121
+ "epoch": 1.9337748344370862,
122
+ "grad_norm": 3.130455493927002,
123
+ "learning_rate": 9.800000000000001e-06,
124
+ "loss": 5.1799,
125
+ "step": 98
126
+ },
127
+ {
128
+ "epoch": 2.0,
129
+ "eval_accuracy": 0.031967213114754096,
130
+ "eval_f1_macro": 0.02138895151975647,
131
+ "eval_f1_micro": 0.031967213114754096,
132
+ "eval_f1_weighted": 0.022752398833071827,
133
+ "eval_loss": 5.114638805389404,
134
+ "eval_precision_macro": 0.02439267581077926,
135
+ "eval_precision_micro": 0.031967213114754096,
136
+ "eval_precision_weighted": 0.026138166290088277,
137
+ "eval_recall_macro": 0.030029761904761903,
138
+ "eval_recall_micro": 0.031967213114754096,
139
+ "eval_recall_weighted": 0.031967213114754096,
140
+ "eval_runtime": 19.7575,
141
+ "eval_samples_per_second": 61.749,
142
+ "eval_steps_per_second": 1.012,
143
+ "step": 102
144
+ },
145
+ {
146
+ "epoch": 2.0596026490066226,
147
+ "grad_norm": 3.9784674644470215,
148
+ "learning_rate": 1.05e-05,
149
+ "loss": 4.6593,
150
+ "step": 105
151
+ },
152
+ {
153
+ "epoch": 2.198675496688742,
154
+ "grad_norm": 4.487362861633301,
155
+ "learning_rate": 1.1200000000000001e-05,
156
+ "loss": 5.1011,
157
+ "step": 112
158
+ },
159
+ {
160
+ "epoch": 2.337748344370861,
161
+ "grad_norm": 3.840144157409668,
162
+ "learning_rate": 1.19e-05,
163
+ "loss": 5.0346,
164
+ "step": 119
165
+ },
166
+ {
167
+ "epoch": 2.47682119205298,
168
+ "grad_norm": 4.5351409912109375,
169
+ "learning_rate": 1.2600000000000001e-05,
170
+ "loss": 4.993,
171
+ "step": 126
172
+ },
173
+ {
174
+ "epoch": 2.6158940397350996,
175
+ "grad_norm": 5.248815059661865,
176
+ "learning_rate": 1.3300000000000001e-05,
177
+ "loss": 4.9318,
178
+ "step": 133
179
+ },
180
+ {
181
+ "epoch": 2.7549668874172184,
182
+ "grad_norm": 5.679067611694336,
183
+ "learning_rate": 1.4000000000000001e-05,
184
+ "loss": 4.8848,
185
+ "step": 140
186
+ },
187
+ {
188
+ "epoch": 2.8940397350993377,
189
+ "grad_norm": 5.753520488739014,
190
+ "learning_rate": 1.47e-05,
191
+ "loss": 4.7788,
192
+ "step": 147
193
+ },
194
+ {
195
+ "epoch": 3.0,
196
+ "eval_accuracy": 0.20163934426229507,
197
+ "eval_f1_macro": 0.15865115752245537,
198
+ "eval_f1_micro": 0.20163934426229507,
199
+ "eval_f1_weighted": 0.16819957965187343,
200
+ "eval_loss": 4.359624862670898,
201
+ "eval_precision_macro": 0.2004175752293943,
202
+ "eval_precision_micro": 0.20163934426229507,
203
+ "eval_precision_weighted": 0.21056413625270873,
204
+ "eval_recall_macro": 0.18860714285714286,
205
+ "eval_recall_micro": 0.20163934426229507,
206
+ "eval_recall_weighted": 0.20163934426229507,
207
+ "eval_runtime": 16.799,
208
+ "eval_samples_per_second": 72.623,
209
+ "eval_steps_per_second": 1.191,
210
+ "step": 153
211
+ },
212
+ {
213
+ "epoch": 3.019867549668874,
214
+ "grad_norm": 7.467461585998535,
215
+ "learning_rate": 1.54e-05,
216
+ "loss": 4.1979,
217
+ "step": 154
218
+ },
219
+ {
220
+ "epoch": 3.1589403973509933,
221
+ "grad_norm": 8.049297332763672,
222
+ "learning_rate": 1.6100000000000002e-05,
223
+ "loss": 4.3989,
224
+ "step": 161
225
+ },
226
+ {
227
+ "epoch": 3.2980132450331126,
228
+ "grad_norm": 15.375213623046875,
229
+ "learning_rate": 1.6800000000000002e-05,
230
+ "loss": 4.2083,
231
+ "step": 168
232
+ },
233
+ {
234
+ "epoch": 3.437086092715232,
235
+ "grad_norm": 9.002079963684082,
236
+ "learning_rate": 1.75e-05,
237
+ "loss": 4.0218,
238
+ "step": 175
239
+ },
240
+ {
241
+ "epoch": 3.576158940397351,
242
+ "grad_norm": 9.430607795715332,
243
+ "learning_rate": 1.8200000000000002e-05,
244
+ "loss": 3.8887,
245
+ "step": 182
246
+ },
247
+ {
248
+ "epoch": 3.7152317880794703,
249
+ "grad_norm": 8.839299201965332,
250
+ "learning_rate": 1.8900000000000002e-05,
251
+ "loss": 3.6825,
252
+ "step": 189
253
+ },
254
+ {
255
+ "epoch": 3.8543046357615895,
256
+ "grad_norm": 10.325126647949219,
257
+ "learning_rate": 1.9600000000000002e-05,
258
+ "loss": 3.5607,
259
+ "step": 196
260
+ },
261
+ {
262
+ "epoch": 3.993377483443709,
263
+ "grad_norm": 10.668830871582031,
264
+ "learning_rate": 2.0300000000000002e-05,
265
+ "loss": 3.3596,
266
+ "step": 203
267
+ },
268
+ {
269
+ "epoch": 4.0,
270
+ "eval_accuracy": 0.4819672131147541,
271
+ "eval_f1_macro": 0.4183671558365781,
272
+ "eval_f1_micro": 0.4819672131147541,
273
+ "eval_f1_weighted": 0.4325206736160847,
274
+ "eval_loss": 2.7524967193603516,
275
+ "eval_precision_macro": 0.4532485780774093,
276
+ "eval_precision_micro": 0.4819672131147541,
277
+ "eval_precision_weighted": 0.4607049565114263,
278
+ "eval_recall_macro": 0.4596547619047619,
279
+ "eval_recall_micro": 0.4819672131147541,
280
+ "eval_recall_weighted": 0.4819672131147541,
281
+ "eval_runtime": 21.0215,
282
+ "eval_samples_per_second": 58.036,
283
+ "eval_steps_per_second": 0.951,
284
+ "step": 204
285
+ },
286
+ {
287
+ "epoch": 4.119205298013245,
288
+ "grad_norm": 14.15578842163086,
289
+ "learning_rate": 2.1e-05,
290
+ "loss": 2.792,
291
+ "step": 210
292
+ },
293
+ {
294
+ "epoch": 4.258278145695364,
295
+ "grad_norm": 13.17058277130127,
296
+ "learning_rate": 2.1700000000000002e-05,
297
+ "loss": 2.9187,
298
+ "step": 217
299
+ },
300
+ {
301
+ "epoch": 4.397350993377484,
302
+ "grad_norm": 17.613101959228516,
303
+ "learning_rate": 2.2400000000000002e-05,
304
+ "loss": 2.8103,
305
+ "step": 224
306
+ },
307
+ {
308
+ "epoch": 4.5364238410596025,
309
+ "grad_norm": 10.50324821472168,
310
+ "learning_rate": 2.3100000000000002e-05,
311
+ "loss": 2.6919,
312
+ "step": 231
313
+ },
314
+ {
315
+ "epoch": 4.675496688741722,
316
+ "grad_norm": 9.514472007751465,
317
+ "learning_rate": 2.38e-05,
318
+ "loss": 2.4714,
319
+ "step": 238
320
+ },
321
+ {
322
+ "epoch": 4.814569536423841,
323
+ "grad_norm": 9.27059268951416,
324
+ "learning_rate": 2.45e-05,
325
+ "loss": 2.4165,
326
+ "step": 245
327
+ },
328
+ {
329
+ "epoch": 4.95364238410596,
330
+ "grad_norm": 14.103813171386719,
331
+ "learning_rate": 2.5200000000000003e-05,
332
+ "loss": 2.2583,
333
+ "step": 252
334
+ },
335
+ {
336
+ "epoch": 5.0,
337
+ "eval_accuracy": 0.6754098360655738,
338
+ "eval_f1_macro": 0.6554426723087714,
339
+ "eval_f1_micro": 0.6754098360655738,
340
+ "eval_f1_weighted": 0.661180283037551,
341
+ "eval_loss": 1.6013199090957642,
342
+ "eval_precision_macro": 0.7148540158456954,
343
+ "eval_precision_micro": 0.6754098360655738,
344
+ "eval_precision_weighted": 0.7134497831635958,
345
+ "eval_recall_macro": 0.6648869047619047,
346
+ "eval_recall_micro": 0.6754098360655738,
347
+ "eval_recall_weighted": 0.6754098360655738,
348
+ "eval_runtime": 17.8695,
349
+ "eval_samples_per_second": 68.273,
350
+ "eval_steps_per_second": 1.119,
351
+ "step": 255
352
+ },
353
+ {
354
+ "epoch": 5.079470198675497,
355
+ "grad_norm": 10.124486923217773,
356
+ "learning_rate": 2.5900000000000003e-05,
357
+ "loss": 1.7994,
358
+ "step": 259
359
+ },
360
+ {
361
+ "epoch": 5.218543046357616,
362
+ "grad_norm": 9.2846040725708,
363
+ "learning_rate": 2.6600000000000003e-05,
364
+ "loss": 1.8769,
365
+ "step": 266
366
+ },
367
+ {
368
+ "epoch": 5.357615894039735,
369
+ "grad_norm": 9.681060791015625,
370
+ "learning_rate": 2.7300000000000003e-05,
371
+ "loss": 1.8592,
372
+ "step": 273
373
+ },
374
+ {
375
+ "epoch": 5.496688741721854,
376
+ "grad_norm": 10.057909965515137,
377
+ "learning_rate": 2.8000000000000003e-05,
378
+ "loss": 1.7236,
379
+ "step": 280
380
+ },
381
+ {
382
+ "epoch": 5.635761589403973,
383
+ "grad_norm": 11.661372184753418,
384
+ "learning_rate": 2.87e-05,
385
+ "loss": 1.7951,
386
+ "step": 287
387
+ },
388
+ {
389
+ "epoch": 5.774834437086093,
390
+ "grad_norm": 9.344789505004883,
391
+ "learning_rate": 2.94e-05,
392
+ "loss": 1.6862,
393
+ "step": 294
394
+ },
395
+ {
396
+ "epoch": 5.913907284768212,
397
+ "grad_norm": 9.517767906188965,
398
+ "learning_rate": 3.01e-05,
399
+ "loss": 1.5559,
400
+ "step": 301
401
+ },
402
+ {
403
+ "epoch": 6.0,
404
+ "eval_accuracy": 0.7745901639344263,
405
+ "eval_f1_macro": 0.7566554592638184,
406
+ "eval_f1_micro": 0.7745901639344263,
407
+ "eval_f1_weighted": 0.7613547559463157,
408
+ "eval_loss": 1.02675461769104,
409
+ "eval_precision_macro": 0.799015755078255,
410
+ "eval_precision_micro": 0.7745901639344263,
411
+ "eval_precision_weighted": 0.802561029589718,
412
+ "eval_recall_macro": 0.767904761904762,
413
+ "eval_recall_micro": 0.7745901639344263,
414
+ "eval_recall_weighted": 0.7745901639344263,
415
+ "eval_runtime": 20.701,
416
+ "eval_samples_per_second": 58.934,
417
+ "eval_steps_per_second": 0.966,
418
+ "step": 306
419
+ },
420
+ {
421
+ "epoch": 6.039735099337748,
422
+ "grad_norm": 8.714813232421875,
423
+ "learning_rate": 3.08e-05,
424
+ "loss": 1.3741,
425
+ "step": 308
426
+ },
427
+ {
428
+ "epoch": 6.178807947019868,
429
+ "grad_norm": 8.35527515411377,
430
+ "learning_rate": 3.15e-05,
431
+ "loss": 1.355,
432
+ "step": 315
433
+ },
434
+ {
435
+ "epoch": 6.317880794701987,
436
+ "grad_norm": 10.128329277038574,
437
+ "learning_rate": 3.2200000000000003e-05,
438
+ "loss": 1.31,
439
+ "step": 322
440
+ },
441
+ {
442
+ "epoch": 6.456953642384106,
443
+ "grad_norm": 8.156630516052246,
444
+ "learning_rate": 3.29e-05,
445
+ "loss": 1.2743,
446
+ "step": 329
447
+ },
448
+ {
449
+ "epoch": 6.596026490066225,
450
+ "grad_norm": 8.779105186462402,
451
+ "learning_rate": 3.3600000000000004e-05,
452
+ "loss": 1.3193,
453
+ "step": 336
454
+ },
455
+ {
456
+ "epoch": 6.735099337748345,
457
+ "grad_norm": 11.118610382080078,
458
+ "learning_rate": 3.430000000000001e-05,
459
+ "loss": 1.2748,
460
+ "step": 343
461
+ },
462
+ {
463
+ "epoch": 6.874172185430464,
464
+ "grad_norm": 8.063292503356934,
465
+ "learning_rate": 3.5e-05,
466
+ "loss": 1.2319,
467
+ "step": 350
468
+ },
469
+ {
470
+ "epoch": 7.0,
471
+ "grad_norm": 7.127477169036865,
472
+ "learning_rate": 3.57e-05,
473
+ "loss": 1.0089,
474
+ "step": 357
475
+ },
476
+ {
477
+ "epoch": 7.0,
478
+ "eval_accuracy": 0.8016393442622951,
479
+ "eval_f1_macro": 0.7894899096002038,
480
+ "eval_f1_micro": 0.8016393442622951,
481
+ "eval_f1_weighted": 0.7904804760759438,
482
+ "eval_loss": 0.7927541136741638,
483
+ "eval_precision_macro": 0.8299506911217438,
484
+ "eval_precision_micro": 0.8016393442622951,
485
+ "eval_precision_weighted": 0.8283684745893546,
486
+ "eval_recall_macro": 0.7991785714285714,
487
+ "eval_recall_micro": 0.8016393442622951,
488
+ "eval_recall_weighted": 0.8016393442622951,
489
+ "eval_runtime": 19.3393,
490
+ "eval_samples_per_second": 63.084,
491
+ "eval_steps_per_second": 1.034,
492
+ "step": 357
493
+ },
494
+ {
495
+ "epoch": 7.139072847682119,
496
+ "grad_norm": 9.581562995910645,
497
+ "learning_rate": 3.6400000000000004e-05,
498
+ "loss": 1.0836,
499
+ "step": 364
500
+ },
501
+ {
502
+ "epoch": 7.2781456953642385,
503
+ "grad_norm": 11.660162925720215,
504
+ "learning_rate": 3.71e-05,
505
+ "loss": 0.9816,
506
+ "step": 371
507
+ },
508
+ {
509
+ "epoch": 7.417218543046357,
510
+ "grad_norm": 10.361519813537598,
511
+ "learning_rate": 3.7800000000000004e-05,
512
+ "loss": 1.1386,
513
+ "step": 378
514
+ },
515
+ {
516
+ "epoch": 7.556291390728477,
517
+ "grad_norm": 8.699442863464355,
518
+ "learning_rate": 3.85e-05,
519
+ "loss": 0.9964,
520
+ "step": 385
521
+ },
522
+ {
523
+ "epoch": 7.695364238410596,
524
+ "grad_norm": 9.233181953430176,
525
+ "learning_rate": 3.9200000000000004e-05,
526
+ "loss": 1.077,
527
+ "step": 392
528
+ },
529
+ {
530
+ "epoch": 7.8344370860927155,
531
+ "grad_norm": 10.234973907470703,
532
+ "learning_rate": 3.99e-05,
533
+ "loss": 0.9844,
534
+ "step": 399
535
+ },
536
+ {
537
+ "epoch": 7.973509933774834,
538
+ "grad_norm": 9.366828918457031,
539
+ "learning_rate": 4.0600000000000004e-05,
540
+ "loss": 1.0463,
541
+ "step": 406
542
+ },
543
+ {
544
+ "epoch": 8.0,
545
+ "eval_accuracy": 0.8311475409836065,
546
+ "eval_f1_macro": 0.8222988632697376,
547
+ "eval_f1_micro": 0.8311475409836065,
548
+ "eval_f1_weighted": 0.8253316645805044,
549
+ "eval_loss": 0.6437082886695862,
550
+ "eval_precision_macro": 0.8575361999111999,
551
+ "eval_precision_micro": 0.8311475409836065,
552
+ "eval_precision_weighted": 0.8598102466750007,
553
+ "eval_recall_macro": 0.8266071428571429,
554
+ "eval_recall_micro": 0.8311475409836065,
555
+ "eval_recall_weighted": 0.8311475409836065,
556
+ "eval_runtime": 16.6963,
557
+ "eval_samples_per_second": 73.07,
558
+ "eval_steps_per_second": 1.198,
559
+ "step": 408
560
+ },
561
+ {
562
+ "epoch": 8.099337748344372,
563
+ "grad_norm": 8.740901947021484,
564
+ "learning_rate": 4.13e-05,
565
+ "loss": 0.8633,
566
+ "step": 413
567
+ },
568
+ {
569
+ "epoch": 8.23841059602649,
570
+ "grad_norm": 9.237879753112793,
571
+ "learning_rate": 4.2e-05,
572
+ "loss": 0.978,
573
+ "step": 420
574
+ },
575
+ {
576
+ "epoch": 8.37748344370861,
577
+ "grad_norm": 9.53095817565918,
578
+ "learning_rate": 4.27e-05,
579
+ "loss": 0.9132,
580
+ "step": 427
581
+ },
582
+ {
583
+ "epoch": 8.516556291390728,
584
+ "grad_norm": 9.277670860290527,
585
+ "learning_rate": 4.3400000000000005e-05,
586
+ "loss": 0.8908,
587
+ "step": 434
588
+ },
589
+ {
590
+ "epoch": 8.655629139072847,
591
+ "grad_norm": 7.736002445220947,
592
+ "learning_rate": 4.41e-05,
593
+ "loss": 0.9602,
594
+ "step": 441
595
+ },
596
+ {
597
+ "epoch": 8.794701986754967,
598
+ "grad_norm": 9.285863876342773,
599
+ "learning_rate": 4.4800000000000005e-05,
600
+ "loss": 0.8773,
601
+ "step": 448
602
+ },
603
+ {
604
+ "epoch": 8.933774834437086,
605
+ "grad_norm": 6.872951507568359,
606
+ "learning_rate": 4.55e-05,
607
+ "loss": 0.8551,
608
+ "step": 455
609
+ },
610
+ {
611
+ "epoch": 9.0,
612
+ "eval_accuracy": 0.8467213114754099,
613
+ "eval_f1_macro": 0.8380145271395273,
614
+ "eval_f1_micro": 0.8467213114754099,
615
+ "eval_f1_weighted": 0.8412076976650266,
616
+ "eval_loss": 0.5831084847450256,
617
+ "eval_precision_macro": 0.8702838485044367,
618
+ "eval_precision_micro": 0.8467213114754099,
619
+ "eval_precision_weighted": 0.8695735086995936,
620
+ "eval_recall_macro": 0.8410952380952381,
621
+ "eval_recall_micro": 0.8467213114754099,
622
+ "eval_recall_weighted": 0.8467213114754099,
623
+ "eval_runtime": 17.1069,
624
+ "eval_samples_per_second": 71.316,
625
+ "eval_steps_per_second": 1.169,
626
+ "step": 459
627
+ },
628
+ {
629
+ "epoch": 9.059602649006623,
630
+ "grad_norm": 9.55445671081543,
631
+ "learning_rate": 4.6200000000000005e-05,
632
+ "loss": 0.7727,
633
+ "step": 462
634
+ },
635
+ {
636
+ "epoch": 9.198675496688741,
637
+ "grad_norm": 10.598060607910156,
638
+ "learning_rate": 4.69e-05,
639
+ "loss": 0.7759,
640
+ "step": 469
641
+ },
642
+ {
643
+ "epoch": 9.33774834437086,
644
+ "grad_norm": 9.53549861907959,
645
+ "learning_rate": 4.76e-05,
646
+ "loss": 0.8016,
647
+ "step": 476
648
+ },
649
+ {
650
+ "epoch": 9.47682119205298,
651
+ "grad_norm": 8.146626472473145,
652
+ "learning_rate": 4.83e-05,
653
+ "loss": 0.779,
654
+ "step": 483
655
+ },
656
+ {
657
+ "epoch": 9.6158940397351,
658
+ "grad_norm": 6.373037815093994,
659
+ "learning_rate": 4.9e-05,
660
+ "loss": 0.6726,
661
+ "step": 490
662
+ },
663
+ {
664
+ "epoch": 9.754966887417218,
665
+ "grad_norm": 9.011489868164062,
666
+ "learning_rate": 4.97e-05,
667
+ "loss": 0.835,
668
+ "step": 497
669
+ },
670
+ {
671
+ "epoch": 9.894039735099337,
672
+ "grad_norm": 8.79944133758545,
673
+ "learning_rate": 4.995555555555556e-05,
674
+ "loss": 0.7234,
675
+ "step": 504
676
+ },
677
+ {
678
+ "epoch": 10.0,
679
+ "eval_accuracy": 0.8672131147540983,
680
+ "eval_f1_macro": 0.8578000912486207,
681
+ "eval_f1_micro": 0.8672131147540983,
682
+ "eval_f1_weighted": 0.8622077336035869,
683
+ "eval_loss": 0.5154255628585815,
684
+ "eval_precision_macro": 0.8773138528138529,
685
+ "eval_precision_micro": 0.8672131147540983,
686
+ "eval_precision_weighted": 0.8793111737988787,
687
+ "eval_recall_macro": 0.8614821428571429,
688
+ "eval_recall_micro": 0.8672131147540983,
689
+ "eval_recall_weighted": 0.8672131147540983,
690
+ "eval_runtime": 18.8743,
691
+ "eval_samples_per_second": 64.638,
692
+ "eval_steps_per_second": 1.06,
693
+ "step": 510
694
+ },
695
+ {
696
+ "epoch": 10.019867549668874,
697
+ "grad_norm": 12.6382474899292,
698
+ "learning_rate": 4.987777777777778e-05,
699
+ "loss": 0.6376,
700
+ "step": 511
701
+ },
702
+ {
703
+ "epoch": 10.158940397350994,
704
+ "grad_norm": 7.875304222106934,
705
+ "learning_rate": 4.9800000000000004e-05,
706
+ "loss": 0.608,
707
+ "step": 518
708
+ },
709
+ {
710
+ "epoch": 10.298013245033113,
711
+ "grad_norm": 8.565564155578613,
712
+ "learning_rate": 4.972222222222223e-05,
713
+ "loss": 0.7333,
714
+ "step": 525
715
+ },
716
+ {
717
+ "epoch": 10.437086092715232,
718
+ "grad_norm": 8.355602264404297,
719
+ "learning_rate": 4.964444444444445e-05,
720
+ "loss": 0.6883,
721
+ "step": 532
722
+ },
723
+ {
724
+ "epoch": 10.57615894039735,
725
+ "grad_norm": 8.657296180725098,
726
+ "learning_rate": 4.956666666666667e-05,
727
+ "loss": 0.7567,
728
+ "step": 539
729
+ },
730
+ {
731
+ "epoch": 10.71523178807947,
732
+ "grad_norm": 7.550002098083496,
733
+ "learning_rate": 4.948888888888889e-05,
734
+ "loss": 0.7422,
735
+ "step": 546
736
+ },
737
+ {
738
+ "epoch": 10.85430463576159,
739
+ "grad_norm": 7.08698844909668,
740
+ "learning_rate": 4.9411111111111114e-05,
741
+ "loss": 0.7045,
742
+ "step": 553
743
+ },
744
+ {
745
+ "epoch": 10.993377483443709,
746
+ "grad_norm": 8.84117317199707,
747
+ "learning_rate": 4.933333333333334e-05,
748
+ "loss": 0.7177,
749
+ "step": 560
750
+ },
751
+ {
752
+ "epoch": 11.0,
753
+ "eval_accuracy": 0.8745901639344262,
754
+ "eval_f1_macro": 0.8674472995614636,
755
+ "eval_f1_micro": 0.8745901639344262,
756
+ "eval_f1_weighted": 0.869469718145237,
757
+ "eval_loss": 0.4802148938179016,
758
+ "eval_precision_macro": 0.8951194083694085,
759
+ "eval_precision_micro": 0.8745901639344262,
760
+ "eval_precision_weighted": 0.8939981489366735,
761
+ "eval_recall_macro": 0.8713035714285715,
762
+ "eval_recall_micro": 0.8745901639344262,
763
+ "eval_recall_weighted": 0.8745901639344262,
764
+ "eval_runtime": 17.7202,
765
+ "eval_samples_per_second": 68.848,
766
+ "eval_steps_per_second": 1.129,
767
+ "step": 561
768
+ },
769
+ {
770
+ "epoch": 11.119205298013245,
771
+ "grad_norm": 9.191046714782715,
772
+ "learning_rate": 4.925555555555556e-05,
773
+ "loss": 0.5652,
774
+ "step": 567
775
+ },
776
+ {
777
+ "epoch": 11.258278145695364,
778
+ "grad_norm": 6.099012851715088,
779
+ "learning_rate": 4.917777777777778e-05,
780
+ "loss": 0.6823,
781
+ "step": 574
782
+ },
783
+ {
784
+ "epoch": 11.397350993377483,
785
+ "grad_norm": 7.644286632537842,
786
+ "learning_rate": 4.91e-05,
787
+ "loss": 0.5704,
788
+ "step": 581
789
+ },
790
+ {
791
+ "epoch": 11.536423841059603,
792
+ "grad_norm": 7.208959579467773,
793
+ "learning_rate": 4.9022222222222224e-05,
794
+ "loss": 0.5889,
795
+ "step": 588
796
+ },
797
+ {
798
+ "epoch": 11.675496688741722,
799
+ "grad_norm": 8.255132675170898,
800
+ "learning_rate": 4.894444444444445e-05,
801
+ "loss": 0.7361,
802
+ "step": 595
803
+ },
804
+ {
805
+ "epoch": 11.814569536423841,
806
+ "grad_norm": 8.378829002380371,
807
+ "learning_rate": 4.886666666666667e-05,
808
+ "loss": 0.6723,
809
+ "step": 602
810
+ },
811
+ {
812
+ "epoch": 11.95364238410596,
813
+ "grad_norm": 11.726705551147461,
814
+ "learning_rate": 4.878888888888889e-05,
815
+ "loss": 0.711,
816
+ "step": 609
817
+ },
818
+ {
819
+ "epoch": 12.0,
820
+ "eval_accuracy": 0.8565573770491803,
821
+ "eval_f1_macro": 0.8515204682228676,
822
+ "eval_f1_micro": 0.8565573770491803,
823
+ "eval_f1_weighted": 0.8536645233724359,
824
+ "eval_loss": 0.4757900536060333,
825
+ "eval_precision_macro": 0.8826355588855589,
826
+ "eval_precision_micro": 0.8565573770491803,
827
+ "eval_precision_weighted": 0.8834410489328521,
828
+ "eval_recall_macro": 0.8537678571428572,
829
+ "eval_recall_micro": 0.8565573770491803,
830
+ "eval_recall_weighted": 0.8565573770491803,
831
+ "eval_runtime": 18.3018,
832
+ "eval_samples_per_second": 66.66,
833
+ "eval_steps_per_second": 1.093,
834
+ "step": 612
835
+ },
836
+ {
837
+ "epoch": 12.079470198675496,
838
+ "grad_norm": 7.42709493637085,
839
+ "learning_rate": 4.871111111111111e-05,
840
+ "loss": 0.5113,
841
+ "step": 616
842
+ },
843
+ {
844
+ "epoch": 12.218543046357617,
845
+ "grad_norm": 9.922298431396484,
846
+ "learning_rate": 4.8633333333333334e-05,
847
+ "loss": 0.6113,
848
+ "step": 623
849
+ },
850
+ {
851
+ "epoch": 12.357615894039736,
852
+ "grad_norm": 5.806549072265625,
853
+ "learning_rate": 4.855555555555556e-05,
854
+ "loss": 0.592,
855
+ "step": 630
856
+ },
857
+ {
858
+ "epoch": 12.496688741721854,
859
+ "grad_norm": 8.837335586547852,
860
+ "learning_rate": 4.847777777777778e-05,
861
+ "loss": 0.604,
862
+ "step": 637
863
+ },
864
+ {
865
+ "epoch": 12.635761589403973,
866
+ "grad_norm": 7.156353950500488,
867
+ "learning_rate": 4.8400000000000004e-05,
868
+ "loss": 0.5203,
869
+ "step": 644
870
+ },
871
+ {
872
+ "epoch": 12.774834437086092,
873
+ "grad_norm": 8.29881477355957,
874
+ "learning_rate": 4.832222222222223e-05,
875
+ "loss": 0.6528,
876
+ "step": 651
877
+ },
878
+ {
879
+ "epoch": 12.913907284768213,
880
+ "grad_norm": 7.490756511688232,
881
+ "learning_rate": 4.824444444444445e-05,
882
+ "loss": 0.5318,
883
+ "step": 658
884
+ },
885
+ {
886
+ "epoch": 13.0,
887
+ "eval_accuracy": 0.8672131147540983,
888
+ "eval_f1_macro": 0.8627146097366685,
889
+ "eval_f1_micro": 0.8672131147540983,
890
+ "eval_f1_weighted": 0.8659745527428748,
891
+ "eval_loss": 0.4569399952888489,
892
+ "eval_precision_macro": 0.8866749639249639,
893
+ "eval_precision_micro": 0.8672131147540983,
894
+ "eval_precision_weighted": 0.888172740283396,
895
+ "eval_recall_macro": 0.8630297619047619,
896
+ "eval_recall_micro": 0.8672131147540983,
897
+ "eval_recall_weighted": 0.8672131147540983,
898
+ "eval_runtime": 17.7039,
899
+ "eval_samples_per_second": 68.911,
900
+ "eval_steps_per_second": 1.13,
901
+ "step": 663
902
+ },
903
+ {
904
+ "epoch": 13.039735099337749,
905
+ "grad_norm": 8.017422676086426,
906
+ "learning_rate": 4.8166666666666674e-05,
907
+ "loss": 0.4727,
908
+ "step": 665
909
+ },
910
+ {
911
+ "epoch": 13.178807947019868,
912
+ "grad_norm": 7.218966007232666,
913
+ "learning_rate": 4.808888888888889e-05,
914
+ "loss": 0.5897,
915
+ "step": 672
916
+ },
917
+ {
918
+ "epoch": 13.317880794701987,
919
+ "grad_norm": 10.478813171386719,
920
+ "learning_rate": 4.8011111111111114e-05,
921
+ "loss": 0.5473,
922
+ "step": 679
923
+ },
924
+ {
925
+ "epoch": 13.456953642384105,
926
+ "grad_norm": 6.682877540588379,
927
+ "learning_rate": 4.793333333333334e-05,
928
+ "loss": 0.5479,
929
+ "step": 686
930
+ },
931
+ {
932
+ "epoch": 13.596026490066226,
933
+ "grad_norm": 12.535813331604004,
934
+ "learning_rate": 4.785555555555556e-05,
935
+ "loss": 0.5458,
936
+ "step": 693
937
+ },
938
+ {
939
+ "epoch": 13.735099337748345,
940
+ "grad_norm": 7.044444561004639,
941
+ "learning_rate": 4.7777777777777784e-05,
942
+ "loss": 0.5401,
943
+ "step": 700
944
+ },
945
+ {
946
+ "epoch": 13.874172185430464,
947
+ "grad_norm": 7.247359752655029,
948
+ "learning_rate": 4.77e-05,
949
+ "loss": 0.5912,
950
+ "step": 707
951
+ },
952
+ {
953
+ "epoch": 14.0,
954
+ "grad_norm": 6.261186122894287,
955
+ "learning_rate": 4.7622222222222224e-05,
956
+ "loss": 0.5383,
957
+ "step": 714
958
+ },
959
+ {
960
+ "epoch": 14.0,
961
+ "eval_accuracy": 0.8786885245901639,
962
+ "eval_f1_macro": 0.87375893714129,
963
+ "eval_f1_micro": 0.8786885245901639,
964
+ "eval_f1_weighted": 0.8755628520466998,
965
+ "eval_loss": 0.4514833092689514,
966
+ "eval_precision_macro": 0.8954447203123673,
967
+ "eval_precision_micro": 0.8786885245901639,
968
+ "eval_precision_weighted": 0.8948432423143127,
969
+ "eval_recall_macro": 0.8754642857142858,
970
+ "eval_recall_micro": 0.8786885245901639,
971
+ "eval_recall_weighted": 0.8786885245901639,
972
+ "eval_runtime": 18.849,
973
+ "eval_samples_per_second": 64.725,
974
+ "eval_steps_per_second": 1.061,
975
+ "step": 714
976
+ },
977
+ {
978
+ "epoch": 14.139072847682119,
979
+ "grad_norm": 8.038314819335938,
980
+ "learning_rate": 4.754444444444445e-05,
981
+ "loss": 0.4757,
982
+ "step": 721
983
+ },
984
+ {
985
+ "epoch": 14.278145695364238,
986
+ "grad_norm": 6.933023929595947,
987
+ "learning_rate": 4.746666666666667e-05,
988
+ "loss": 0.5422,
989
+ "step": 728
990
+ },
991
+ {
992
+ "epoch": 14.417218543046358,
993
+ "grad_norm": 5.427736282348633,
994
+ "learning_rate": 4.7388888888888894e-05,
995
+ "loss": 0.45,
996
+ "step": 735
997
+ },
998
+ {
999
+ "epoch": 14.556291390728477,
1000
+ "grad_norm": 7.140816688537598,
1001
+ "learning_rate": 4.731111111111111e-05,
1002
+ "loss": 0.4801,
1003
+ "step": 742
1004
+ },
1005
+ {
1006
+ "epoch": 14.695364238410596,
1007
+ "grad_norm": 14.195199966430664,
1008
+ "learning_rate": 4.7233333333333334e-05,
1009
+ "loss": 0.4627,
1010
+ "step": 749
1011
+ },
1012
+ {
1013
+ "epoch": 14.834437086092715,
1014
+ "grad_norm": 6.867580413818359,
1015
+ "learning_rate": 4.715555555555556e-05,
1016
+ "loss": 0.4988,
1017
+ "step": 756
1018
+ },
1019
+ {
1020
+ "epoch": 14.973509933774835,
1021
+ "grad_norm": 6.6102519035339355,
1022
+ "learning_rate": 4.707777777777778e-05,
1023
+ "loss": 0.4884,
1024
+ "step": 763
1025
+ },
1026
+ {
1027
+ "epoch": 15.0,
1028
+ "eval_accuracy": 0.8762295081967213,
1029
+ "eval_f1_macro": 0.8721245222748117,
1030
+ "eval_f1_micro": 0.8762295081967213,
1031
+ "eval_f1_weighted": 0.8740396988842932,
1032
+ "eval_loss": 0.4364243745803833,
1033
+ "eval_precision_macro": 0.8978099123099124,
1034
+ "eval_precision_micro": 0.8762295081967213,
1035
+ "eval_precision_weighted": 0.899852774729824,
1036
+ "eval_recall_macro": 0.8744226190476191,
1037
+ "eval_recall_micro": 0.8762295081967213,
1038
+ "eval_recall_weighted": 0.8762295081967213,
1039
+ "eval_runtime": 18.6981,
1040
+ "eval_samples_per_second": 65.247,
1041
+ "eval_steps_per_second": 1.07,
1042
+ "step": 765
1043
+ },
1044
+ {
1045
+ "epoch": 15.099337748344372,
1046
+ "grad_norm": 6.755874156951904,
1047
+ "learning_rate": 4.7e-05,
1048
+ "loss": 0.4067,
1049
+ "step": 770
1050
+ },
1051
+ {
1052
+ "epoch": 15.23841059602649,
1053
+ "grad_norm": 7.634614944458008,
1054
+ "learning_rate": 4.692222222222222e-05,
1055
+ "loss": 0.5742,
1056
+ "step": 777
1057
+ },
1058
+ {
1059
+ "epoch": 15.37748344370861,
1060
+ "grad_norm": 4.661431789398193,
1061
+ "learning_rate": 4.6844444444444444e-05,
1062
+ "loss": 0.4345,
1063
+ "step": 784
1064
+ },
1065
+ {
1066
+ "epoch": 15.516556291390728,
1067
+ "grad_norm": 18.19213104248047,
1068
+ "learning_rate": 4.676666666666667e-05,
1069
+ "loss": 0.3906,
1070
+ "step": 791
1071
+ },
1072
+ {
1073
+ "epoch": 15.655629139072847,
1074
+ "grad_norm": 9.202508926391602,
1075
+ "learning_rate": 4.668888888888889e-05,
1076
+ "loss": 0.4467,
1077
+ "step": 798
1078
+ },
1079
+ {
1080
+ "epoch": 15.794701986754967,
1081
+ "grad_norm": 5.7517619132995605,
1082
+ "learning_rate": 4.6611111111111114e-05,
1083
+ "loss": 0.4794,
1084
+ "step": 805
1085
+ },
1086
+ {
1087
+ "epoch": 15.933774834437086,
1088
+ "grad_norm": 7.352263927459717,
1089
+ "learning_rate": 4.653333333333334e-05,
1090
+ "loss": 0.5808,
1091
+ "step": 812
1092
+ },
1093
+ {
1094
+ "epoch": 16.0,
1095
+ "eval_accuracy": 0.8844262295081967,
1096
+ "eval_f1_macro": 0.8804055188447603,
1097
+ "eval_f1_micro": 0.8844262295081967,
1098
+ "eval_f1_weighted": 0.8828176785374927,
1099
+ "eval_loss": 0.42853277921676636,
1100
+ "eval_precision_macro": 0.9033134920634922,
1101
+ "eval_precision_micro": 0.8844262295081967,
1102
+ "eval_precision_weighted": 0.9039139344262296,
1103
+ "eval_recall_macro": 0.8815000000000001,
1104
+ "eval_recall_micro": 0.8844262295081967,
1105
+ "eval_recall_weighted": 0.8844262295081967,
1106
+ "eval_runtime": 16.8381,
1107
+ "eval_samples_per_second": 72.455,
1108
+ "eval_steps_per_second": 1.188,
1109
+ "step": 816
1110
+ },
1111
+ {
1112
+ "epoch": 16.05960264900662,
1113
+ "grad_norm": 7.281948089599609,
1114
+ "learning_rate": 4.645555555555556e-05,
1115
+ "loss": 0.4086,
1116
+ "step": 819
1117
+ },
1118
+ {
1119
+ "epoch": 16.198675496688743,
1120
+ "grad_norm": 6.114387512207031,
1121
+ "learning_rate": 4.6377777777777784e-05,
1122
+ "loss": 0.4764,
1123
+ "step": 826
1124
+ },
1125
+ {
1126
+ "epoch": 16.337748344370862,
1127
+ "grad_norm": 6.659070014953613,
1128
+ "learning_rate": 4.630000000000001e-05,
1129
+ "loss": 0.3795,
1130
+ "step": 833
1131
+ },
1132
+ {
1133
+ "epoch": 16.47682119205298,
1134
+ "grad_norm": 4.916147708892822,
1135
+ "learning_rate": 4.6222222222222224e-05,
1136
+ "loss": 0.4566,
1137
+ "step": 840
1138
+ },
1139
+ {
1140
+ "epoch": 16.6158940397351,
1141
+ "grad_norm": 4.47711706161499,
1142
+ "learning_rate": 4.614444444444445e-05,
1143
+ "loss": 0.4853,
1144
+ "step": 847
1145
+ },
1146
+ {
1147
+ "epoch": 16.75496688741722,
1148
+ "grad_norm": 3.844993829727173,
1149
+ "learning_rate": 4.606666666666667e-05,
1150
+ "loss": 0.4298,
1151
+ "step": 854
1152
+ },
1153
+ {
1154
+ "epoch": 16.894039735099337,
1155
+ "grad_norm": 6.387825012207031,
1156
+ "learning_rate": 4.5988888888888894e-05,
1157
+ "loss": 0.5004,
1158
+ "step": 861
1159
+ },
1160
+ {
1161
+ "epoch": 17.0,
1162
+ "eval_accuracy": 0.8762295081967213,
1163
+ "eval_f1_macro": 0.8698742597272009,
1164
+ "eval_f1_micro": 0.8762295081967213,
1165
+ "eval_f1_weighted": 0.8724110373447403,
1166
+ "eval_loss": 0.4314015805721283,
1167
+ "eval_precision_macro": 0.8928979076479078,
1168
+ "eval_precision_micro": 0.8762295081967213,
1169
+ "eval_precision_weighted": 0.8934799694840678,
1170
+ "eval_recall_macro": 0.8727380952380953,
1171
+ "eval_recall_micro": 0.8762295081967213,
1172
+ "eval_recall_weighted": 0.8762295081967213,
1173
+ "eval_runtime": 16.836,
1174
+ "eval_samples_per_second": 72.464,
1175
+ "eval_steps_per_second": 1.188,
1176
+ "step": 867
1177
+ },
1178
+ {
1179
+ "epoch": 17.019867549668874,
1180
+ "grad_norm": 27.180448532104492,
1181
+ "learning_rate": 4.591111111111112e-05,
1182
+ "loss": 0.3922,
1183
+ "step": 868
1184
+ },
1185
+ {
1186
+ "epoch": 17.158940397350992,
1187
+ "grad_norm": 7.212271690368652,
1188
+ "learning_rate": 4.5833333333333334e-05,
1189
+ "loss": 0.4964,
1190
+ "step": 875
1191
+ },
1192
+ {
1193
+ "epoch": 17.29801324503311,
1194
+ "grad_norm": 6.941275119781494,
1195
+ "learning_rate": 4.575555555555556e-05,
1196
+ "loss": 0.4749,
1197
+ "step": 882
1198
+ },
1199
+ {
1200
+ "epoch": 17.437086092715234,
1201
+ "grad_norm": 5.599315166473389,
1202
+ "learning_rate": 4.567777777777778e-05,
1203
+ "loss": 0.4885,
1204
+ "step": 889
1205
+ },
1206
+ {
1207
+ "epoch": 17.576158940397352,
1208
+ "grad_norm": 5.130136489868164,
1209
+ "learning_rate": 4.5600000000000004e-05,
1210
+ "loss": 0.5264,
1211
+ "step": 896
1212
+ },
1213
+ {
1214
+ "epoch": 17.71523178807947,
1215
+ "grad_norm": 6.701182842254639,
1216
+ "learning_rate": 4.552222222222222e-05,
1217
+ "loss": 0.4466,
1218
+ "step": 903
1219
+ },
1220
+ {
1221
+ "epoch": 17.85430463576159,
1222
+ "grad_norm": 6.988078594207764,
1223
+ "learning_rate": 4.5444444444444444e-05,
1224
+ "loss": 0.3975,
1225
+ "step": 910
1226
+ },
1227
+ {
1228
+ "epoch": 17.99337748344371,
1229
+ "grad_norm": 21.04884910583496,
1230
+ "learning_rate": 4.536666666666667e-05,
1231
+ "loss": 0.3537,
1232
+ "step": 917
1233
+ },
1234
+ {
1235
+ "epoch": 18.0,
1236
+ "eval_accuracy": 0.8827868852459017,
1237
+ "eval_f1_macro": 0.8796409929939343,
1238
+ "eval_f1_micro": 0.8827868852459017,
1239
+ "eval_f1_weighted": 0.8807563045280018,
1240
+ "eval_loss": 0.43711456656455994,
1241
+ "eval_precision_macro": 0.8961841630591632,
1242
+ "eval_precision_micro": 0.8827868852459017,
1243
+ "eval_precision_weighted": 0.8967877250254299,
1244
+ "eval_recall_macro": 0.8815892857142857,
1245
+ "eval_recall_micro": 0.8827868852459017,
1246
+ "eval_recall_weighted": 0.8827868852459017,
1247
+ "eval_runtime": 16.5565,
1248
+ "eval_samples_per_second": 73.687,
1249
+ "eval_steps_per_second": 1.208,
1250
+ "step": 918
1251
+ },
1252
+ {
1253
+ "epoch": 18.119205298013245,
1254
+ "grad_norm": 7.078250408172607,
1255
+ "learning_rate": 4.528888888888889e-05,
1256
+ "loss": 0.406,
1257
+ "step": 924
1258
+ },
1259
+ {
1260
+ "epoch": 18.258278145695364,
1261
+ "grad_norm": 5.898381233215332,
1262
+ "learning_rate": 4.5211111111111114e-05,
1263
+ "loss": 0.3619,
1264
+ "step": 931
1265
+ },
1266
+ {
1267
+ "epoch": 18.397350993377483,
1268
+ "grad_norm": 7.024068355560303,
1269
+ "learning_rate": 4.513333333333333e-05,
1270
+ "loss": 0.4441,
1271
+ "step": 938
1272
+ },
1273
+ {
1274
+ "epoch": 18.5364238410596,
1275
+ "grad_norm": 6.673207759857178,
1276
+ "learning_rate": 4.5055555555555554e-05,
1277
+ "loss": 0.3495,
1278
+ "step": 945
1279
+ },
1280
+ {
1281
+ "epoch": 18.67549668874172,
1282
+ "grad_norm": 7.188521385192871,
1283
+ "learning_rate": 4.497777777777778e-05,
1284
+ "loss": 0.424,
1285
+ "step": 952
1286
+ },
1287
+ {
1288
+ "epoch": 18.814569536423843,
1289
+ "grad_norm": 13.439776420593262,
1290
+ "learning_rate": 4.49e-05,
1291
+ "loss": 0.3988,
1292
+ "step": 959
1293
+ },
1294
+ {
1295
+ "epoch": 18.95364238410596,
1296
+ "grad_norm": 6.0843305587768555,
1297
+ "learning_rate": 4.4822222222222224e-05,
1298
+ "loss": 0.4362,
1299
+ "step": 966
1300
+ },
1301
+ {
1302
+ "epoch": 19.0,
1303
+ "eval_accuracy": 0.8827868852459017,
1304
+ "eval_f1_macro": 0.8795757517766032,
1305
+ "eval_f1_micro": 0.8827868852459017,
1306
+ "eval_f1_weighted": 0.8811126749814384,
1307
+ "eval_loss": 0.41565409302711487,
1308
+ "eval_precision_macro": 0.9002148268398268,
1309
+ "eval_precision_micro": 0.8827868852459017,
1310
+ "eval_precision_weighted": 0.9000307820594705,
1311
+ "eval_recall_macro": 0.8807797619047619,
1312
+ "eval_recall_micro": 0.8827868852459017,
1313
+ "eval_recall_weighted": 0.8827868852459017,
1314
+ "eval_runtime": 20.7708,
1315
+ "eval_samples_per_second": 58.736,
1316
+ "eval_steps_per_second": 0.963,
1317
+ "step": 969
1318
+ },
1319
+ {
1320
+ "epoch": 19.079470198675498,
1321
+ "grad_norm": 5.036433696746826,
1322
+ "learning_rate": 4.474444444444445e-05,
1323
+ "loss": 0.3053,
1324
+ "step": 973
1325
+ },
1326
+ {
1327
+ "epoch": 19.218543046357617,
1328
+ "grad_norm": 6.565299034118652,
1329
+ "learning_rate": 4.466666666666667e-05,
1330
+ "loss": 0.3934,
1331
+ "step": 980
1332
+ },
1333
+ {
1334
+ "epoch": 19.357615894039736,
1335
+ "grad_norm": 8.689690589904785,
1336
+ "learning_rate": 4.4588888888888894e-05,
1337
+ "loss": 0.4622,
1338
+ "step": 987
1339
+ },
1340
+ {
1341
+ "epoch": 19.496688741721854,
1342
+ "grad_norm": 6.253081321716309,
1343
+ "learning_rate": 4.451111111111112e-05,
1344
+ "loss": 0.4095,
1345
+ "step": 994
1346
+ },
1347
+ {
1348
+ "epoch": 19.635761589403973,
1349
+ "grad_norm": 5.1961846351623535,
1350
+ "learning_rate": 4.443333333333334e-05,
1351
+ "loss": 0.3806,
1352
+ "step": 1001
1353
+ },
1354
+ {
1355
+ "epoch": 19.774834437086092,
1356
+ "grad_norm": 7.494758129119873,
1357
+ "learning_rate": 4.435555555555556e-05,
1358
+ "loss": 0.4382,
1359
+ "step": 1008
1360
+ },
1361
+ {
1362
+ "epoch": 19.91390728476821,
1363
+ "grad_norm": 4.928430557250977,
1364
+ "learning_rate": 4.427777777777778e-05,
1365
+ "loss": 0.3672,
1366
+ "step": 1015
1367
+ },
1368
+ {
1369
+ "epoch": 20.0,
1370
+ "eval_accuracy": 0.8885245901639345,
1371
+ "eval_f1_macro": 0.8857740416618127,
1372
+ "eval_f1_micro": 0.8885245901639345,
1373
+ "eval_f1_weighted": 0.8862372325718009,
1374
+ "eval_loss": 0.404880166053772,
1375
+ "eval_precision_macro": 0.9038582528582529,
1376
+ "eval_precision_micro": 0.8885245901639345,
1377
+ "eval_precision_weighted": 0.9033163921688513,
1378
+ "eval_recall_macro": 0.887125,
1379
+ "eval_recall_micro": 0.8885245901639345,
1380
+ "eval_recall_weighted": 0.8885245901639345,
1381
+ "eval_runtime": 31.922,
1382
+ "eval_samples_per_second": 38.218,
1383
+ "eval_steps_per_second": 0.627,
1384
+ "step": 1020
1385
+ },
1386
+ {
1387
+ "epoch": 20.039735099337747,
1388
+ "grad_norm": 4.6109299659729,
1389
+ "learning_rate": 4.4200000000000004e-05,
1390
+ "loss": 0.3098,
1391
+ "step": 1022
1392
+ },
1393
+ {
1394
+ "epoch": 20.178807947019866,
1395
+ "grad_norm": 9.729621887207031,
1396
+ "learning_rate": 4.412222222222223e-05,
1397
+ "loss": 0.3719,
1398
+ "step": 1029
1399
+ },
1400
+ {
1401
+ "epoch": 20.31788079470199,
1402
+ "grad_norm": 5.514610767364502,
1403
+ "learning_rate": 4.404444444444445e-05,
1404
+ "loss": 0.3623,
1405
+ "step": 1036
1406
+ },
1407
+ {
1408
+ "epoch": 20.456953642384107,
1409
+ "grad_norm": 4.57627534866333,
1410
+ "learning_rate": 4.396666666666667e-05,
1411
+ "loss": 0.3866,
1412
+ "step": 1043
1413
+ },
1414
+ {
1415
+ "epoch": 20.596026490066226,
1416
+ "grad_norm": 5.22489595413208,
1417
+ "learning_rate": 4.388888888888889e-05,
1418
+ "loss": 0.3288,
1419
+ "step": 1050
1420
+ },
1421
+ {
1422
+ "epoch": 20.735099337748345,
1423
+ "grad_norm": 5.026643753051758,
1424
+ "learning_rate": 4.3811111111111114e-05,
1425
+ "loss": 0.3729,
1426
+ "step": 1057
1427
+ },
1428
+ {
1429
+ "epoch": 20.874172185430464,
1430
+ "grad_norm": 5.927851676940918,
1431
+ "learning_rate": 4.373333333333334e-05,
1432
+ "loss": 0.3909,
1433
+ "step": 1064
1434
+ },
1435
+ {
1436
+ "epoch": 21.0,
1437
+ "grad_norm": 2.4257664680480957,
1438
+ "learning_rate": 4.3655555555555554e-05,
1439
+ "loss": 0.3431,
1440
+ "step": 1071
1441
+ },
1442
+ {
1443
+ "epoch": 21.0,
1444
+ "eval_accuracy": 0.8885245901639345,
1445
+ "eval_f1_macro": 0.8852515238029943,
1446
+ "eval_f1_micro": 0.8885245901639345,
1447
+ "eval_f1_weighted": 0.8881485842454984,
1448
+ "eval_loss": 0.4021802544593811,
1449
+ "eval_precision_macro": 0.9078284354534355,
1450
+ "eval_precision_micro": 0.8885245901639345,
1451
+ "eval_precision_weighted": 0.9098325740743773,
1452
+ "eval_recall_macro": 0.885482142857143,
1453
+ "eval_recall_micro": 0.8885245901639345,
1454
+ "eval_recall_weighted": 0.8885245901639345,
1455
+ "eval_runtime": 19.889,
1456
+ "eval_samples_per_second": 61.341,
1457
+ "eval_steps_per_second": 1.006,
1458
+ "step": 1071
1459
+ },
1460
+ {
1461
+ "epoch": 21.13907284768212,
1462
+ "grad_norm": 4.817611217498779,
1463
+ "learning_rate": 4.357777777777778e-05,
1464
+ "loss": 0.3287,
1465
+ "step": 1078
1466
+ },
1467
+ {
1468
+ "epoch": 21.278145695364238,
1469
+ "grad_norm": 5.901778697967529,
1470
+ "learning_rate": 4.35e-05,
1471
+ "loss": 0.3539,
1472
+ "step": 1085
1473
+ },
1474
+ {
1475
+ "epoch": 21.417218543046356,
1476
+ "grad_norm": 6.392418384552002,
1477
+ "learning_rate": 4.3422222222222224e-05,
1478
+ "loss": 0.3926,
1479
+ "step": 1092
1480
+ },
1481
+ {
1482
+ "epoch": 21.556291390728475,
1483
+ "grad_norm": 14.076611518859863,
1484
+ "learning_rate": 4.334444444444445e-05,
1485
+ "loss": 0.3784,
1486
+ "step": 1099
1487
+ },
1488
+ {
1489
+ "epoch": 21.695364238410598,
1490
+ "grad_norm": 8.352983474731445,
1491
+ "learning_rate": 4.3266666666666664e-05,
1492
+ "loss": 0.3291,
1493
+ "step": 1106
1494
+ },
1495
+ {
1496
+ "epoch": 21.834437086092716,
1497
+ "grad_norm": 8.554953575134277,
1498
+ "learning_rate": 4.318888888888889e-05,
1499
+ "loss": 0.3553,
1500
+ "step": 1113
1501
+ },
1502
+ {
1503
+ "epoch": 21.973509933774835,
1504
+ "grad_norm": 5.422201633453369,
1505
+ "learning_rate": 4.311111111111111e-05,
1506
+ "loss": 0.3301,
1507
+ "step": 1120
1508
+ },
1509
+ {
1510
+ "epoch": 22.0,
1511
+ "eval_accuracy": 0.8868852459016393,
1512
+ "eval_f1_macro": 0.8844045750001632,
1513
+ "eval_f1_micro": 0.8868852459016393,
1514
+ "eval_f1_weighted": 0.8863481245798025,
1515
+ "eval_loss": 0.43187016248703003,
1516
+ "eval_precision_macro": 0.905546176046176,
1517
+ "eval_precision_micro": 0.8868852459016393,
1518
+ "eval_precision_weighted": 0.9069275010053698,
1519
+ "eval_recall_macro": 0.884922619047619,
1520
+ "eval_recall_micro": 0.8868852459016393,
1521
+ "eval_recall_weighted": 0.8868852459016393,
1522
+ "eval_runtime": 16.9877,
1523
+ "eval_samples_per_second": 71.817,
1524
+ "eval_steps_per_second": 1.177,
1525
+ "step": 1122
1526
+ },
1527
+ {
1528
+ "epoch": 22.09933774834437,
1529
+ "grad_norm": 4.536903381347656,
1530
+ "learning_rate": 4.3033333333333334e-05,
1531
+ "loss": 0.3081,
1532
+ "step": 1127
1533
+ },
1534
+ {
1535
+ "epoch": 22.23841059602649,
1536
+ "grad_norm": 5.818119525909424,
1537
+ "learning_rate": 4.295555555555556e-05,
1538
+ "loss": 0.4447,
1539
+ "step": 1134
1540
+ },
1541
+ {
1542
+ "epoch": 22.37748344370861,
1543
+ "grad_norm": 6.355660438537598,
1544
+ "learning_rate": 4.287777777777778e-05,
1545
+ "loss": 0.2905,
1546
+ "step": 1141
1547
+ },
1548
+ {
1549
+ "epoch": 22.516556291390728,
1550
+ "grad_norm": 3.7836861610412598,
1551
+ "learning_rate": 4.2800000000000004e-05,
1552
+ "loss": 0.3363,
1553
+ "step": 1148
1554
+ },
1555
+ {
1556
+ "epoch": 22.655629139072847,
1557
+ "grad_norm": 7.677190780639648,
1558
+ "learning_rate": 4.272222222222223e-05,
1559
+ "loss": 0.335,
1560
+ "step": 1155
1561
+ },
1562
+ {
1563
+ "epoch": 22.794701986754966,
1564
+ "grad_norm": 9.610170364379883,
1565
+ "learning_rate": 4.264444444444445e-05,
1566
+ "loss": 0.3224,
1567
+ "step": 1162
1568
+ },
1569
+ {
1570
+ "epoch": 22.933774834437084,
1571
+ "grad_norm": 4.4913458824157715,
1572
+ "learning_rate": 4.2566666666666674e-05,
1573
+ "loss": 0.3594,
1574
+ "step": 1169
1575
+ },
1576
+ {
1577
+ "epoch": 23.0,
1578
+ "eval_accuracy": 0.8909836065573771,
1579
+ "eval_f1_macro": 0.8853569522153114,
1580
+ "eval_f1_micro": 0.8909836065573771,
1581
+ "eval_f1_weighted": 0.8877271844125241,
1582
+ "eval_loss": 0.415208101272583,
1583
+ "eval_precision_macro": 0.9054862637362637,
1584
+ "eval_precision_micro": 0.8909836065573771,
1585
+ "eval_precision_weighted": 0.9061880492003442,
1586
+ "eval_recall_macro": 0.8882023809523809,
1587
+ "eval_recall_micro": 0.8909836065573771,
1588
+ "eval_recall_weighted": 0.8909836065573771,
1589
+ "eval_runtime": 21.468,
1590
+ "eval_samples_per_second": 56.829,
1591
+ "eval_steps_per_second": 0.932,
1592
+ "step": 1173
1593
+ },
1594
+ {
1595
+ "epoch": 23.05960264900662,
1596
+ "grad_norm": 6.537961959838867,
1597
+ "learning_rate": 4.248888888888889e-05,
1598
+ "loss": 0.3416,
1599
+ "step": 1176
1600
+ },
1601
+ {
1602
+ "epoch": 23.198675496688743,
1603
+ "grad_norm": 4.195661544799805,
1604
+ "learning_rate": 4.2411111111111114e-05,
1605
+ "loss": 0.3493,
1606
+ "step": 1183
1607
+ },
1608
+ {
1609
+ "epoch": 23.337748344370862,
1610
+ "grad_norm": 6.090582370758057,
1611
+ "learning_rate": 4.233333333333334e-05,
1612
+ "loss": 0.2816,
1613
+ "step": 1190
1614
+ },
1615
+ {
1616
+ "epoch": 23.47682119205298,
1617
+ "grad_norm": 4.269461154937744,
1618
+ "learning_rate": 4.225555555555556e-05,
1619
+ "loss": 0.3461,
1620
+ "step": 1197
1621
+ },
1622
+ {
1623
+ "epoch": 23.6158940397351,
1624
+ "grad_norm": 5.240416049957275,
1625
+ "learning_rate": 4.217777777777778e-05,
1626
+ "loss": 0.3688,
1627
+ "step": 1204
1628
+ },
1629
+ {
1630
+ "epoch": 23.75496688741722,
1631
+ "grad_norm": 3.2008297443389893,
1632
+ "learning_rate": 4.21e-05,
1633
+ "loss": 0.2945,
1634
+ "step": 1211
1635
+ },
1636
+ {
1637
+ "epoch": 23.894039735099337,
1638
+ "grad_norm": 5.42747163772583,
1639
+ "learning_rate": 4.2022222222222223e-05,
1640
+ "loss": 0.365,
1641
+ "step": 1218
1642
+ },
1643
+ {
1644
+ "epoch": 24.0,
1645
+ "eval_accuracy": 0.8868852459016393,
1646
+ "eval_f1_macro": 0.8826740438354216,
1647
+ "eval_f1_micro": 0.8868852459016393,
1648
+ "eval_f1_weighted": 0.885225590303497,
1649
+ "eval_loss": 0.41284599900245667,
1650
+ "eval_precision_macro": 0.8989790764790766,
1651
+ "eval_precision_micro": 0.8868852459016393,
1652
+ "eval_precision_weighted": 0.90095879757765,
1653
+ "eval_recall_macro": 0.8842916666666667,
1654
+ "eval_recall_micro": 0.8868852459016393,
1655
+ "eval_recall_weighted": 0.8868852459016393,
1656
+ "eval_runtime": 18.0644,
1657
+ "eval_samples_per_second": 67.536,
1658
+ "eval_steps_per_second": 1.107,
1659
+ "step": 1224
1660
+ },
1661
+ {
1662
+ "epoch": 24.019867549668874,
1663
+ "grad_norm": 4.910553455352783,
1664
+ "learning_rate": 4.194444444444445e-05,
1665
+ "loss": 0.27,
1666
+ "step": 1225
1667
+ },
1668
+ {
1669
+ "epoch": 24.158940397350992,
1670
+ "grad_norm": 4.917506694793701,
1671
+ "learning_rate": 4.186666666666667e-05,
1672
+ "loss": 0.2662,
1673
+ "step": 1232
1674
+ },
1675
+ {
1676
+ "epoch": 24.29801324503311,
1677
+ "grad_norm": 6.041675090789795,
1678
+ "learning_rate": 4.178888888888889e-05,
1679
+ "loss": 0.3693,
1680
+ "step": 1239
1681
+ },
1682
+ {
1683
+ "epoch": 24.437086092715234,
1684
+ "grad_norm": 6.5168776512146,
1685
+ "learning_rate": 4.171111111111111e-05,
1686
+ "loss": 0.2868,
1687
+ "step": 1246
1688
+ },
1689
+ {
1690
+ "epoch": 24.576158940397352,
1691
+ "grad_norm": 3.36521315574646,
1692
+ "learning_rate": 4.1633333333333333e-05,
1693
+ "loss": 0.4133,
1694
+ "step": 1253
1695
+ },
1696
+ {
1697
+ "epoch": 24.71523178807947,
1698
+ "grad_norm": 4.277838230133057,
1699
+ "learning_rate": 4.155555555555556e-05,
1700
+ "loss": 0.2291,
1701
+ "step": 1260
1702
+ },
1703
+ {
1704
+ "epoch": 24.85430463576159,
1705
+ "grad_norm": 5.821409225463867,
1706
+ "learning_rate": 4.147777777777778e-05,
1707
+ "loss": 0.2927,
1708
+ "step": 1267
1709
+ },
1710
+ {
1711
+ "epoch": 24.99337748344371,
1712
+ "grad_norm": 6.532901763916016,
1713
+ "learning_rate": 4.14e-05,
1714
+ "loss": 0.3711,
1715
+ "step": 1274
1716
+ },
1717
+ {
1718
+ "epoch": 25.0,
1719
+ "eval_accuracy": 0.8885245901639345,
1720
+ "eval_f1_macro": 0.8860624318491966,
1721
+ "eval_f1_micro": 0.8885245901639345,
1722
+ "eval_f1_weighted": 0.8860652284338398,
1723
+ "eval_loss": 0.3975684940814972,
1724
+ "eval_precision_macro": 0.9022628066378067,
1725
+ "eval_precision_micro": 0.8885245901639345,
1726
+ "eval_precision_weighted": 0.901722671681688,
1727
+ "eval_recall_macro": 0.8880000000000001,
1728
+ "eval_recall_micro": 0.8885245901639345,
1729
+ "eval_recall_weighted": 0.8885245901639345,
1730
+ "eval_runtime": 19.8974,
1731
+ "eval_samples_per_second": 61.315,
1732
+ "eval_steps_per_second": 1.005,
1733
+ "step": 1275
1734
+ }
1735
+ ],
1736
+ "logging_steps": 7,
1737
+ "max_steps": 5000,
1738
+ "num_input_tokens_seen": 0,
1739
+ "num_train_epochs": 100,
1740
+ "save_steps": 7,
1741
+ "stateful_callbacks": {
1742
+ "EarlyStoppingCallback": {
1743
+ "args": {
1744
+ "early_stopping_patience": 5,
1745
+ "early_stopping_threshold": 0.01
1746
+ },
1747
+ "attributes": {
1748
+ "early_stopping_patience_counter": 5
1749
+ }
1750
+ },
1751
+ "TrainerControl": {
1752
+ "args": {
1753
+ "should_epoch_stop": false,
1754
+ "should_evaluate": false,
1755
+ "should_log": false,
1756
+ "should_save": true,
1757
+ "should_training_stop": true
1758
+ },
1759
+ "attributes": {}
1760
+ }
1761
+ },
1762
+ "total_flos": 9.44898430624727e+18,
1763
+ "train_batch_size": 32,
1764
+ "trial_name": null,
1765
+ "trial_params": null
1766
+ }
checkpoint-1275/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e36d308132a8108aca1f8f9de8ab7c5f8ba3dca8073c7913d5cd428b151f1cc
3
+ size 5368