14kwonss committed on
Commit
4e55c46
·
verified ·
1 Parent(s): 0fbef06

Upload model checkpoint-138718

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,1525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "finetuning_task": "text-classification",
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "Akuapim-twi_Latn",
15
+ "1": "Asante-twi_Latn",
16
+ "2": "aaa_Latn",
17
+ "3": "aar_Arab",
18
+ "4": "aar_Latn",
19
+ "5": "aba_Latn",
20
+ "6": "abi_Latn",
21
+ "7": "abn_Latn",
22
+ "8": "acd_Latn",
23
+ "9": "ach_Latn",
24
+ "10": "acq_Arab",
25
+ "11": "ada_Latn",
26
+ "12": "ade_Latn",
27
+ "13": "adh_Latn",
28
+ "14": "adj_Latn",
29
+ "15": "adq_Latn",
30
+ "16": "aeb_Arab",
31
+ "17": "afr_Latn",
32
+ "18": "agq_Latn",
33
+ "19": "ags_Latn",
34
+ "20": "aha_Latn",
35
+ "21": "ajg_Latn",
36
+ "22": "aka_Latn",
37
+ "23": "akp_Latn",
38
+ "24": "ald_Latn",
39
+ "25": "alz_Latn",
40
+ "26": "amf_Latn",
41
+ "27": "amh_Ethi",
42
+ "28": "ann_Latn",
43
+ "29": "anu_Ethi",
44
+ "30": "anv_Latn",
45
+ "31": "any_Latn",
46
+ "32": "apd_Arab",
47
+ "33": "ara_Arab",
48
+ "34": "arb_Arab",
49
+ "35": "arb_Latn",
50
+ "36": "arq_Arab",
51
+ "37": "ary_Arab",
52
+ "38": "arz_Arab",
53
+ "39": "asa_Latn",
54
+ "40": "asg_Latn",
55
+ "41": "atg_Latn",
56
+ "42": "ati_Latn",
57
+ "43": "avn_Latn",
58
+ "44": "avu_Latn",
59
+ "45": "ayl_Arab",
60
+ "46": "azo_Latn",
61
+ "47": "bag_Latn",
62
+ "48": "bam_Latn",
63
+ "49": "bas_Latn",
64
+ "50": "bav_Latn",
65
+ "51": "baw_Arab",
66
+ "52": "bax_Latn",
67
+ "53": "bba_Latn",
68
+ "54": "bbj_Latn",
69
+ "55": "bbk_Latn",
70
+ "56": "bbo_Latn",
71
+ "57": "bce_Latn",
72
+ "58": "bci_Latn",
73
+ "59": "bcn_Latn",
74
+ "60": "bcw_Latn",
75
+ "61": "bcy_Latn",
76
+ "62": "bdh_Latn",
77
+ "63": "bds_Latn",
78
+ "64": "bec_Latn",
79
+ "65": "bem_Latn",
80
+ "66": "beq_Latn",
81
+ "67": "ber_Tfng",
82
+ "68": "bex_Latn",
83
+ "69": "bez_Latn",
84
+ "70": "bfa_Latn",
85
+ "71": "bfd_Latn",
86
+ "72": "bfm_Latn",
87
+ "73": "bfo_Latn",
88
+ "74": "bgf_Latn",
89
+ "75": "bhs_Latn",
90
+ "76": "bib_Latn",
91
+ "77": "bim_Latn",
92
+ "78": "bin_Latn",
93
+ "79": "biv_Latn",
94
+ "80": "bjv_Latn",
95
+ "81": "bkc_Latn",
96
+ "82": "bkh_Latn",
97
+ "83": "bkm_Latn",
98
+ "84": "bkv_Latn",
99
+ "85": "bky_Latn",
100
+ "86": "blh_Latn",
101
+ "87": "blo_Latn",
102
+ "88": "bmo_Latn",
103
+ "89": "bmq_Latn",
104
+ "90": "bmv_Latn",
105
+ "91": "bob_Latn",
106
+ "92": "bom_Latn",
107
+ "93": "bov_Latn",
108
+ "94": "box_Latn",
109
+ "95": "boz_Latn",
110
+ "96": "bqc_Latn",
111
+ "97": "bqj_Latn",
112
+ "98": "bqm_Latn",
113
+ "99": "bqp_Latn",
114
+ "100": "bri_Latn",
115
+ "101": "bsc_Latn",
116
+ "102": "bsp_Latn",
117
+ "103": "bsq_Latn",
118
+ "104": "bss_Latn",
119
+ "105": "bst_Ethi",
120
+ "106": "btt_Latn",
121
+ "107": "bud_Latn",
122
+ "108": "bum_Latn",
123
+ "109": "bun_Latn",
124
+ "110": "bus_Latn",
125
+ "111": "buy_Latn",
126
+ "112": "bwq_Latn",
127
+ "113": "bwr_Latn",
128
+ "114": "bwt_Latn",
129
+ "115": "bwu_Latn",
130
+ "116": "bxk_Latn",
131
+ "117": "byf_Latn",
132
+ "118": "byv_Latn",
133
+ "119": "bza_Latn",
134
+ "120": "bze_Arab",
135
+ "121": "bzw_Latn",
136
+ "122": "cce_Latn",
137
+ "123": "cgg_Latn",
138
+ "124": "chw_Latn",
139
+ "125": "cjk_Latn",
140
+ "126": "cko_Latn",
141
+ "127": "cme_Latn",
142
+ "128": "cop_Copt",
143
+ "129": "cou_Latn",
144
+ "130": "cri_Latn",
145
+ "131": "crs_Latn",
146
+ "132": "csk_Latn",
147
+ "133": "cuh_Latn",
148
+ "134": "cuv_Latn",
149
+ "135": "cwe_Latn",
150
+ "136": "cwt_Latn",
151
+ "137": "daa_Latn",
152
+ "138": "dag_Latn",
153
+ "139": "dav_Latn",
154
+ "140": "dbq_Latn",
155
+ "141": "ddn_Latn",
156
+ "142": "dga_Latn",
157
+ "143": "dgd_Latn",
158
+ "144": "dgi_Latn",
159
+ "145": "dhm_Latn",
160
+ "146": "dib_Latn",
161
+ "147": "did_Latn",
162
+ "148": "dig_Latn",
163
+ "149": "dik_Latn",
164
+ "150": "din_Latn",
165
+ "151": "dip_Latn",
166
+ "152": "diu_Latn",
167
+ "153": "dje_Latn",
168
+ "154": "dks_Latn",
169
+ "155": "dnj_Latn",
170
+ "156": "dop_Latn",
171
+ "157": "dos_Latn",
172
+ "158": "dov_Latn",
173
+ "159": "dow_Latn",
174
+ "160": "dsh_Latn",
175
+ "161": "dts_Latn",
176
+ "162": "dua_Latn",
177
+ "163": "dug_Latn",
178
+ "164": "dur_Latn",
179
+ "165": "dwr_Ethi",
180
+ "166": "dwr_Latn",
181
+ "167": "dyi_Latn",
182
+ "168": "dyo_Latn",
183
+ "169": "dyu_Latn",
184
+ "170": "ebr_Latn",
185
+ "171": "ebu_Latn",
186
+ "172": "efi_Latn",
187
+ "173": "ego_Latn",
188
+ "174": "eka_Latn",
189
+ "175": "ekm_Latn",
190
+ "176": "eko_Latn",
191
+ "177": "emk_Latn",
192
+ "178": "enb_Latn",
193
+ "179": "eot_Latn",
194
+ "180": "eto_Latn",
195
+ "181": "ets_Latn",
196
+ "182": "etu_Latn",
197
+ "183": "etx_Latn",
198
+ "184": "ewe_Latn",
199
+ "185": "ewo_Latn",
200
+ "186": "eza_Latn",
201
+ "187": "fak_Latn",
202
+ "188": "fal_Latn",
203
+ "189": "fan_Latn",
204
+ "190": "fat_Latn",
205
+ "191": "ffm_Latn",
206
+ "192": "fia_Latn",
207
+ "193": "fip_Latn",
208
+ "194": "fli_Latn",
209
+ "195": "flr_Latn",
210
+ "196": "fon_Latn",
211
+ "197": "fub_Latn",
212
+ "198": "fuc_Latn",
213
+ "199": "fue_Latn",
214
+ "200": "fuf_Latn",
215
+ "201": "fuh_Latn",
216
+ "202": "ful_Latn",
217
+ "203": "fuq_Latn",
218
+ "204": "fuv_Arab",
219
+ "205": "fuv_Latn",
220
+ "206": "fvr_Latn",
221
+ "207": "gaa_Latn",
222
+ "208": "gax_Latn",
223
+ "209": "gaz_Latn",
224
+ "210": "gbo_Latn",
225
+ "211": "gbr_Latn",
226
+ "212": "gde_Latn",
227
+ "213": "gej_Latn",
228
+ "214": "gez_Ethi",
229
+ "215": "gid_Latn",
230
+ "216": "giz_Latn",
231
+ "217": "gjn_Latn",
232
+ "218": "gkn_Latn",
233
+ "219": "gkp_Latn",
234
+ "220": "gmv_Ethi",
235
+ "221": "gmv_Latn",
236
+ "222": "gna_Latn",
237
+ "223": "gnd_Latn",
238
+ "224": "gng_Latn",
239
+ "225": "goa_Latn",
240
+ "226": "gof_Ethi",
241
+ "227": "gof_Latn",
242
+ "228": "gog_Latn",
243
+ "229": "gol_Vaii",
244
+ "230": "gou_Latn",
245
+ "231": "gqr_Latn",
246
+ "232": "gso_Latn",
247
+ "233": "gud_Latn",
248
+ "234": "guk_Ethi",
249
+ "235": "gur_Latn",
250
+ "236": "guw_Latn",
251
+ "237": "gux_Latn",
252
+ "238": "guz_Latn",
253
+ "239": "gvl_Latn",
254
+ "240": "gwl_Latn",
255
+ "241": "gwr_Latn",
256
+ "242": "gya_Latn",
257
+ "243": "hae_Latn",
258
+ "244": "hag_Latn",
259
+ "245": "har_Ethi",
260
+ "246": "hau_Latn",
261
+ "247": "hav_Latn",
262
+ "248": "hay_Latn",
263
+ "249": "hbb_Latn",
264
+ "250": "hdy_Ethi",
265
+ "251": "heh_Latn",
266
+ "252": "her_Latn",
267
+ "253": "hgm_Latn",
268
+ "254": "hig_Latn",
269
+ "255": "hna_Latn",
270
+ "256": "ibb_Latn",
271
+ "257": "ibo_Latn",
272
+ "258": "idu_Latn",
273
+ "259": "ife_Latn",
274
+ "260": "igb_Latn",
275
+ "261": "ige_Latn",
276
+ "262": "igl_Latn",
277
+ "263": "ijc_Latn",
278
+ "264": "ijn_Latn",
279
+ "265": "ijs_Latn",
280
+ "266": "ikk_Latn",
281
+ "267": "ikw_Latn",
282
+ "268": "ilb_Latn",
283
+ "269": "iqw_Latn",
284
+ "270": "iri_Latn",
285
+ "271": "irk_Latn",
286
+ "272": "ish_Latn",
287
+ "273": "iso_Latn",
288
+ "274": "isu_Latn",
289
+ "275": "iyx_Latn",
290
+ "276": "izr_Latn",
291
+ "277": "izz_Latn",
292
+ "278": "jab_Latn",
293
+ "279": "jbu_Latn",
294
+ "280": "jen_Latn",
295
+ "281": "jgo_Latn",
296
+ "282": "jib_Latn",
297
+ "283": "jit_Latn",
298
+ "284": "jmc_Latn",
299
+ "285": "kab_Latn",
300
+ "286": "kam_Latn",
301
+ "287": "kao_Latn",
302
+ "288": "kau_Latn",
303
+ "289": "kbn_Latn",
304
+ "290": "kbo_Latn",
305
+ "291": "kbp_Latn",
306
+ "292": "kbr_Latn",
307
+ "293": "kby_Latn",
308
+ "294": "kcg_Latn",
309
+ "295": "kck_Latn",
310
+ "296": "kcp_Latn",
311
+ "297": "kdc_Latn",
312
+ "298": "kde_Latn",
313
+ "299": "kdh_Latn",
314
+ "300": "kdi_Latn",
315
+ "301": "kdj_Latn",
316
+ "302": "kdl_Latn",
317
+ "303": "kdn_Latn",
318
+ "304": "kea_Latn",
319
+ "305": "ken_Latn",
320
+ "306": "keo_Latn",
321
+ "307": "ker_Latn",
322
+ "308": "kez_Latn",
323
+ "309": "khq_Latn",
324
+ "310": "khy_Latn",
325
+ "311": "kia_Latn",
326
+ "312": "kik_Latn",
327
+ "313": "kin_Latn",
328
+ "314": "kiz_Latn",
329
+ "315": "kki_Latn",
330
+ "316": "kkj_Latn",
331
+ "317": "kln_Latn",
332
+ "318": "klu_Latn",
333
+ "319": "kma_Latn",
334
+ "320": "kmb_Latn",
335
+ "321": "kmy_Latn",
336
+ "322": "knc_Arab",
337
+ "323": "knc_Latn",
338
+ "324": "knf_Latn",
339
+ "325": "kng_Latn",
340
+ "326": "knk_Latn",
341
+ "327": "kno_Latn",
342
+ "328": "kny_Latn",
343
+ "329": "kon_Latn",
344
+ "330": "koo_Latn",
345
+ "331": "koq_Latn",
346
+ "332": "kpz_Latn",
347
+ "333": "kqn_Latn",
348
+ "334": "kqo_Latn",
349
+ "335": "kqp_Latn",
350
+ "336": "kqs_Latn",
351
+ "337": "kqy_Ethi",
352
+ "338": "kri_Latn",
353
+ "339": "krs_Latn",
354
+ "340": "krw_Latn",
355
+ "341": "krx_Latn",
356
+ "342": "ksb_Latn",
357
+ "343": "ksf_Latn",
358
+ "344": "ksp_Latn",
359
+ "345": "kss_Latn",
360
+ "346": "ktb_Ethi",
361
+ "347": "ktj_Latn",
362
+ "348": "ktu_Latn",
363
+ "349": "ktz_Latn",
364
+ "350": "kua_Latn",
365
+ "351": "kub_Latn",
366
+ "352": "kuj_Latn",
367
+ "353": "kus_Latn",
368
+ "354": "kvj_Latn",
369
+ "355": "kwn_Latn",
370
+ "356": "kwu_Latn",
371
+ "357": "kwy_Latn",
372
+ "358": "kxc_Ethi",
373
+ "359": "kyf_Latn",
374
+ "360": "kyq_Latn",
375
+ "361": "kzn_Latn",
376
+ "362": "kzr_Latn",
377
+ "363": "lai_Latn",
378
+ "364": "laj_Latn",
379
+ "365": "lam_Latn",
380
+ "366": "lan_Latn",
381
+ "367": "lap_Latn",
382
+ "368": "las_Latn",
383
+ "369": "ldi_Latn",
384
+ "370": "lea_Latn",
385
+ "371": "led_Latn",
386
+ "372": "lee_Latn",
387
+ "373": "lef_Latn",
388
+ "374": "leh_Latn",
389
+ "375": "lem_Latn",
390
+ "376": "lfa_Latn",
391
+ "377": "lgg_Latn",
392
+ "378": "lgm_Latn",
393
+ "379": "lia_Latn",
394
+ "380": "lik_Latn",
395
+ "381": "lin_Latn",
396
+ "382": "lip_Latn",
397
+ "383": "lkb_Latn",
398
+ "384": "lke_Latn",
399
+ "385": "lko_Latn",
400
+ "386": "llb_Latn",
401
+ "387": "lln_Latn",
402
+ "388": "lmd_Latn",
403
+ "389": "lmp_Latn",
404
+ "390": "lnl_Latn",
405
+ "391": "lns_Latn",
406
+ "392": "lob_Latn",
407
+ "393": "log_Latn",
408
+ "394": "loh_Latn",
409
+ "395": "lok_Latn",
410
+ "396": "lol_Latn",
411
+ "397": "lom_Latn",
412
+ "398": "loq_Latn",
413
+ "399": "lot_Latn",
414
+ "400": "loz_Latn",
415
+ "401": "lro_Latn",
416
+ "402": "lsm_Latn",
417
+ "403": "lth_Latn",
418
+ "404": "lto_Latn",
419
+ "405": "lts_Latn",
420
+ "406": "lua_Latn",
421
+ "407": "lub_Latn",
422
+ "408": "luc_Latn",
423
+ "409": "lue_Latn",
424
+ "410": "lug_Latn",
425
+ "411": "lun_Latn",
426
+ "412": "luo_Latn",
427
+ "413": "luy_Latn",
428
+ "414": "lwg_Latn",
429
+ "415": "lwo_Latn",
430
+ "416": "maf_Latn",
431
+ "417": "mas_Latn",
432
+ "418": "maw_Latn",
433
+ "419": "mbu_Latn",
434
+ "420": "mck_Latn",
435
+ "421": "mcn_Latn",
436
+ "422": "mcp_Latn",
437
+ "423": "mcu_Latn",
438
+ "424": "mda_Latn",
439
+ "425": "mdm_Latn",
440
+ "426": "mdy_Ethi",
441
+ "427": "men_Latn",
442
+ "428": "meq_Latn",
443
+ "429": "mer_Latn",
444
+ "430": "mev_Latn",
445
+ "431": "mfe_Latn",
446
+ "432": "mfg_Latn",
447
+ "433": "mfh_Latn",
448
+ "434": "mfi_Latn",
449
+ "435": "mfj_Latn",
450
+ "436": "mfk_Latn",
451
+ "437": "mfq_Latn",
452
+ "438": "mfz_Latn",
453
+ "439": "mgc_Latn",
454
+ "440": "mgg_Latn",
455
+ "441": "mgh_Latn",
456
+ "442": "mgo_Latn",
457
+ "443": "mgq_Latn",
458
+ "444": "mgr_Latn",
459
+ "445": "mgw_Latn",
460
+ "446": "mhi_Latn",
461
+ "447": "mhw_Latn",
462
+ "448": "mif_Latn",
463
+ "449": "mkl_Latn",
464
+ "450": "mlg_Latn",
465
+ "451": "mlk_Latn",
466
+ "452": "mlr_Latn",
467
+ "453": "mlw_Latn",
468
+ "454": "mmu_Latn",
469
+ "455": "mmy_Latn",
470
+ "456": "mne_Latn",
471
+ "457": "mnf_Latn",
472
+ "458": "mnk_Latn",
473
+ "459": "mny_Latn",
474
+ "460": "moa_Latn",
475
+ "461": "mor_Latn",
476
+ "462": "mos_Latn",
477
+ "463": "moy_Latn",
478
+ "464": "moz_Latn",
479
+ "465": "mpe_Ethi",
480
+ "466": "mpg_Latn",
481
+ "467": "mqb_Latn",
482
+ "468": "msc_Latn",
483
+ "469": "mse_Latn",
484
+ "470": "mua_Latn",
485
+ "471": "mug_Latn",
486
+ "472": "muh_Latn",
487
+ "473": "mur_Latn",
488
+ "474": "muy_Latn",
489
+ "475": "mwe_Latn",
490
+ "476": "mwm_Latn",
491
+ "477": "mwn_Latn",
492
+ "478": "mws_Latn",
493
+ "479": "mxu_Latn",
494
+ "480": "myb_Latn",
495
+ "481": "myk_Latn",
496
+ "482": "myx_Latn",
497
+ "483": "mzk_Latn",
498
+ "484": "mzm_Latn",
499
+ "485": "mzw_Latn",
500
+ "486": "naq_Latn",
501
+ "487": "naw_Latn",
502
+ "488": "nba_Latn",
503
+ "489": "nbl_Latn",
504
+ "490": "ncu_Latn",
505
+ "491": "ndc_Latn",
506
+ "492": "nde_Latn",
507
+ "493": "ndh_Latn",
508
+ "494": "ndi_Latn",
509
+ "495": "ndj_Latn",
510
+ "496": "ndo_Latn",
511
+ "497": "ndp_Latn",
512
+ "498": "ndv_Latn",
513
+ "499": "ndy_Latn",
514
+ "500": "ndz_Latn",
515
+ "501": "neb_Latn",
516
+ "502": "nfr_Latn",
517
+ "503": "ngb_Latn",
518
+ "504": "ngc_Latn",
519
+ "505": "nge_Latn",
520
+ "506": "ngl_Latn",
521
+ "507": "ngn_Latn",
522
+ "508": "ngp_Latn",
523
+ "509": "nhr_Latn",
524
+ "510": "nhu_Latn",
525
+ "511": "nih_Latn",
526
+ "512": "nim_Latn",
527
+ "513": "nin_Latn",
528
+ "514": "niq_Latn",
529
+ "515": "niy_Latn",
530
+ "516": "njd_Latn",
531
+ "517": "njy_Latn",
532
+ "518": "nka_Latn",
533
+ "519": "nko_Latn",
534
+ "520": "nku_Latn",
535
+ "521": "nla_Latn",
536
+ "522": "nle_Latn",
537
+ "523": "nmz_Latn",
538
+ "524": "nnb_Latn",
539
+ "525": "nnh_Latn",
540
+ "526": "nnq_Latn",
541
+ "527": "nnw_Latn",
542
+ "528": "nqo_Nkoo",
543
+ "529": "nse_Latn",
544
+ "530": "nso_Latn",
545
+ "531": "ntr_Latn",
546
+ "532": "nuj_Latn",
547
+ "533": "nup_Latn",
548
+ "534": "nus_Latn",
549
+ "535": "nwb_Latn",
550
+ "536": "nwe_Latn",
551
+ "537": "nxd_Latn",
552
+ "538": "nya_Latn",
553
+ "539": "nyb_Latn",
554
+ "540": "nyd_Latn",
555
+ "541": "nyf_Latn",
556
+ "542": "nyk_Latn",
557
+ "543": "nym_Latn",
558
+ "544": "nyn_Latn",
559
+ "545": "nyo_Latn",
560
+ "546": "nyu_Latn",
561
+ "547": "nyy_Latn",
562
+ "548": "nza_Latn",
563
+ "549": "nzi_Latn",
564
+ "550": "odu_Latn",
565
+ "551": "ogo_Latn",
566
+ "552": "oke_Latn",
567
+ "553": "oki_Latn",
568
+ "554": "okr_Latn",
569
+ "555": "oku_Latn",
570
+ "556": "old_Latn",
571
+ "557": "orm_Latn",
572
+ "558": "ozm_Latn",
573
+ "559": "pae_Latn",
574
+ "560": "pbi_Latn",
575
+ "561": "pcm_Latn",
576
+ "562": "pem_Latn",
577
+ "563": "pfe_Latn",
578
+ "564": "phm_Latn",
579
+ "565": "pil_Latn",
580
+ "566": "pkb_Latn",
581
+ "567": "pko_Latn",
582
+ "568": "plt_Latn",
583
+ "569": "pny_Latn",
584
+ "570": "pnz_Latn",
585
+ "571": "pov_Latn",
586
+ "572": "poy_Latn",
587
+ "573": "rag_Latn",
588
+ "574": "rcf_Latn",
589
+ "575": "rel_Latn",
590
+ "576": "rif_Tfng",
591
+ "577": "rim_Latn",
592
+ "578": "rnd_Latn",
593
+ "579": "rng_Latn",
594
+ "580": "rub_Latn",
595
+ "581": "ruf_Latn",
596
+ "582": "run_Latn",
597
+ "583": "rwk_Latn",
598
+ "584": "sag_Latn",
599
+ "585": "saq_Latn",
600
+ "586": "say_Latn",
601
+ "587": "sba_Latn",
602
+ "588": "sbd_Latn",
603
+ "589": "sbp_Latn",
604
+ "590": "sbs_Latn",
605
+ "591": "sby_Latn",
606
+ "592": "sef_Latn",
607
+ "593": "seh_Latn",
608
+ "594": "ses_Latn",
609
+ "595": "sev_Latn",
610
+ "596": "sfw_Latn",
611
+ "597": "sgc_Latn",
612
+ "598": "sgw_Ethi",
613
+ "599": "shi_Latn",
614
+ "600": "shj_Latn",
615
+ "601": "shk_Latn",
616
+ "602": "shr_Latn",
617
+ "603": "shu_Arab",
618
+ "604": "sid_Latn",
619
+ "605": "sig_Latn",
620
+ "606": "sil_Latn",
621
+ "607": "skg_Latn",
622
+ "608": "sld_Latn",
623
+ "609": "sna_Latn",
624
+ "610": "snf_Latn",
625
+ "611": "sng_Latn",
626
+ "612": "snk_Latn",
627
+ "613": "snw_Latn",
628
+ "614": "soe_Latn",
629
+ "615": "som_Latn",
630
+ "616": "sop_Latn",
631
+ "617": "sor_Latn",
632
+ "618": "sot_Latn",
633
+ "619": "sox_Latn",
634
+ "620": "soy_Latn",
635
+ "621": "spp_Latn",
636
+ "622": "spy_Latn",
637
+ "623": "srr_Latn",
638
+ "624": "ssc_Latn",
639
+ "625": "ssn_Latn",
640
+ "626": "ssw_Latn",
641
+ "627": "stv_Ethi",
642
+ "628": "suk_Latn",
643
+ "629": "sur_Latn",
644
+ "630": "sus_Arab",
645
+ "631": "sus_Latn",
646
+ "632": "swa_Latn",
647
+ "633": "swb_Latn",
648
+ "634": "swc_Latn",
649
+ "635": "swh_Latn",
650
+ "636": "swk_Latn",
651
+ "637": "sxb_Latn",
652
+ "638": "tap_Latn",
653
+ "639": "taq_Latn",
654
+ "640": "taq_Tfng",
655
+ "641": "tbz_Latn",
656
+ "642": "tcc_Latn",
657
+ "643": "tcd_Latn",
658
+ "644": "tdx_Latn",
659
+ "645": "ted_Latn",
660
+ "646": "tem_Latn",
661
+ "647": "teo_Latn",
662
+ "648": "tex_Latn",
663
+ "649": "tgw_Latn",
664
+ "650": "thk_Latn",
665
+ "651": "thv_Latn",
666
+ "652": "thy_Latn",
667
+ "653": "tig_Ethi",
668
+ "654": "tik_Latn",
669
+ "655": "tir_Ethi",
670
+ "656": "tiv_Latn",
671
+ "657": "tjo_Arab",
672
+ "658": "tke_Latn",
673
+ "659": "tlj_Latn",
674
+ "660": "tll_Latn",
675
+ "661": "tmc_Latn",
676
+ "662": "tnr_Latn",
677
+ "663": "tod_Latn",
678
+ "664": "tog_Latn",
679
+ "665": "toh_Latn",
680
+ "666": "toi_Latn",
681
+ "667": "tpm_Latn",
682
+ "668": "tsb_Latn",
683
+ "669": "tsc_Latn",
684
+ "670": "tsn_Latn",
685
+ "671": "tso_Latn",
686
+ "672": "tsw_Latn",
687
+ "673": "ttj_Latn",
688
+ "674": "ttq_Latn",
689
+ "675": "ttq_Tfng",
690
+ "676": "ttr_Latn",
691
+ "677": "tui_Latn",
692
+ "678": "tul_Latn",
693
+ "679": "tum_Latn",
694
+ "680": "tuv_Latn",
695
+ "681": "tuz_Latn",
696
+ "682": "tvs_Latn",
697
+ "683": "tvu_Latn",
698
+ "684": "twi_Latn",
699
+ "685": "twx_Latn",
700
+ "686": "tzm_Tfng",
701
+ "687": "udu_Latn",
702
+ "688": "umb_Latn",
703
+ "689": "urh_Latn",
704
+ "690": "uth_Latn",
705
+ "691": "vag_Latn",
706
+ "692": "vai_Vaii",
707
+ "693": "ven_Latn",
708
+ "694": "vid_Latn",
709
+ "695": "vif_Latn",
710
+ "696": "vmk_Latn",
711
+ "697": "vmw_Latn",
712
+ "698": "vun_Latn",
713
+ "699": "vut_Latn",
714
+ "700": "wal_Latn",
715
+ "701": "wbi_Latn",
716
+ "702": "wec_Latn",
717
+ "703": "wes_Latn",
718
+ "704": "wib_Latn",
719
+ "705": "wlx_Latn",
720
+ "706": "wmw_Latn",
721
+ "707": "wni_Latn",
722
+ "708": "wob_Latn",
723
+ "709": "wol_Latn",
724
+ "710": "won_Latn",
725
+ "711": "wwa_Latn",
726
+ "712": "xan_Ethi",
727
+ "713": "xed_Latn",
728
+ "714": "xho_Latn",
729
+ "715": "xkg_Latn",
730
+ "716": "xmd_Latn",
731
+ "717": "xmg_Latn",
732
+ "718": "xmv_Latn",
733
+ "719": "xnz_Arab",
734
+ "720": "xog_Latn",
735
+ "721": "xon_Latn",
736
+ "722": "xpe_Latn",
737
+ "723": "xrb_Latn",
738
+ "724": "xsm_Latn",
739
+ "725": "xtc_Latn",
740
+ "726": "xuo_Latn",
741
+ "727": "yal_Latn",
742
+ "728": "yam_Latn",
743
+ "729": "yao_Latn",
744
+ "730": "yas_Latn",
745
+ "731": "yat_Latn",
746
+ "732": "yav_Latn",
747
+ "733": "yaz_Latn",
748
+ "734": "yba_Latn",
749
+ "735": "ybb_Latn",
750
+ "736": "yom_Latn",
751
+ "737": "yor_Latn",
752
+ "738": "yre_Latn",
753
+ "739": "zaj_Latn",
754
+ "740": "zdj_Latn",
755
+ "741": "zga_Latn",
756
+ "742": "zgh_Tfng",
757
+ "743": "ziw_Latn",
758
+ "744": "zne_Latn",
759
+ "745": "zul_Latn"
760
+ },
761
+ "initializer_range": 0.02,
762
+ "intermediate_size": 3072,
763
+ "label2id": {
764
+ "Akuapim-twi_Latn": 0,
765
+ "Asante-twi_Latn": 1,
766
+ "aaa_Latn": 2,
767
+ "aar_Arab": 3,
768
+ "aar_Latn": 4,
769
+ "aba_Latn": 5,
770
+ "abi_Latn": 6,
771
+ "abn_Latn": 7,
772
+ "acd_Latn": 8,
773
+ "ach_Latn": 9,
774
+ "acq_Arab": 10,
775
+ "ada_Latn": 11,
776
+ "ade_Latn": 12,
777
+ "adh_Latn": 13,
778
+ "adj_Latn": 14,
779
+ "adq_Latn": 15,
780
+ "aeb_Arab": 16,
781
+ "afr_Latn": 17,
782
+ "agq_Latn": 18,
783
+ "ags_Latn": 19,
784
+ "aha_Latn": 20,
785
+ "ajg_Latn": 21,
786
+ "aka_Latn": 22,
787
+ "akp_Latn": 23,
788
+ "ald_Latn": 24,
789
+ "alz_Latn": 25,
790
+ "amf_Latn": 26,
791
+ "amh_Ethi": 27,
792
+ "ann_Latn": 28,
793
+ "anu_Ethi": 29,
794
+ "anv_Latn": 30,
795
+ "any_Latn": 31,
796
+ "apd_Arab": 32,
797
+ "ara_Arab": 33,
798
+ "arb_Arab": 34,
799
+ "arb_Latn": 35,
800
+ "arq_Arab": 36,
801
+ "ary_Arab": 37,
802
+ "arz_Arab": 38,
803
+ "asa_Latn": 39,
804
+ "asg_Latn": 40,
805
+ "atg_Latn": 41,
806
+ "ati_Latn": 42,
807
+ "avn_Latn": 43,
808
+ "avu_Latn": 44,
809
+ "ayl_Arab": 45,
810
+ "azo_Latn": 46,
811
+ "bag_Latn": 47,
812
+ "bam_Latn": 48,
813
+ "bas_Latn": 49,
814
+ "bav_Latn": 50,
815
+ "baw_Arab": 51,
816
+ "bax_Latn": 52,
817
+ "bba_Latn": 53,
818
+ "bbj_Latn": 54,
819
+ "bbk_Latn": 55,
820
+ "bbo_Latn": 56,
821
+ "bce_Latn": 57,
822
+ "bci_Latn": 58,
823
+ "bcn_Latn": 59,
824
+ "bcw_Latn": 60,
825
+ "bcy_Latn": 61,
826
+ "bdh_Latn": 62,
827
+ "bds_Latn": 63,
828
+ "bec_Latn": 64,
829
+ "bem_Latn": 65,
830
+ "beq_Latn": 66,
831
+ "ber_Tfng": 67,
832
+ "bex_Latn": 68,
833
+ "bez_Latn": 69,
834
+ "bfa_Latn": 70,
835
+ "bfd_Latn": 71,
836
+ "bfm_Latn": 72,
837
+ "bfo_Latn": 73,
838
+ "bgf_Latn": 74,
839
+ "bhs_Latn": 75,
840
+ "bib_Latn": 76,
841
+ "bim_Latn": 77,
842
+ "bin_Latn": 78,
843
+ "biv_Latn": 79,
844
+ "bjv_Latn": 80,
845
+ "bkc_Latn": 81,
846
+ "bkh_Latn": 82,
847
+ "bkm_Latn": 83,
848
+ "bkv_Latn": 84,
849
+ "bky_Latn": 85,
850
+ "blh_Latn": 86,
851
+ "blo_Latn": 87,
852
+ "bmo_Latn": 88,
853
+ "bmq_Latn": 89,
854
+ "bmv_Latn": 90,
855
+ "bob_Latn": 91,
856
+ "bom_Latn": 92,
857
+ "bov_Latn": 93,
858
+ "box_Latn": 94,
859
+ "boz_Latn": 95,
860
+ "bqc_Latn": 96,
861
+ "bqj_Latn": 97,
862
+ "bqm_Latn": 98,
863
+ "bqp_Latn": 99,
864
+ "bri_Latn": 100,
865
+ "bsc_Latn": 101,
866
+ "bsp_Latn": 102,
867
+ "bsq_Latn": 103,
868
+ "bss_Latn": 104,
869
+ "bst_Ethi": 105,
870
+ "btt_Latn": 106,
871
+ "bud_Latn": 107,
872
+ "bum_Latn": 108,
873
+ "bun_Latn": 109,
874
+ "bus_Latn": 110,
875
+ "buy_Latn": 111,
876
+ "bwq_Latn": 112,
877
+ "bwr_Latn": 113,
878
+ "bwt_Latn": 114,
879
+ "bwu_Latn": 115,
880
+ "bxk_Latn": 116,
881
+ "byf_Latn": 117,
882
+ "byv_Latn": 118,
883
+ "bza_Latn": 119,
884
+ "bze_Arab": 120,
885
+ "bzw_Latn": 121,
886
+ "cce_Latn": 122,
887
+ "cgg_Latn": 123,
888
+ "chw_Latn": 124,
889
+ "cjk_Latn": 125,
890
+ "cko_Latn": 126,
891
+ "cme_Latn": 127,
892
+ "cop_Copt": 128,
893
+ "cou_Latn": 129,
894
+ "cri_Latn": 130,
895
+ "crs_Latn": 131,
896
+ "csk_Latn": 132,
897
+ "cuh_Latn": 133,
898
+ "cuv_Latn": 134,
899
+ "cwe_Latn": 135,
900
+ "cwt_Latn": 136,
901
+ "daa_Latn": 137,
902
+ "dag_Latn": 138,
903
+ "dav_Latn": 139,
904
+ "dbq_Latn": 140,
905
+ "ddn_Latn": 141,
906
+ "dga_Latn": 142,
907
+ "dgd_Latn": 143,
908
+ "dgi_Latn": 144,
909
+ "dhm_Latn": 145,
910
+ "dib_Latn": 146,
911
+ "did_Latn": 147,
912
+ "dig_Latn": 148,
913
+ "dik_Latn": 149,
914
+ "din_Latn": 150,
915
+ "dip_Latn": 151,
916
+ "diu_Latn": 152,
917
+ "dje_Latn": 153,
918
+ "dks_Latn": 154,
919
+ "dnj_Latn": 155,
920
+ "dop_Latn": 156,
921
+ "dos_Latn": 157,
922
+ "dov_Latn": 158,
923
+ "dow_Latn": 159,
924
+ "dsh_Latn": 160,
925
+ "dts_Latn": 161,
926
+ "dua_Latn": 162,
927
+ "dug_Latn": 163,
928
+ "dur_Latn": 164,
929
+ "dwr_Ethi": 165,
930
+ "dwr_Latn": 166,
931
+ "dyi_Latn": 167,
932
+ "dyo_Latn": 168,
933
+ "dyu_Latn": 169,
934
+ "ebr_Latn": 170,
935
+ "ebu_Latn": 171,
936
+ "efi_Latn": 172,
937
+ "ego_Latn": 173,
938
+ "eka_Latn": 174,
939
+ "ekm_Latn": 175,
940
+ "eko_Latn": 176,
941
+ "emk_Latn": 177,
942
+ "enb_Latn": 178,
943
+ "eot_Latn": 179,
944
+ "eto_Latn": 180,
945
+ "ets_Latn": 181,
946
+ "etu_Latn": 182,
947
+ "etx_Latn": 183,
948
+ "ewe_Latn": 184,
949
+ "ewo_Latn": 185,
950
+ "eza_Latn": 186,
951
+ "fak_Latn": 187,
952
+ "fal_Latn": 188,
953
+ "fan_Latn": 189,
954
+ "fat_Latn": 190,
955
+ "ffm_Latn": 191,
956
+ "fia_Latn": 192,
957
+ "fip_Latn": 193,
958
+ "fli_Latn": 194,
959
+ "flr_Latn": 195,
960
+ "fon_Latn": 196,
961
+ "fub_Latn": 197,
962
+ "fuc_Latn": 198,
963
+ "fue_Latn": 199,
964
+ "fuf_Latn": 200,
965
+ "fuh_Latn": 201,
966
+ "ful_Latn": 202,
967
+ "fuq_Latn": 203,
968
+ "fuv_Arab": 204,
969
+ "fuv_Latn": 205,
970
+ "fvr_Latn": 206,
971
+ "gaa_Latn": 207,
972
+ "gax_Latn": 208,
973
+ "gaz_Latn": 209,
974
+ "gbo_Latn": 210,
975
+ "gbr_Latn": 211,
976
+ "gde_Latn": 212,
977
+ "gej_Latn": 213,
978
+ "gez_Ethi": 214,
979
+ "gid_Latn": 215,
980
+ "giz_Latn": 216,
981
+ "gjn_Latn": 217,
982
+ "gkn_Latn": 218,
983
+ "gkp_Latn": 219,
984
+ "gmv_Ethi": 220,
985
+ "gmv_Latn": 221,
986
+ "gna_Latn": 222,
987
+ "gnd_Latn": 223,
988
+ "gng_Latn": 224,
989
+ "goa_Latn": 225,
990
+ "gof_Ethi": 226,
991
+ "gof_Latn": 227,
992
+ "gog_Latn": 228,
993
+ "gol_Vaii": 229,
994
+ "gou_Latn": 230,
995
+ "gqr_Latn": 231,
996
+ "gso_Latn": 232,
997
+ "gud_Latn": 233,
998
+ "guk_Ethi": 234,
999
+ "gur_Latn": 235,
1000
+ "guw_Latn": 236,
1001
+ "gux_Latn": 237,
1002
+ "guz_Latn": 238,
1003
+ "gvl_Latn": 239,
1004
+ "gwl_Latn": 240,
1005
+ "gwr_Latn": 241,
1006
+ "gya_Latn": 242,
1007
+ "hae_Latn": 243,
1008
+ "hag_Latn": 244,
1009
+ "har_Ethi": 245,
1010
+ "hau_Latn": 246,
1011
+ "hav_Latn": 247,
1012
+ "hay_Latn": 248,
1013
+ "hbb_Latn": 249,
1014
+ "hdy_Ethi": 250,
1015
+ "heh_Latn": 251,
1016
+ "her_Latn": 252,
1017
+ "hgm_Latn": 253,
1018
+ "hig_Latn": 254,
1019
+ "hna_Latn": 255,
1020
+ "ibb_Latn": 256,
1021
+ "ibo_Latn": 257,
1022
+ "idu_Latn": 258,
1023
+ "ife_Latn": 259,
1024
+ "igb_Latn": 260,
1025
+ "ige_Latn": 261,
1026
+ "igl_Latn": 262,
1027
+ "ijc_Latn": 263,
1028
+ "ijn_Latn": 264,
1029
+ "ijs_Latn": 265,
1030
+ "ikk_Latn": 266,
1031
+ "ikw_Latn": 267,
1032
+ "ilb_Latn": 268,
1033
+ "iqw_Latn": 269,
1034
+ "iri_Latn": 270,
1035
+ "irk_Latn": 271,
1036
+ "ish_Latn": 272,
1037
+ "iso_Latn": 273,
1038
+ "isu_Latn": 274,
1039
+ "iyx_Latn": 275,
1040
+ "izr_Latn": 276,
1041
+ "izz_Latn": 277,
1042
+ "jab_Latn": 278,
1043
+ "jbu_Latn": 279,
1044
+ "jen_Latn": 280,
1045
+ "jgo_Latn": 281,
1046
+ "jib_Latn": 282,
1047
+ "jit_Latn": 283,
1048
+ "jmc_Latn": 284,
1049
+ "kab_Latn": 285,
1050
+ "kam_Latn": 286,
1051
+ "kao_Latn": 287,
1052
+ "kau_Latn": 288,
1053
+ "kbn_Latn": 289,
1054
+ "kbo_Latn": 290,
1055
+ "kbp_Latn": 291,
1056
+ "kbr_Latn": 292,
1057
+ "kby_Latn": 293,
1058
+ "kcg_Latn": 294,
1059
+ "kck_Latn": 295,
1060
+ "kcp_Latn": 296,
1061
+ "kdc_Latn": 297,
1062
+ "kde_Latn": 298,
1063
+ "kdh_Latn": 299,
1064
+ "kdi_Latn": 300,
1065
+ "kdj_Latn": 301,
1066
+ "kdl_Latn": 302,
1067
+ "kdn_Latn": 303,
1068
+ "kea_Latn": 304,
1069
+ "ken_Latn": 305,
1070
+ "keo_Latn": 306,
1071
+ "ker_Latn": 307,
1072
+ "kez_Latn": 308,
1073
+ "khq_Latn": 309,
1074
+ "khy_Latn": 310,
1075
+ "kia_Latn": 311,
1076
+ "kik_Latn": 312,
1077
+ "kin_Latn": 313,
1078
+ "kiz_Latn": 314,
1079
+ "kki_Latn": 315,
1080
+ "kkj_Latn": 316,
1081
+ "kln_Latn": 317,
1082
+ "klu_Latn": 318,
1083
+ "kma_Latn": 319,
1084
+ "kmb_Latn": 320,
1085
+ "kmy_Latn": 321,
1086
+ "knc_Arab": 322,
1087
+ "knc_Latn": 323,
1088
+ "knf_Latn": 324,
1089
+ "kng_Latn": 325,
1090
+ "knk_Latn": 326,
1091
+ "kno_Latn": 327,
1092
+ "kny_Latn": 328,
1093
+ "kon_Latn": 329,
1094
+ "koo_Latn": 330,
1095
+ "koq_Latn": 331,
1096
+ "kpz_Latn": 332,
1097
+ "kqn_Latn": 333,
1098
+ "kqo_Latn": 334,
1099
+ "kqp_Latn": 335,
1100
+ "kqs_Latn": 336,
1101
+ "kqy_Ethi": 337,
1102
+ "kri_Latn": 338,
1103
+ "krs_Latn": 339,
1104
+ "krw_Latn": 340,
1105
+ "krx_Latn": 341,
1106
+ "ksb_Latn": 342,
1107
+ "ksf_Latn": 343,
1108
+ "ksp_Latn": 344,
1109
+ "kss_Latn": 345,
1110
+ "ktb_Ethi": 346,
1111
+ "ktj_Latn": 347,
1112
+ "ktu_Latn": 348,
1113
+ "ktz_Latn": 349,
1114
+ "kua_Latn": 350,
1115
+ "kub_Latn": 351,
1116
+ "kuj_Latn": 352,
1117
+ "kus_Latn": 353,
1118
+ "kvj_Latn": 354,
1119
+ "kwn_Latn": 355,
1120
+ "kwu_Latn": 356,
1121
+ "kwy_Latn": 357,
1122
+ "kxc_Ethi": 358,
1123
+ "kyf_Latn": 359,
1124
+ "kyq_Latn": 360,
1125
+ "kzn_Latn": 361,
1126
+ "kzr_Latn": 362,
1127
+ "lai_Latn": 363,
1128
+ "laj_Latn": 364,
1129
+ "lam_Latn": 365,
1130
+ "lan_Latn": 366,
1131
+ "lap_Latn": 367,
1132
+ "las_Latn": 368,
1133
+ "ldi_Latn": 369,
1134
+ "lea_Latn": 370,
1135
+ "led_Latn": 371,
1136
+ "lee_Latn": 372,
1137
+ "lef_Latn": 373,
1138
+ "leh_Latn": 374,
1139
+ "lem_Latn": 375,
1140
+ "lfa_Latn": 376,
1141
+ "lgg_Latn": 377,
1142
+ "lgm_Latn": 378,
1143
+ "lia_Latn": 379,
1144
+ "lik_Latn": 380,
1145
+ "lin_Latn": 381,
1146
+ "lip_Latn": 382,
1147
+ "lkb_Latn": 383,
1148
+ "lke_Latn": 384,
1149
+ "lko_Latn": 385,
1150
+ "llb_Latn": 386,
1151
+ "lln_Latn": 387,
1152
+ "lmd_Latn": 388,
1153
+ "lmp_Latn": 389,
1154
+ "lnl_Latn": 390,
1155
+ "lns_Latn": 391,
1156
+ "lob_Latn": 392,
1157
+ "log_Latn": 393,
1158
+ "loh_Latn": 394,
1159
+ "lok_Latn": 395,
1160
+ "lol_Latn": 396,
1161
+ "lom_Latn": 397,
1162
+ "loq_Latn": 398,
1163
+ "lot_Latn": 399,
1164
+ "loz_Latn": 400,
1165
+ "lro_Latn": 401,
1166
+ "lsm_Latn": 402,
1167
+ "lth_Latn": 403,
1168
+ "lto_Latn": 404,
1169
+ "lts_Latn": 405,
1170
+ "lua_Latn": 406,
1171
+ "lub_Latn": 407,
1172
+ "luc_Latn": 408,
1173
+ "lue_Latn": 409,
1174
+ "lug_Latn": 410,
1175
+ "lun_Latn": 411,
1176
+ "luo_Latn": 412,
1177
+ "luy_Latn": 413,
1178
+ "lwg_Latn": 414,
1179
+ "lwo_Latn": 415,
1180
+ "maf_Latn": 416,
1181
+ "mas_Latn": 417,
1182
+ "maw_Latn": 418,
1183
+ "mbu_Latn": 419,
1184
+ "mck_Latn": 420,
1185
+ "mcn_Latn": 421,
1186
+ "mcp_Latn": 422,
1187
+ "mcu_Latn": 423,
1188
+ "mda_Latn": 424,
1189
+ "mdm_Latn": 425,
1190
+ "mdy_Ethi": 426,
1191
+ "men_Latn": 427,
1192
+ "meq_Latn": 428,
1193
+ "mer_Latn": 429,
1194
+ "mev_Latn": 430,
1195
+ "mfe_Latn": 431,
1196
+ "mfg_Latn": 432,
1197
+ "mfh_Latn": 433,
1198
+ "mfi_Latn": 434,
1199
+ "mfj_Latn": 435,
1200
+ "mfk_Latn": 436,
1201
+ "mfq_Latn": 437,
1202
+ "mfz_Latn": 438,
1203
+ "mgc_Latn": 439,
1204
+ "mgg_Latn": 440,
1205
+ "mgh_Latn": 441,
1206
+ "mgo_Latn": 442,
1207
+ "mgq_Latn": 443,
1208
+ "mgr_Latn": 444,
1209
+ "mgw_Latn": 445,
1210
+ "mhi_Latn": 446,
1211
+ "mhw_Latn": 447,
1212
+ "mif_Latn": 448,
1213
+ "mkl_Latn": 449,
1214
+ "mlg_Latn": 450,
1215
+ "mlk_Latn": 451,
1216
+ "mlr_Latn": 452,
1217
+ "mlw_Latn": 453,
1218
+ "mmu_Latn": 454,
1219
+ "mmy_Latn": 455,
1220
+ "mne_Latn": 456,
1221
+ "mnf_Latn": 457,
1222
+ "mnk_Latn": 458,
1223
+ "mny_Latn": 459,
1224
+ "moa_Latn": 460,
1225
+ "mor_Latn": 461,
1226
+ "mos_Latn": 462,
1227
+ "moy_Latn": 463,
1228
+ "moz_Latn": 464,
1229
+ "mpe_Ethi": 465,
1230
+ "mpg_Latn": 466,
1231
+ "mqb_Latn": 467,
1232
+ "msc_Latn": 468,
1233
+ "mse_Latn": 469,
1234
+ "mua_Latn": 470,
1235
+ "mug_Latn": 471,
1236
+ "muh_Latn": 472,
1237
+ "mur_Latn": 473,
1238
+ "muy_Latn": 474,
1239
+ "mwe_Latn": 475,
1240
+ "mwm_Latn": 476,
1241
+ "mwn_Latn": 477,
1242
+ "mws_Latn": 478,
1243
+ "mxu_Latn": 479,
1244
+ "myb_Latn": 480,
1245
+ "myk_Latn": 481,
1246
+ "myx_Latn": 482,
1247
+ "mzk_Latn": 483,
1248
+ "mzm_Latn": 484,
1249
+ "mzw_Latn": 485,
1250
+ "naq_Latn": 486,
1251
+ "naw_Latn": 487,
1252
+ "nba_Latn": 488,
1253
+ "nbl_Latn": 489,
1254
+ "ncu_Latn": 490,
1255
+ "ndc_Latn": 491,
1256
+ "nde_Latn": 492,
1257
+ "ndh_Latn": 493,
1258
+ "ndi_Latn": 494,
1259
+ "ndj_Latn": 495,
1260
+ "ndo_Latn": 496,
1261
+ "ndp_Latn": 497,
1262
+ "ndv_Latn": 498,
1263
+ "ndy_Latn": 499,
1264
+ "ndz_Latn": 500,
1265
+ "neb_Latn": 501,
1266
+ "nfr_Latn": 502,
1267
+ "ngb_Latn": 503,
1268
+ "ngc_Latn": 504,
1269
+ "nge_Latn": 505,
1270
+ "ngl_Latn": 506,
1271
+ "ngn_Latn": 507,
1272
+ "ngp_Latn": 508,
1273
+ "nhr_Latn": 509,
1274
+ "nhu_Latn": 510,
1275
+ "nih_Latn": 511,
1276
+ "nim_Latn": 512,
1277
+ "nin_Latn": 513,
1278
+ "niq_Latn": 514,
1279
+ "niy_Latn": 515,
1280
+ "njd_Latn": 516,
1281
+ "njy_Latn": 517,
1282
+ "nka_Latn": 518,
1283
+ "nko_Latn": 519,
1284
+ "nku_Latn": 520,
1285
+ "nla_Latn": 521,
1286
+ "nle_Latn": 522,
1287
+ "nmz_Latn": 523,
1288
+ "nnb_Latn": 524,
1289
+ "nnh_Latn": 525,
1290
+ "nnq_Latn": 526,
1291
+ "nnw_Latn": 527,
1292
+ "nqo_Nkoo": 528,
1293
+ "nse_Latn": 529,
1294
+ "nso_Latn": 530,
1295
+ "ntr_Latn": 531,
1296
+ "nuj_Latn": 532,
1297
+ "nup_Latn": 533,
1298
+ "nus_Latn": 534,
1299
+ "nwb_Latn": 535,
1300
+ "nwe_Latn": 536,
1301
+ "nxd_Latn": 537,
1302
+ "nya_Latn": 538,
1303
+ "nyb_Latn": 539,
1304
+ "nyd_Latn": 540,
1305
+ "nyf_Latn": 541,
1306
+ "nyk_Latn": 542,
1307
+ "nym_Latn": 543,
1308
+ "nyn_Latn": 544,
1309
+ "nyo_Latn": 545,
1310
+ "nyu_Latn": 546,
1311
+ "nyy_Latn": 547,
1312
+ "nza_Latn": 548,
1313
+ "nzi_Latn": 549,
1314
+ "odu_Latn": 550,
1315
+ "ogo_Latn": 551,
1316
+ "oke_Latn": 552,
1317
+ "oki_Latn": 553,
1318
+ "okr_Latn": 554,
1319
+ "oku_Latn": 555,
1320
+ "old_Latn": 556,
1321
+ "orm_Latn": 557,
1322
+ "ozm_Latn": 558,
1323
+ "pae_Latn": 559,
1324
+ "pbi_Latn": 560,
1325
+ "pcm_Latn": 561,
1326
+ "pem_Latn": 562,
1327
+ "pfe_Latn": 563,
1328
+ "phm_Latn": 564,
1329
+ "pil_Latn": 565,
1330
+ "pkb_Latn": 566,
1331
+ "pko_Latn": 567,
1332
+ "plt_Latn": 568,
1333
+ "pny_Latn": 569,
1334
+ "pnz_Latn": 570,
1335
+ "pov_Latn": 571,
1336
+ "poy_Latn": 572,
1337
+ "rag_Latn": 573,
1338
+ "rcf_Latn": 574,
1339
+ "rel_Latn": 575,
1340
+ "rif_Tfng": 576,
1341
+ "rim_Latn": 577,
1342
+ "rnd_Latn": 578,
1343
+ "rng_Latn": 579,
1344
+ "rub_Latn": 580,
1345
+ "ruf_Latn": 581,
1346
+ "run_Latn": 582,
1347
+ "rwk_Latn": 583,
1348
+ "sag_Latn": 584,
1349
+ "saq_Latn": 585,
1350
+ "say_Latn": 586,
1351
+ "sba_Latn": 587,
1352
+ "sbd_Latn": 588,
1353
+ "sbp_Latn": 589,
1354
+ "sbs_Latn": 590,
1355
+ "sby_Latn": 591,
1356
+ "sef_Latn": 592,
1357
+ "seh_Latn": 593,
1358
+ "ses_Latn": 594,
1359
+ "sev_Latn": 595,
1360
+ "sfw_Latn": 596,
1361
+ "sgc_Latn": 597,
1362
+ "sgw_Ethi": 598,
1363
+ "shi_Latn": 599,
1364
+ "shj_Latn": 600,
1365
+ "shk_Latn": 601,
1366
+ "shr_Latn": 602,
1367
+ "shu_Arab": 603,
1368
+ "sid_Latn": 604,
1369
+ "sig_Latn": 605,
1370
+ "sil_Latn": 606,
1371
+ "skg_Latn": 607,
1372
+ "sld_Latn": 608,
1373
+ "sna_Latn": 609,
1374
+ "snf_Latn": 610,
1375
+ "sng_Latn": 611,
1376
+ "snk_Latn": 612,
1377
+ "snw_Latn": 613,
1378
+ "soe_Latn": 614,
1379
+ "som_Latn": 615,
1380
+ "sop_Latn": 616,
1381
+ "sor_Latn": 617,
1382
+ "sot_Latn": 618,
1383
+ "sox_Latn": 619,
1384
+ "soy_Latn": 620,
1385
+ "spp_Latn": 621,
1386
+ "spy_Latn": 622,
1387
+ "srr_Latn": 623,
1388
+ "ssc_Latn": 624,
1389
+ "ssn_Latn": 625,
1390
+ "ssw_Latn": 626,
1391
+ "stv_Ethi": 627,
1392
+ "suk_Latn": 628,
1393
+ "sur_Latn": 629,
1394
+ "sus_Arab": 630,
1395
+ "sus_Latn": 631,
1396
+ "swa_Latn": 632,
1397
+ "swb_Latn": 633,
1398
+ "swc_Latn": 634,
1399
+ "swh_Latn": 635,
1400
+ "swk_Latn": 636,
1401
+ "sxb_Latn": 637,
1402
+ "tap_Latn": 638,
1403
+ "taq_Latn": 639,
1404
+ "taq_Tfng": 640,
1405
+ "tbz_Latn": 641,
1406
+ "tcc_Latn": 642,
1407
+ "tcd_Latn": 643,
1408
+ "tdx_Latn": 644,
1409
+ "ted_Latn": 645,
1410
+ "tem_Latn": 646,
1411
+ "teo_Latn": 647,
1412
+ "tex_Latn": 648,
1413
+ "tgw_Latn": 649,
1414
+ "thk_Latn": 650,
1415
+ "thv_Latn": 651,
1416
+ "thy_Latn": 652,
1417
+ "tig_Ethi": 653,
1418
+ "tik_Latn": 654,
1419
+ "tir_Ethi": 655,
1420
+ "tiv_Latn": 656,
1421
+ "tjo_Arab": 657,
1422
+ "tke_Latn": 658,
1423
+ "tlj_Latn": 659,
1424
+ "tll_Latn": 660,
1425
+ "tmc_Latn": 661,
1426
+ "tnr_Latn": 662,
1427
+ "tod_Latn": 663,
1428
+ "tog_Latn": 664,
1429
+ "toh_Latn": 665,
1430
+ "toi_Latn": 666,
1431
+ "tpm_Latn": 667,
1432
+ "tsb_Latn": 668,
1433
+ "tsc_Latn": 669,
1434
+ "tsn_Latn": 670,
1435
+ "tso_Latn": 671,
1436
+ "tsw_Latn": 672,
1437
+ "ttj_Latn": 673,
1438
+ "ttq_Latn": 674,
1439
+ "ttq_Tfng": 675,
1440
+ "ttr_Latn": 676,
1441
+ "tui_Latn": 677,
1442
+ "tul_Latn": 678,
1443
+ "tum_Latn": 679,
1444
+ "tuv_Latn": 680,
1445
+ "tuz_Latn": 681,
1446
+ "tvs_Latn": 682,
1447
+ "tvu_Latn": 683,
1448
+ "twi_Latn": 684,
1449
+ "twx_Latn": 685,
1450
+ "tzm_Tfng": 686,
1451
+ "udu_Latn": 687,
1452
+ "umb_Latn": 688,
1453
+ "urh_Latn": 689,
1454
+ "uth_Latn": 690,
1455
+ "vag_Latn": 691,
1456
+ "vai_Vaii": 692,
1457
+ "ven_Latn": 693,
1458
+ "vid_Latn": 694,
1459
+ "vif_Latn": 695,
1460
+ "vmk_Latn": 696,
1461
+ "vmw_Latn": 697,
1462
+ "vun_Latn": 698,
1463
+ "vut_Latn": 699,
1464
+ "wal_Latn": 700,
1465
+ "wbi_Latn": 701,
1466
+ "wec_Latn": 702,
1467
+ "wes_Latn": 703,
1468
+ "wib_Latn": 704,
1469
+ "wlx_Latn": 705,
1470
+ "wmw_Latn": 706,
1471
+ "wni_Latn": 707,
1472
+ "wob_Latn": 708,
1473
+ "wol_Latn": 709,
1474
+ "won_Latn": 710,
1475
+ "wwa_Latn": 711,
1476
+ "xan_Ethi": 712,
1477
+ "xed_Latn": 713,
1478
+ "xho_Latn": 714,
1479
+ "xkg_Latn": 715,
1480
+ "xmd_Latn": 716,
1481
+ "xmg_Latn": 717,
1482
+ "xmv_Latn": 718,
1483
+ "xnz_Arab": 719,
1484
+ "xog_Latn": 720,
1485
+ "xon_Latn": 721,
1486
+ "xpe_Latn": 722,
1487
+ "xrb_Latn": 723,
1488
+ "xsm_Latn": 724,
1489
+ "xtc_Latn": 725,
1490
+ "xuo_Latn": 726,
1491
+ "yal_Latn": 727,
1492
+ "yam_Latn": 728,
1493
+ "yao_Latn": 729,
1494
+ "yas_Latn": 730,
1495
+ "yat_Latn": 731,
1496
+ "yav_Latn": 732,
1497
+ "yaz_Latn": 733,
1498
+ "yba_Latn": 734,
1499
+ "ybb_Latn": 735,
1500
+ "yom_Latn": 736,
1501
+ "yor_Latn": 737,
1502
+ "yre_Latn": 738,
1503
+ "zaj_Latn": 739,
1504
+ "zdj_Latn": 740,
1505
+ "zga_Latn": 741,
1506
+ "zgh_Tfng": 742,
1507
+ "ziw_Latn": 743,
1508
+ "zne_Latn": 744,
1509
+ "zul_Latn": 745
1510
+ },
1511
+ "layer_norm_eps": 1e-05,
1512
+ "max_position_embeddings": 514,
1513
+ "model_type": "xlm-roberta",
1514
+ "num_attention_heads": 12,
1515
+ "num_hidden_layers": 12,
1516
+ "output_past": true,
1517
+ "pad_token_id": 1,
1518
+ "position_embedding_type": "absolute",
1519
+ "problem_type": "single_label_classification",
1520
+ "torch_dtype": "float32",
1521
+ "transformers_version": "4.53.2",
1522
+ "type_vocab_size": 1,
1523
+ "use_cache": true,
1524
+ "vocab_size": 250004
1525
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e2851c540c01115b27e932bd376e0cd8ab6a274e17b3472b46abd4be4bb473
3
+ size 1114499712
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0efed36a45e8359b3380aba1cc98f3096bfa437b19a19c42194c639c56bb93b
3
+ size 2229119371
rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e9c6bbc69fd20063e81dc2429f7f961800312914976c518e9ef887ff749fa7e
3
+ size 15429
rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9711e49f3de475ceecca59e98d9a55cbeb3245096e3e9f62afbd5b705686d731
3
+ size 15429
rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:760a79b90c32c0408687a773c003e0dd6c41a152084b00b9d1173db31a24eb41
3
+ size 15429
rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a93f3ebd307a208b050b6e04fa585434e7ee750d0099f521d4048dc8c379cea
3
+ size 15429
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9e7e21f50165a923681c2ab71aa48946a7574ecb838b6a700464b242904eef6
3
+ size 1465
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9459d6b3ea13588a4e2137d473d03cfc84154c648388e07c7be8a1470fd4d1
3
+ size 4796746
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd64a4b8fd1dfb4c67288b563552a85d89417180fe0439e3bb56064c7d1b4a1
3
+ size 34142685
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250003": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "sp_model_kwargs": {},
54
+ "tokenizer_class": "XLMRobertaTokenizer",
55
+ "unk_token": "<unk>",
56
+ "use_fast": true
57
+ }
trainer_state.json ADDED
@@ -0,0 +1,2000 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 138718,
3
+ "best_metric": 0.9915470627263667,
4
+ "best_model_checkpoint": "/home/skwon01/scratch/sibal/finetuned_models/serengeti_camera_ready/checkpoint-138718",
5
+ "epoch": 2.0,
6
+ "eval_steps": 1000.0,
7
+ "global_step": 138718,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.007208869793393791,
14
+ "grad_norm": 2.880587577819824,
15
+ "learning_rate": 1.9985611095892387e-05,
16
+ "loss": 3.675,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.014417739586787583,
21
+ "grad_norm": 3.1965484619140625,
22
+ "learning_rate": 1.99711933563056e-05,
23
+ "loss": 1.3703,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.021626609380181374,
28
+ "grad_norm": 3.587383270263672,
29
+ "learning_rate": 1.9956775616718814e-05,
30
+ "loss": 0.7317,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.028835479173575165,
35
+ "grad_norm": 2.73246169090271,
36
+ "learning_rate": 1.9942357877132026e-05,
37
+ "loss": 0.4764,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.03604434896696896,
42
+ "grad_norm": 3.9599311351776123,
43
+ "learning_rate": 1.9927940137545237e-05,
44
+ "loss": 0.3488,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.04325321876036275,
49
+ "grad_norm": 3.690446138381958,
50
+ "learning_rate": 1.991352239795845e-05,
51
+ "loss": 0.2729,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.05046208855375654,
56
+ "grad_norm": 3.0428125858306885,
57
+ "learning_rate": 1.989910465837166e-05,
58
+ "loss": 0.2249,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.05767095834715033,
63
+ "grad_norm": 2.6362786293029785,
64
+ "learning_rate": 1.9884686918784876e-05,
65
+ "loss": 0.1907,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.06487982814054413,
70
+ "grad_norm": 4.072872161865234,
71
+ "learning_rate": 1.9870269179198087e-05,
72
+ "loss": 0.1695,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.07208869793393792,
77
+ "grad_norm": 2.4177143573760986,
78
+ "learning_rate": 1.98558514396113e-05,
79
+ "loss": 0.1535,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.07929756772733171,
84
+ "grad_norm": 2.4438438415527344,
85
+ "learning_rate": 1.9841433700024514e-05,
86
+ "loss": 0.1429,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.0865064375207255,
91
+ "grad_norm": 1.9982225894927979,
92
+ "learning_rate": 1.9827015960437722e-05,
93
+ "loss": 0.1348,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.0937153073141193,
98
+ "grad_norm": 2.988769769668579,
99
+ "learning_rate": 1.9812598220850938e-05,
100
+ "loss": 0.1226,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.10092417710751309,
105
+ "grad_norm": 2.386380672454834,
106
+ "learning_rate": 1.979818048126415e-05,
107
+ "loss": 0.1168,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.10813304690090687,
112
+ "grad_norm": 1.9924527406692505,
113
+ "learning_rate": 1.978376274167736e-05,
114
+ "loss": 0.1082,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.11534191669430066,
119
+ "grad_norm": 1.9020510911941528,
120
+ "learning_rate": 1.9769345002090573e-05,
121
+ "loss": 0.1064,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.12255078648769446,
126
+ "grad_norm": 2.333510160446167,
127
+ "learning_rate": 1.9754927262503788e-05,
128
+ "loss": 0.1029,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.12975965628108826,
133
+ "grad_norm": 2.677407741546631,
134
+ "learning_rate": 1.9740509522917e-05,
135
+ "loss": 0.0995,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.13696852607448204,
140
+ "grad_norm": 1.5480279922485352,
141
+ "learning_rate": 1.972609178333021e-05,
142
+ "loss": 0.0948,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.14417739586787584,
147
+ "grad_norm": 2.630037546157837,
148
+ "learning_rate": 1.9711674043743423e-05,
149
+ "loss": 0.0937,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.15138626566126961,
154
+ "grad_norm": 2.267946243286133,
155
+ "learning_rate": 1.9697256304156634e-05,
156
+ "loss": 0.0909,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.15859513545466342,
161
+ "grad_norm": 2.932375907897949,
162
+ "learning_rate": 1.968283856456985e-05,
163
+ "loss": 0.0889,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.16580400524805722,
168
+ "grad_norm": 2.69350528717041,
169
+ "learning_rate": 1.966842082498306e-05,
170
+ "loss": 0.086,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.173012875041451,
175
+ "grad_norm": 2.1316378116607666,
176
+ "learning_rate": 1.9654003085396273e-05,
177
+ "loss": 0.0843,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.1802217448348448,
182
+ "grad_norm": 2.52103853225708,
183
+ "learning_rate": 1.9639585345809488e-05,
184
+ "loss": 0.0828,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.1874306146282386,
189
+ "grad_norm": 1.939334511756897,
190
+ "learning_rate": 1.9625167606222696e-05,
191
+ "loss": 0.0795,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 0.19463948442163237,
196
+ "grad_norm": 2.3057949542999268,
197
+ "learning_rate": 1.961074986663591e-05,
198
+ "loss": 0.0786,
199
+ "step": 13500
200
+ },
201
+ {
202
+ "epoch": 0.20184835421502617,
203
+ "grad_norm": 2.0021777153015137,
204
+ "learning_rate": 1.9596332127049123e-05,
205
+ "loss": 0.0773,
206
+ "step": 14000
207
+ },
208
+ {
209
+ "epoch": 0.20905722400841997,
210
+ "grad_norm": 2.276421546936035,
211
+ "learning_rate": 1.9581914387462335e-05,
212
+ "loss": 0.0772,
213
+ "step": 14500
214
+ },
215
+ {
216
+ "epoch": 0.21626609380181375,
217
+ "grad_norm": 2.426966428756714,
218
+ "learning_rate": 1.9567496647875546e-05,
219
+ "loss": 0.0746,
220
+ "step": 15000
221
+ },
222
+ {
223
+ "epoch": 0.22347496359520755,
224
+ "grad_norm": 1.984330415725708,
225
+ "learning_rate": 1.955307890828876e-05,
226
+ "loss": 0.074,
227
+ "step": 15500
228
+ },
229
+ {
230
+ "epoch": 0.23068383338860132,
231
+ "grad_norm": 2.1131157875061035,
232
+ "learning_rate": 1.9538661168701973e-05,
233
+ "loss": 0.0754,
234
+ "step": 16000
235
+ },
236
+ {
237
+ "epoch": 0.23789270318199512,
238
+ "grad_norm": 2.672717332839966,
239
+ "learning_rate": 1.9524243429115185e-05,
240
+ "loss": 0.0719,
241
+ "step": 16500
242
+ },
243
+ {
244
+ "epoch": 0.24510157297538893,
245
+ "grad_norm": 1.4720840454101562,
246
+ "learning_rate": 1.9509825689528396e-05,
247
+ "loss": 0.0689,
248
+ "step": 17000
249
+ },
250
+ {
251
+ "epoch": 0.25231044276878273,
252
+ "grad_norm": 1.7824233770370483,
253
+ "learning_rate": 1.9495407949941608e-05,
254
+ "loss": 0.0711,
255
+ "step": 17500
256
+ },
257
+ {
258
+ "epoch": 0.25951931256217653,
259
+ "grad_norm": 1.7139828205108643,
260
+ "learning_rate": 1.9480990210354823e-05,
261
+ "loss": 0.067,
262
+ "step": 18000
263
+ },
264
+ {
265
+ "epoch": 0.2667281823555703,
266
+ "grad_norm": 2.2731082439422607,
267
+ "learning_rate": 1.9466572470768035e-05,
268
+ "loss": 0.0678,
269
+ "step": 18500
270
+ },
271
+ {
272
+ "epoch": 0.2739370521489641,
273
+ "grad_norm": 2.2537448406219482,
274
+ "learning_rate": 1.9452154731181247e-05,
275
+ "loss": 0.0657,
276
+ "step": 19000
277
+ },
278
+ {
279
+ "epoch": 0.2811459219423579,
280
+ "grad_norm": 3.0216615200042725,
281
+ "learning_rate": 1.943773699159446e-05,
282
+ "loss": 0.0656,
283
+ "step": 19500
284
+ },
285
+ {
286
+ "epoch": 0.2883547917357517,
287
+ "grad_norm": 1.4544578790664673,
288
+ "learning_rate": 1.942331925200767e-05,
289
+ "loss": 0.0658,
290
+ "step": 20000
291
+ },
292
+ {
293
+ "epoch": 0.2955636615291455,
294
+ "grad_norm": 2.4549198150634766,
295
+ "learning_rate": 1.9408901512420885e-05,
296
+ "loss": 0.0641,
297
+ "step": 20500
298
+ },
299
+ {
300
+ "epoch": 0.30277253132253923,
301
+ "grad_norm": 1.514060616493225,
302
+ "learning_rate": 1.9394483772834097e-05,
303
+ "loss": 0.0633,
304
+ "step": 21000
305
+ },
306
+ {
307
+ "epoch": 0.30998140111593303,
308
+ "grad_norm": 2.4346635341644287,
309
+ "learning_rate": 1.9380066033247308e-05,
310
+ "loss": 0.0627,
311
+ "step": 21500
312
+ },
313
+ {
314
+ "epoch": 0.31719027090932683,
315
+ "grad_norm": 1.432133436203003,
316
+ "learning_rate": 1.9365648293660523e-05,
317
+ "loss": 0.0616,
318
+ "step": 22000
319
+ },
320
+ {
321
+ "epoch": 0.32439914070272063,
322
+ "grad_norm": 1.2359411716461182,
323
+ "learning_rate": 1.9351230554073735e-05,
324
+ "loss": 0.0628,
325
+ "step": 22500
326
+ },
327
+ {
328
+ "epoch": 0.33160801049611444,
329
+ "grad_norm": 2.1902575492858887,
330
+ "learning_rate": 1.9336812814486947e-05,
331
+ "loss": 0.0628,
332
+ "step": 23000
333
+ },
334
+ {
335
+ "epoch": 0.33881688028950824,
336
+ "grad_norm": 1.7415978908538818,
337
+ "learning_rate": 1.932239507490016e-05,
338
+ "loss": 0.0616,
339
+ "step": 23500
340
+ },
341
+ {
342
+ "epoch": 0.346025750082902,
343
+ "grad_norm": 1.401383399963379,
344
+ "learning_rate": 1.930797733531337e-05,
345
+ "loss": 0.0589,
346
+ "step": 24000
347
+ },
348
+ {
349
+ "epoch": 0.3532346198762958,
350
+ "grad_norm": 1.5828105211257935,
351
+ "learning_rate": 1.9293559595726582e-05,
352
+ "loss": 0.0604,
353
+ "step": 24500
354
+ },
355
+ {
356
+ "epoch": 0.3604434896696896,
357
+ "grad_norm": 0.8541142344474792,
358
+ "learning_rate": 1.9279141856139797e-05,
359
+ "loss": 0.0599,
360
+ "step": 25000
361
+ },
362
+ {
363
+ "epoch": 0.3676523594630834,
364
+ "grad_norm": 2.8157145977020264,
365
+ "learning_rate": 1.926472411655301e-05,
366
+ "loss": 0.0593,
367
+ "step": 25500
368
+ },
369
+ {
370
+ "epoch": 0.3748612292564772,
371
+ "grad_norm": 2.129725217819214,
372
+ "learning_rate": 1.925030637696622e-05,
373
+ "loss": 0.0578,
374
+ "step": 26000
375
+ },
376
+ {
377
+ "epoch": 0.38207009904987094,
378
+ "grad_norm": 2.5838279724121094,
379
+ "learning_rate": 1.9235888637379435e-05,
380
+ "loss": 0.0574,
381
+ "step": 26500
382
+ },
383
+ {
384
+ "epoch": 0.38927896884326474,
385
+ "grad_norm": 1.7000998258590698,
386
+ "learning_rate": 1.9221470897792647e-05,
387
+ "loss": 0.0553,
388
+ "step": 27000
389
+ },
390
+ {
391
+ "epoch": 0.39648783863665854,
392
+ "grad_norm": 1.2641727924346924,
393
+ "learning_rate": 1.920705315820586e-05,
394
+ "loss": 0.0549,
395
+ "step": 27500
396
+ },
397
+ {
398
+ "epoch": 0.40369670843005234,
399
+ "grad_norm": 1.7529101371765137,
400
+ "learning_rate": 1.919263541861907e-05,
401
+ "loss": 0.0562,
402
+ "step": 28000
403
+ },
404
+ {
405
+ "epoch": 0.41090557822344614,
406
+ "grad_norm": 1.4027022123336792,
407
+ "learning_rate": 1.9178217679032282e-05,
408
+ "loss": 0.0552,
409
+ "step": 28500
410
+ },
411
+ {
412
+ "epoch": 0.41811444801683995,
413
+ "grad_norm": 1.6767141819000244,
414
+ "learning_rate": 1.9163799939445497e-05,
415
+ "loss": 0.0572,
416
+ "step": 29000
417
+ },
418
+ {
419
+ "epoch": 0.4253233178102337,
420
+ "grad_norm": 0.8946545720100403,
421
+ "learning_rate": 1.914938219985871e-05,
422
+ "loss": 0.0556,
423
+ "step": 29500
424
+ },
425
+ {
426
+ "epoch": 0.4325321876036275,
427
+ "grad_norm": 2.469862937927246,
428
+ "learning_rate": 1.913496446027192e-05,
429
+ "loss": 0.0546,
430
+ "step": 30000
431
+ },
432
+ {
433
+ "epoch": 0.4397410573970213,
434
+ "grad_norm": 3.368171215057373,
435
+ "learning_rate": 1.9120546720685132e-05,
436
+ "loss": 0.0527,
437
+ "step": 30500
438
+ },
439
+ {
440
+ "epoch": 0.4469499271904151,
441
+ "grad_norm": 2.107477903366089,
442
+ "learning_rate": 1.9106128981098344e-05,
443
+ "loss": 0.0538,
444
+ "step": 31000
445
+ },
446
+ {
447
+ "epoch": 0.4541587969838089,
448
+ "grad_norm": 1.8676276206970215,
449
+ "learning_rate": 1.9091711241511555e-05,
450
+ "loss": 0.0529,
451
+ "step": 31500
452
+ },
453
+ {
454
+ "epoch": 0.46136766677720265,
455
+ "grad_norm": 1.8789501190185547,
456
+ "learning_rate": 1.907729350192477e-05,
457
+ "loss": 0.0525,
458
+ "step": 32000
459
+ },
460
+ {
461
+ "epoch": 0.46857653657059645,
462
+ "grad_norm": 1.8588016033172607,
463
+ "learning_rate": 1.9062875762337982e-05,
464
+ "loss": 0.0519,
465
+ "step": 32500
466
+ },
467
+ {
468
+ "epoch": 0.47578540636399025,
469
+ "grad_norm": 1.6721725463867188,
470
+ "learning_rate": 1.9048458022751194e-05,
471
+ "loss": 0.0508,
472
+ "step": 33000
473
+ },
474
+ {
475
+ "epoch": 0.48299427615738405,
476
+ "grad_norm": 1.9724555015563965,
477
+ "learning_rate": 1.903404028316441e-05,
478
+ "loss": 0.0502,
479
+ "step": 33500
480
+ },
481
+ {
482
+ "epoch": 0.49020314595077785,
483
+ "grad_norm": 1.9921311140060425,
484
+ "learning_rate": 1.901962254357762e-05,
485
+ "loss": 0.051,
486
+ "step": 34000
487
+ },
488
+ {
489
+ "epoch": 0.49741201574417165,
490
+ "grad_norm": 2.889782190322876,
491
+ "learning_rate": 1.9005204803990832e-05,
492
+ "loss": 0.0518,
493
+ "step": 34500
494
+ },
495
+ {
496
+ "epoch": 0.5046208855375655,
497
+ "grad_norm": 1.7622694969177246,
498
+ "learning_rate": 1.8990787064404044e-05,
499
+ "loss": 0.0494,
500
+ "step": 35000
501
+ },
502
+ {
503
+ "epoch": 0.5118297553309592,
504
+ "grad_norm": 1.713699460029602,
505
+ "learning_rate": 1.8976369324817256e-05,
506
+ "loss": 0.0493,
507
+ "step": 35500
508
+ },
509
+ {
510
+ "epoch": 0.5190386251243531,
511
+ "grad_norm": 1.262862205505371,
512
+ "learning_rate": 1.896195158523047e-05,
513
+ "loss": 0.0496,
514
+ "step": 36000
515
+ },
516
+ {
517
+ "epoch": 0.5262474949177468,
518
+ "grad_norm": 2.085010051727295,
519
+ "learning_rate": 1.8947533845643682e-05,
520
+ "loss": 0.0509,
521
+ "step": 36500
522
+ },
523
+ {
524
+ "epoch": 0.5334563647111406,
525
+ "grad_norm": 1.6257765293121338,
526
+ "learning_rate": 1.8933116106056894e-05,
527
+ "loss": 0.0498,
528
+ "step": 37000
529
+ },
530
+ {
531
+ "epoch": 0.5406652345045344,
532
+ "grad_norm": 0.6558777093887329,
533
+ "learning_rate": 1.8918698366470106e-05,
534
+ "loss": 0.0484,
535
+ "step": 37500
536
+ },
537
+ {
538
+ "epoch": 0.5478741042979282,
539
+ "grad_norm": 1.7351698875427246,
540
+ "learning_rate": 1.8904280626883318e-05,
541
+ "loss": 0.0496,
542
+ "step": 38000
543
+ },
544
+ {
545
+ "epoch": 0.555082974091322,
546
+ "grad_norm": 0.915392279624939,
547
+ "learning_rate": 1.888986288729653e-05,
548
+ "loss": 0.0467,
549
+ "step": 38500
550
+ },
551
+ {
552
+ "epoch": 0.5622918438847158,
553
+ "grad_norm": 0.9719710350036621,
554
+ "learning_rate": 1.8875445147709744e-05,
555
+ "loss": 0.0491,
556
+ "step": 39000
557
+ },
558
+ {
559
+ "epoch": 0.5695007136781095,
560
+ "grad_norm": 0.4347970485687256,
561
+ "learning_rate": 1.8861027408122956e-05,
562
+ "loss": 0.0478,
563
+ "step": 39500
564
+ },
565
+ {
566
+ "epoch": 0.5767095834715034,
567
+ "grad_norm": 1.4013206958770752,
568
+ "learning_rate": 1.8846609668536168e-05,
569
+ "loss": 0.0482,
570
+ "step": 40000
571
+ },
572
+ {
573
+ "epoch": 0.5839184532648971,
574
+ "grad_norm": 1.6916135549545288,
575
+ "learning_rate": 1.8832191928949383e-05,
576
+ "loss": 0.0487,
577
+ "step": 40500
578
+ },
579
+ {
580
+ "epoch": 0.591127323058291,
581
+ "grad_norm": 1.1497479677200317,
582
+ "learning_rate": 1.8817774189362594e-05,
583
+ "loss": 0.0473,
584
+ "step": 41000
585
+ },
586
+ {
587
+ "epoch": 0.5983361928516847,
588
+ "grad_norm": 2.1202707290649414,
589
+ "learning_rate": 1.8803356449775806e-05,
590
+ "loss": 0.046,
591
+ "step": 41500
592
+ },
593
+ {
594
+ "epoch": 0.6055450626450785,
595
+ "grad_norm": 1.8288294076919556,
596
+ "learning_rate": 1.8788938710189018e-05,
597
+ "loss": 0.0473,
598
+ "step": 42000
599
+ },
600
+ {
601
+ "epoch": 0.6127539324384723,
602
+ "grad_norm": 0.8600142598152161,
603
+ "learning_rate": 1.877452097060223e-05,
604
+ "loss": 0.0452,
605
+ "step": 42500
606
+ },
607
+ {
608
+ "epoch": 0.6199628022318661,
609
+ "grad_norm": 2.8069839477539062,
610
+ "learning_rate": 1.8760103231015445e-05,
611
+ "loss": 0.048,
612
+ "step": 43000
613
+ },
614
+ {
615
+ "epoch": 0.6271716720252599,
616
+ "grad_norm": 0.8850429058074951,
617
+ "learning_rate": 1.8745685491428656e-05,
618
+ "loss": 0.0474,
619
+ "step": 43500
620
+ },
621
+ {
622
+ "epoch": 0.6343805418186537,
623
+ "grad_norm": 1.063219666481018,
624
+ "learning_rate": 1.8731267751841868e-05,
625
+ "loss": 0.0446,
626
+ "step": 44000
627
+ },
628
+ {
629
+ "epoch": 0.6415894116120474,
630
+ "grad_norm": 1.3925724029541016,
631
+ "learning_rate": 1.871685001225508e-05,
632
+ "loss": 0.0468,
633
+ "step": 44500
634
+ },
635
+ {
636
+ "epoch": 0.6487982814054413,
637
+ "grad_norm": 0.9575428366661072,
638
+ "learning_rate": 1.870243227266829e-05,
639
+ "loss": 0.0447,
640
+ "step": 45000
641
+ },
642
+ {
643
+ "epoch": 0.656007151198835,
644
+ "grad_norm": 2.547752618789673,
645
+ "learning_rate": 1.8688014533081503e-05,
646
+ "loss": 0.0456,
647
+ "step": 45500
648
+ },
649
+ {
650
+ "epoch": 0.6632160209922289,
651
+ "grad_norm": 0.6029974222183228,
652
+ "learning_rate": 1.8673596793494718e-05,
653
+ "loss": 0.0464,
654
+ "step": 46000
655
+ },
656
+ {
657
+ "epoch": 0.6704248907856226,
658
+ "grad_norm": 0.27106812596321106,
659
+ "learning_rate": 1.865917905390793e-05,
660
+ "loss": 0.0437,
661
+ "step": 46500
662
+ },
663
+ {
664
+ "epoch": 0.6776337605790165,
665
+ "grad_norm": 1.3233801126480103,
666
+ "learning_rate": 1.864476131432114e-05,
667
+ "loss": 0.0447,
668
+ "step": 47000
669
+ },
670
+ {
671
+ "epoch": 0.6848426303724102,
672
+ "grad_norm": 0.38903898000717163,
673
+ "learning_rate": 1.8630343574734356e-05,
674
+ "loss": 0.0455,
675
+ "step": 47500
676
+ },
677
+ {
678
+ "epoch": 0.692051500165804,
679
+ "grad_norm": 1.247036337852478,
680
+ "learning_rate": 1.8615925835147568e-05,
681
+ "loss": 0.044,
682
+ "step": 48000
683
+ },
684
+ {
685
+ "epoch": 0.6992603699591978,
686
+ "grad_norm": 0.9771102666854858,
687
+ "learning_rate": 1.860150809556078e-05,
688
+ "loss": 0.0446,
689
+ "step": 48500
690
+ },
691
+ {
692
+ "epoch": 0.7064692397525916,
693
+ "grad_norm": 1.6191680431365967,
694
+ "learning_rate": 1.858709035597399e-05,
695
+ "loss": 0.0455,
696
+ "step": 49000
697
+ },
698
+ {
699
+ "epoch": 0.7136781095459854,
700
+ "grad_norm": 0.9542379975318909,
701
+ "learning_rate": 1.8572672616387203e-05,
702
+ "loss": 0.0426,
703
+ "step": 49500
704
+ },
705
+ {
706
+ "epoch": 0.7208869793393792,
707
+ "grad_norm": 1.6160619258880615,
708
+ "learning_rate": 1.8558254876800418e-05,
709
+ "loss": 0.0433,
710
+ "step": 50000
711
+ },
712
+ {
713
+ "epoch": 0.7280958491327729,
714
+ "grad_norm": 1.1810977458953857,
715
+ "learning_rate": 1.854383713721363e-05,
716
+ "loss": 0.0443,
717
+ "step": 50500
718
+ },
719
+ {
720
+ "epoch": 0.7353047189261668,
721
+ "grad_norm": 1.4848960638046265,
722
+ "learning_rate": 1.852941939762684e-05,
723
+ "loss": 0.0442,
724
+ "step": 51000
725
+ },
726
+ {
727
+ "epoch": 0.7425135887195605,
728
+ "grad_norm": 1.2140188217163086,
729
+ "learning_rate": 1.8515001658040053e-05,
730
+ "loss": 0.0436,
731
+ "step": 51500
732
+ },
733
+ {
734
+ "epoch": 0.7497224585129544,
735
+ "grad_norm": 0.6803346276283264,
736
+ "learning_rate": 1.8500583918453265e-05,
737
+ "loss": 0.0416,
738
+ "step": 52000
739
+ },
740
+ {
741
+ "epoch": 0.7569313283063481,
742
+ "grad_norm": 2.847879409790039,
743
+ "learning_rate": 1.8486166178866477e-05,
744
+ "loss": 0.0401,
745
+ "step": 52500
746
+ },
747
+ {
748
+ "epoch": 0.7641401980997419,
749
+ "grad_norm": 1.3574286699295044,
750
+ "learning_rate": 1.8471748439279692e-05,
751
+ "loss": 0.0426,
752
+ "step": 53000
753
+ },
754
+ {
755
+ "epoch": 0.7713490678931357,
756
+ "grad_norm": 1.5763428211212158,
757
+ "learning_rate": 1.8457330699692903e-05,
758
+ "loss": 0.0416,
759
+ "step": 53500
760
+ },
761
+ {
762
+ "epoch": 0.7785579376865295,
763
+ "grad_norm": 2.006143808364868,
764
+ "learning_rate": 1.8442912960106115e-05,
765
+ "loss": 0.0423,
766
+ "step": 54000
767
+ },
768
+ {
769
+ "epoch": 0.7857668074799233,
770
+ "grad_norm": 2.0041260719299316,
771
+ "learning_rate": 1.842849522051933e-05,
772
+ "loss": 0.043,
773
+ "step": 54500
774
+ },
775
+ {
776
+ "epoch": 0.7929756772733171,
777
+ "grad_norm": 1.0083436965942383,
778
+ "learning_rate": 1.8414077480932542e-05,
779
+ "loss": 0.0428,
780
+ "step": 55000
781
+ },
782
+ {
783
+ "epoch": 0.8001845470667108,
784
+ "grad_norm": 1.2364863157272339,
785
+ "learning_rate": 1.8399659741345754e-05,
786
+ "loss": 0.0431,
787
+ "step": 55500
788
+ },
789
+ {
790
+ "epoch": 0.8073934168601047,
791
+ "grad_norm": 1.1397020816802979,
792
+ "learning_rate": 1.8385242001758965e-05,
793
+ "loss": 0.0408,
794
+ "step": 56000
795
+ },
796
+ {
797
+ "epoch": 0.8146022866534984,
798
+ "grad_norm": 1.046647071838379,
799
+ "learning_rate": 1.8370824262172177e-05,
800
+ "loss": 0.0424,
801
+ "step": 56500
802
+ },
803
+ {
804
+ "epoch": 0.8218111564468923,
805
+ "grad_norm": 0.7180289626121521,
806
+ "learning_rate": 1.8356406522585392e-05,
807
+ "loss": 0.0417,
808
+ "step": 57000
809
+ },
810
+ {
811
+ "epoch": 0.829020026240286,
812
+ "grad_norm": 1.866095781326294,
813
+ "learning_rate": 1.8341988782998604e-05,
814
+ "loss": 0.0406,
815
+ "step": 57500
816
+ },
817
+ {
818
+ "epoch": 0.8362288960336799,
819
+ "grad_norm": 1.7192025184631348,
820
+ "learning_rate": 1.8327571043411815e-05,
821
+ "loss": 0.042,
822
+ "step": 58000
823
+ },
824
+ {
825
+ "epoch": 0.8434377658270736,
826
+ "grad_norm": 1.3043447732925415,
827
+ "learning_rate": 1.8313153303825027e-05,
828
+ "loss": 0.0419,
829
+ "step": 58500
830
+ },
831
+ {
832
+ "epoch": 0.8506466356204674,
833
+ "grad_norm": 2.372190237045288,
834
+ "learning_rate": 1.829873556423824e-05,
835
+ "loss": 0.0421,
836
+ "step": 59000
837
+ },
838
+ {
839
+ "epoch": 0.8578555054138612,
840
+ "grad_norm": 0.9028930068016052,
841
+ "learning_rate": 1.828431782465145e-05,
842
+ "loss": 0.0396,
843
+ "step": 59500
844
+ },
845
+ {
846
+ "epoch": 0.865064375207255,
847
+ "grad_norm": 1.2869058847427368,
848
+ "learning_rate": 1.8269900085064665e-05,
849
+ "loss": 0.0401,
850
+ "step": 60000
851
+ },
852
+ {
853
+ "epoch": 0.8722732450006488,
854
+ "grad_norm": 2.214855670928955,
855
+ "learning_rate": 1.8255482345477877e-05,
856
+ "loss": 0.04,
857
+ "step": 60500
858
+ },
859
+ {
860
+ "epoch": 0.8794821147940426,
861
+ "grad_norm": 0.9826574325561523,
862
+ "learning_rate": 1.824106460589109e-05,
863
+ "loss": 0.0397,
864
+ "step": 61000
865
+ },
866
+ {
867
+ "epoch": 0.8866909845874363,
868
+ "grad_norm": 0.7741074562072754,
869
+ "learning_rate": 1.8226646866304304e-05,
870
+ "loss": 0.0397,
871
+ "step": 61500
872
+ },
873
+ {
874
+ "epoch": 0.8938998543808302,
875
+ "grad_norm": 1.2778081893920898,
876
+ "learning_rate": 1.8212229126717516e-05,
877
+ "loss": 0.0396,
878
+ "step": 62000
879
+ },
880
+ {
881
+ "epoch": 0.9011087241742239,
882
+ "grad_norm": 0.7415226697921753,
883
+ "learning_rate": 1.8197811387130727e-05,
884
+ "loss": 0.0398,
885
+ "step": 62500
886
+ },
887
+ {
888
+ "epoch": 0.9083175939676178,
889
+ "grad_norm": 2.152737617492676,
890
+ "learning_rate": 1.818339364754394e-05,
891
+ "loss": 0.0395,
892
+ "step": 63000
893
+ },
894
+ {
895
+ "epoch": 0.9155264637610115,
896
+ "grad_norm": 0.9719590544700623,
897
+ "learning_rate": 1.816897590795715e-05,
898
+ "loss": 0.0387,
899
+ "step": 63500
900
+ },
901
+ {
902
+ "epoch": 0.9227353335544053,
903
+ "grad_norm": 1.4587551355361938,
904
+ "learning_rate": 1.8154558168370366e-05,
905
+ "loss": 0.0395,
906
+ "step": 64000
907
+ },
908
+ {
909
+ "epoch": 0.9299442033477991,
910
+ "grad_norm": 1.4218809604644775,
911
+ "learning_rate": 1.8140140428783577e-05,
912
+ "loss": 0.0375,
913
+ "step": 64500
914
+ },
915
+ {
916
+ "epoch": 0.9371530731411929,
917
+ "grad_norm": 1.8009737730026245,
918
+ "learning_rate": 1.812572268919679e-05,
919
+ "loss": 0.0387,
920
+ "step": 65000
921
+ },
922
+ {
923
+ "epoch": 0.9443619429345868,
924
+ "grad_norm": 1.2379016876220703,
925
+ "learning_rate": 1.811130494961e-05,
926
+ "loss": 0.0386,
927
+ "step": 65500
928
+ },
929
+ {
930
+ "epoch": 0.9515708127279805,
931
+ "grad_norm": 1.1901589632034302,
932
+ "learning_rate": 1.8096887210023216e-05,
933
+ "loss": 0.0381,
934
+ "step": 66000
935
+ },
936
+ {
937
+ "epoch": 0.9587796825213742,
938
+ "grad_norm": 1.0341569185256958,
939
+ "learning_rate": 1.8082469470436424e-05,
940
+ "loss": 0.0402,
941
+ "step": 66500
942
+ },
943
+ {
944
+ "epoch": 0.9659885523147681,
945
+ "grad_norm": 1.4235957860946655,
946
+ "learning_rate": 1.806805173084964e-05,
947
+ "loss": 0.0382,
948
+ "step": 67000
949
+ },
950
+ {
951
+ "epoch": 0.9731974221081618,
952
+ "grad_norm": 1.095893383026123,
953
+ "learning_rate": 1.805363399126285e-05,
954
+ "loss": 0.0396,
955
+ "step": 67500
956
+ },
957
+ {
958
+ "epoch": 0.9804062919015557,
959
+ "grad_norm": 1.8859561681747437,
960
+ "learning_rate": 1.8039216251676063e-05,
961
+ "loss": 0.038,
962
+ "step": 68000
963
+ },
964
+ {
965
+ "epoch": 0.9876151616949495,
966
+ "grad_norm": 1.8770360946655273,
967
+ "learning_rate": 1.8024798512089278e-05,
968
+ "loss": 0.039,
969
+ "step": 68500
970
+ },
971
+ {
972
+ "epoch": 0.9948240314883433,
973
+ "grad_norm": 1.870827555656433,
974
+ "learning_rate": 1.801038077250249e-05,
975
+ "loss": 0.038,
976
+ "step": 69000
977
+ },
978
+ {
979
+ "epoch": 1.0,
980
+ "eval_f1": 0.9895049158009324,
981
+ "eval_loss": 0.034001659601926804,
982
+ "eval_runtime": 683.1241,
983
+ "eval_samples_per_second": 1528.989,
984
+ "eval_steps_per_second": 47.782,
985
+ "step": 69359
986
+ },
987
+ {
988
+ "epoch": 1.002032901281737,
989
+ "grad_norm": 0.4856395125389099,
990
+ "learning_rate": 1.79959630329157e-05,
991
+ "loss": 0.0352,
992
+ "step": 69500
993
+ },
994
+ {
995
+ "epoch": 1.009241771075131,
996
+ "grad_norm": 1.8835086822509766,
997
+ "learning_rate": 1.7981545293328913e-05,
998
+ "loss": 0.0287,
999
+ "step": 70000
1000
+ },
1001
+ {
1002
+ "epoch": 1.0164506408685245,
1003
+ "grad_norm": 1.941490888595581,
1004
+ "learning_rate": 1.7967127553742124e-05,
1005
+ "loss": 0.0307,
1006
+ "step": 70500
1007
+ },
1008
+ {
1009
+ "epoch": 1.0236595106619184,
1010
+ "grad_norm": 1.525707483291626,
1011
+ "learning_rate": 1.795270981415534e-05,
1012
+ "loss": 0.03,
1013
+ "step": 71000
1014
+ },
1015
+ {
1016
+ "epoch": 1.0308683804553123,
1017
+ "grad_norm": 0.6174446940422058,
1018
+ "learning_rate": 1.793829207456855e-05,
1019
+ "loss": 0.029,
1020
+ "step": 71500
1021
+ },
1022
+ {
1023
+ "epoch": 1.0380772502487061,
1024
+ "grad_norm": 1.043771505355835,
1025
+ "learning_rate": 1.7923874334981763e-05,
1026
+ "loss": 0.0311,
1027
+ "step": 72000
1028
+ },
1029
+ {
1030
+ "epoch": 1.0452861200420998,
1031
+ "grad_norm": 0.28765255212783813,
1032
+ "learning_rate": 1.7909456595394978e-05,
1033
+ "loss": 0.0291,
1034
+ "step": 72500
1035
+ },
1036
+ {
1037
+ "epoch": 1.0524949898354936,
1038
+ "grad_norm": 0.8367669582366943,
1039
+ "learning_rate": 1.789503885580819e-05,
1040
+ "loss": 0.0307,
1041
+ "step": 73000
1042
+ },
1043
+ {
1044
+ "epoch": 1.0597038596288875,
1045
+ "grad_norm": 0.8930952548980713,
1046
+ "learning_rate": 1.7880621116221398e-05,
1047
+ "loss": 0.0297,
1048
+ "step": 73500
1049
+ },
1050
+ {
1051
+ "epoch": 1.066912729422281,
1052
+ "grad_norm": 1.0413399934768677,
1053
+ "learning_rate": 1.7866203376634613e-05,
1054
+ "loss": 0.03,
1055
+ "step": 74000
1056
+ },
1057
+ {
1058
+ "epoch": 1.074121599215675,
1059
+ "grad_norm": 1.1929751634597778,
1060
+ "learning_rate": 1.7851785637047825e-05,
1061
+ "loss": 0.0287,
1062
+ "step": 74500
1063
+ },
1064
+ {
1065
+ "epoch": 1.0813304690090688,
1066
+ "grad_norm": 0.8676954507827759,
1067
+ "learning_rate": 1.7837367897461036e-05,
1068
+ "loss": 0.0307,
1069
+ "step": 75000
1070
+ },
1071
+ {
1072
+ "epoch": 1.0885393388024625,
1073
+ "grad_norm": 0.733383059501648,
1074
+ "learning_rate": 1.782295015787425e-05,
1075
+ "loss": 0.029,
1076
+ "step": 75500
1077
+ },
1078
+ {
1079
+ "epoch": 1.0957482085958563,
1080
+ "grad_norm": 1.005913257598877,
1081
+ "learning_rate": 1.7808532418287463e-05,
1082
+ "loss": 0.0288,
1083
+ "step": 76000
1084
+ },
1085
+ {
1086
+ "epoch": 1.1029570783892502,
1087
+ "grad_norm": 1.4946510791778564,
1088
+ "learning_rate": 1.7794114678700675e-05,
1089
+ "loss": 0.0294,
1090
+ "step": 76500
1091
+ },
1092
+ {
1093
+ "epoch": 1.110165948182644,
1094
+ "grad_norm": 0.966665506362915,
1095
+ "learning_rate": 1.7779696939113886e-05,
1096
+ "loss": 0.0311,
1097
+ "step": 77000
1098
+ },
1099
+ {
1100
+ "epoch": 1.1173748179760377,
1101
+ "grad_norm": 0.8129379749298096,
1102
+ "learning_rate": 1.7765279199527098e-05,
1103
+ "loss": 0.0301,
1104
+ "step": 77500
1105
+ },
1106
+ {
1107
+ "epoch": 1.1245836877694315,
1108
+ "grad_norm": 1.1672717332839966,
1109
+ "learning_rate": 1.7750861459940313e-05,
1110
+ "loss": 0.0297,
1111
+ "step": 78000
1112
+ },
1113
+ {
1114
+ "epoch": 1.1317925575628254,
1115
+ "grad_norm": 1.0149409770965576,
1116
+ "learning_rate": 1.7736443720353525e-05,
1117
+ "loss": 0.031,
1118
+ "step": 78500
1119
+ },
1120
+ {
1121
+ "epoch": 1.139001427356219,
1122
+ "grad_norm": 1.3319754600524902,
1123
+ "learning_rate": 1.7722025980766736e-05,
1124
+ "loss": 0.0294,
1125
+ "step": 79000
1126
+ },
1127
+ {
1128
+ "epoch": 1.1462102971496129,
1129
+ "grad_norm": 3.036787509918213,
1130
+ "learning_rate": 1.770760824117995e-05,
1131
+ "loss": 0.0294,
1132
+ "step": 79500
1133
+ },
1134
+ {
1135
+ "epoch": 1.1534191669430067,
1136
+ "grad_norm": 0.6281238198280334,
1137
+ "learning_rate": 1.7693190501593163e-05,
1138
+ "loss": 0.0312,
1139
+ "step": 80000
1140
+ },
1141
+ {
1142
+ "epoch": 1.1606280367364006,
1143
+ "grad_norm": 1.39284086227417,
1144
+ "learning_rate": 1.767877276200637e-05,
1145
+ "loss": 0.0299,
1146
+ "step": 80500
1147
+ },
1148
+ {
1149
+ "epoch": 1.1678369065297942,
1150
+ "grad_norm": 2.4636764526367188,
1151
+ "learning_rate": 1.7664355022419587e-05,
1152
+ "loss": 0.0304,
1153
+ "step": 81000
1154
+ },
1155
+ {
1156
+ "epoch": 1.175045776323188,
1157
+ "grad_norm": 1.0513309240341187,
1158
+ "learning_rate": 1.7649937282832798e-05,
1159
+ "loss": 0.0293,
1160
+ "step": 81500
1161
+ },
1162
+ {
1163
+ "epoch": 1.182254646116582,
1164
+ "grad_norm": 0.739205539226532,
1165
+ "learning_rate": 1.763551954324601e-05,
1166
+ "loss": 0.0297,
1167
+ "step": 82000
1168
+ },
1169
+ {
1170
+ "epoch": 1.1894635159099756,
1171
+ "grad_norm": 1.1646817922592163,
1172
+ "learning_rate": 1.7621101803659225e-05,
1173
+ "loss": 0.0281,
1174
+ "step": 82500
1175
+ },
1176
+ {
1177
+ "epoch": 1.1966723857033694,
1178
+ "grad_norm": 1.6882481575012207,
1179
+ "learning_rate": 1.7606684064072437e-05,
1180
+ "loss": 0.0308,
1181
+ "step": 83000
1182
+ },
1183
+ {
1184
+ "epoch": 1.2038812554967633,
1185
+ "grad_norm": 2.1905980110168457,
1186
+ "learning_rate": 1.759226632448565e-05,
1187
+ "loss": 0.0301,
1188
+ "step": 83500
1189
+ },
1190
+ {
1191
+ "epoch": 1.211090125290157,
1192
+ "grad_norm": 0.4102253317832947,
1193
+ "learning_rate": 1.757784858489886e-05,
1194
+ "loss": 0.0296,
1195
+ "step": 84000
1196
+ },
1197
+ {
1198
+ "epoch": 1.2182989950835508,
1199
+ "grad_norm": 1.5355827808380127,
1200
+ "learning_rate": 1.7563430845312072e-05,
1201
+ "loss": 0.031,
1202
+ "step": 84500
1203
+ },
1204
+ {
1205
+ "epoch": 1.2255078648769446,
1206
+ "grad_norm": 0.4144400954246521,
1207
+ "learning_rate": 1.7549013105725287e-05,
1208
+ "loss": 0.0303,
1209
+ "step": 85000
1210
+ },
1211
+ {
1212
+ "epoch": 1.2327167346703383,
1213
+ "grad_norm": 0.5286178588867188,
1214
+ "learning_rate": 1.75345953661385e-05,
1215
+ "loss": 0.0311,
1216
+ "step": 85500
1217
+ },
1218
+ {
1219
+ "epoch": 1.2399256044637321,
1220
+ "grad_norm": 1.3401720523834229,
1221
+ "learning_rate": 1.752017762655171e-05,
1222
+ "loss": 0.0303,
1223
+ "step": 86000
1224
+ },
1225
+ {
1226
+ "epoch": 1.247134474257126,
1227
+ "grad_norm": 1.5546993017196655,
1228
+ "learning_rate": 1.7505759886964925e-05,
1229
+ "loss": 0.0296,
1230
+ "step": 86500
1231
+ },
1232
+ {
1233
+ "epoch": 1.2543433440505198,
1234
+ "grad_norm": 1.7993361949920654,
1235
+ "learning_rate": 1.7491342147378137e-05,
1236
+ "loss": 0.03,
1237
+ "step": 87000
1238
+ },
1239
+ {
1240
+ "epoch": 1.2615522138439135,
1241
+ "grad_norm": 1.058311939239502,
1242
+ "learning_rate": 1.7476924407791345e-05,
1243
+ "loss": 0.0283,
1244
+ "step": 87500
1245
+ },
1246
+ {
1247
+ "epoch": 1.2687610836373073,
1248
+ "grad_norm": 1.1616915464401245,
1249
+ "learning_rate": 1.746250666820456e-05,
1250
+ "loss": 0.0306,
1251
+ "step": 88000
1252
+ },
1253
+ {
1254
+ "epoch": 1.2759699534307012,
1255
+ "grad_norm": 1.5120762586593628,
1256
+ "learning_rate": 1.7448088928617772e-05,
1257
+ "loss": 0.0296,
1258
+ "step": 88500
1259
+ },
1260
+ {
1261
+ "epoch": 1.283178823224095,
1262
+ "grad_norm": 1.033087134361267,
1263
+ "learning_rate": 1.7433671189030984e-05,
1264
+ "loss": 0.0296,
1265
+ "step": 89000
1266
+ },
1267
+ {
1268
+ "epoch": 1.2903876930174887,
1269
+ "grad_norm": 0.9456692337989807,
1270
+ "learning_rate": 1.74192534494442e-05,
1271
+ "loss": 0.0293,
1272
+ "step": 89500
1273
+ },
1274
+ {
1275
+ "epoch": 1.2975965628108825,
1276
+ "grad_norm": 0.4252309799194336,
1277
+ "learning_rate": 1.740483570985741e-05,
1278
+ "loss": 0.0287,
1279
+ "step": 90000
1280
+ },
1281
+ {
1282
+ "epoch": 1.3048054326042764,
1283
+ "grad_norm": 1.4315825700759888,
1284
+ "learning_rate": 1.7390417970270622e-05,
1285
+ "loss": 0.0314,
1286
+ "step": 90500
1287
+ },
1288
+ {
1289
+ "epoch": 1.31201430239767,
1290
+ "grad_norm": 0.9023242592811584,
1291
+ "learning_rate": 1.7376000230683834e-05,
1292
+ "loss": 0.0296,
1293
+ "step": 91000
1294
+ },
1295
+ {
1296
+ "epoch": 1.3192231721910639,
1297
+ "grad_norm": 1.8055963516235352,
1298
+ "learning_rate": 1.7361582491097045e-05,
1299
+ "loss": 0.0289,
1300
+ "step": 91500
1301
+ },
1302
+ {
1303
+ "epoch": 1.3264320419844577,
1304
+ "grad_norm": 1.2063618898391724,
1305
+ "learning_rate": 1.734716475151026e-05,
1306
+ "loss": 0.03,
1307
+ "step": 92000
1308
+ },
1309
+ {
1310
+ "epoch": 1.3336409117778514,
1311
+ "grad_norm": 2.5645272731781006,
1312
+ "learning_rate": 1.7332747011923472e-05,
1313
+ "loss": 0.0289,
1314
+ "step": 92500
1315
+ },
1316
+ {
1317
+ "epoch": 1.3408497815712452,
1318
+ "grad_norm": 1.9335203170776367,
1319
+ "learning_rate": 1.7318329272336684e-05,
1320
+ "loss": 0.0285,
1321
+ "step": 93000
1322
+ },
1323
+ {
1324
+ "epoch": 1.348058651364639,
1325
+ "grad_norm": 0.8842147588729858,
1326
+ "learning_rate": 1.73039115327499e-05,
1327
+ "loss": 0.0287,
1328
+ "step": 93500
1329
+ },
1330
+ {
1331
+ "epoch": 1.3552675211580327,
1332
+ "grad_norm": 1.2006937265396118,
1333
+ "learning_rate": 1.728949379316311e-05,
1334
+ "loss": 0.0288,
1335
+ "step": 94000
1336
+ },
1337
+ {
1338
+ "epoch": 1.3624763909514266,
1339
+ "grad_norm": 1.1261006593704224,
1340
+ "learning_rate": 1.7275076053576322e-05,
1341
+ "loss": 0.0293,
1342
+ "step": 94500
1343
+ },
1344
+ {
1345
+ "epoch": 1.3696852607448204,
1346
+ "grad_norm": 1.2065215110778809,
1347
+ "learning_rate": 1.7260658313989534e-05,
1348
+ "loss": 0.0282,
1349
+ "step": 95000
1350
+ },
1351
+ {
1352
+ "epoch": 1.3768941305382143,
1353
+ "grad_norm": 1.8486534357070923,
1354
+ "learning_rate": 1.7246240574402746e-05,
1355
+ "loss": 0.029,
1356
+ "step": 95500
1357
+ },
1358
+ {
1359
+ "epoch": 1.384103000331608,
1360
+ "grad_norm": 0.8908069729804993,
1361
+ "learning_rate": 1.7231822834815957e-05,
1362
+ "loss": 0.0294,
1363
+ "step": 96000
1364
+ },
1365
+ {
1366
+ "epoch": 1.3913118701250018,
1367
+ "grad_norm": 0.6375325918197632,
1368
+ "learning_rate": 1.7217405095229172e-05,
1369
+ "loss": 0.0287,
1370
+ "step": 96500
1371
+ },
1372
+ {
1373
+ "epoch": 1.3985207399183957,
1374
+ "grad_norm": 1.9673434495925903,
1375
+ "learning_rate": 1.7202987355642384e-05,
1376
+ "loss": 0.0282,
1377
+ "step": 97000
1378
+ },
1379
+ {
1380
+ "epoch": 1.4057296097117895,
1381
+ "grad_norm": 1.1606006622314453,
1382
+ "learning_rate": 1.7188569616055596e-05,
1383
+ "loss": 0.0284,
1384
+ "step": 97500
1385
+ },
1386
+ {
1387
+ "epoch": 1.4129384795051831,
1388
+ "grad_norm": 1.003493309020996,
1389
+ "learning_rate": 1.7174151876468807e-05,
1390
+ "loss": 0.0283,
1391
+ "step": 98000
1392
+ },
1393
+ {
1394
+ "epoch": 1.420147349298577,
1395
+ "grad_norm": 0.9186868071556091,
1396
+ "learning_rate": 1.715973413688202e-05,
1397
+ "loss": 0.0277,
1398
+ "step": 98500
1399
+ },
1400
+ {
1401
+ "epoch": 1.4273562190919709,
1402
+ "grad_norm": 1.3305683135986328,
1403
+ "learning_rate": 1.7145316397295234e-05,
1404
+ "loss": 0.0292,
1405
+ "step": 99000
1406
+ },
1407
+ {
1408
+ "epoch": 1.4345650888853645,
1409
+ "grad_norm": 1.3776835203170776,
1410
+ "learning_rate": 1.7130898657708446e-05,
1411
+ "loss": 0.0286,
1412
+ "step": 99500
1413
+ },
1414
+ {
1415
+ "epoch": 1.4417739586787583,
1416
+ "grad_norm": 1.6687921285629272,
1417
+ "learning_rate": 1.7116480918121658e-05,
1418
+ "loss": 0.029,
1419
+ "step": 100000
1420
+ },
1421
+ {
1422
+ "epoch": 1.4489828284721522,
1423
+ "grad_norm": 1.9249308109283447,
1424
+ "learning_rate": 1.7102063178534873e-05,
1425
+ "loss": 0.0262,
1426
+ "step": 100500
1427
+ },
1428
+ {
1429
+ "epoch": 1.4561916982655458,
1430
+ "grad_norm": 1.1834752559661865,
1431
+ "learning_rate": 1.7087645438948084e-05,
1432
+ "loss": 0.0294,
1433
+ "step": 101000
1434
+ },
1435
+ {
1436
+ "epoch": 1.4634005680589397,
1437
+ "grad_norm": 2.1350696086883545,
1438
+ "learning_rate": 1.7073227699361296e-05,
1439
+ "loss": 0.0276,
1440
+ "step": 101500
1441
+ },
1442
+ {
1443
+ "epoch": 1.4706094378523336,
1444
+ "grad_norm": 2.563725709915161,
1445
+ "learning_rate": 1.7058809959774508e-05,
1446
+ "loss": 0.0276,
1447
+ "step": 102000
1448
+ },
1449
+ {
1450
+ "epoch": 1.4778183076457272,
1451
+ "grad_norm": 0.9226647019386292,
1452
+ "learning_rate": 1.704439222018772e-05,
1453
+ "loss": 0.0284,
1454
+ "step": 102500
1455
+ },
1456
+ {
1457
+ "epoch": 1.485027177439121,
1458
+ "grad_norm": 0.34231990575790405,
1459
+ "learning_rate": 1.702997448060093e-05,
1460
+ "loss": 0.0281,
1461
+ "step": 103000
1462
+ },
1463
+ {
1464
+ "epoch": 1.492236047232515,
1465
+ "grad_norm": 2.339191436767578,
1466
+ "learning_rate": 1.7015556741014146e-05,
1467
+ "loss": 0.029,
1468
+ "step": 103500
1469
+ },
1470
+ {
1471
+ "epoch": 1.4994449170259085,
1472
+ "grad_norm": 1.7756520509719849,
1473
+ "learning_rate": 1.7001139001427358e-05,
1474
+ "loss": 0.0288,
1475
+ "step": 104000
1476
+ },
1477
+ {
1478
+ "epoch": 1.5066537868193026,
1479
+ "grad_norm": 2.0807387828826904,
1480
+ "learning_rate": 1.698672126184057e-05,
1481
+ "loss": 0.0281,
1482
+ "step": 104500
1483
+ },
1484
+ {
1485
+ "epoch": 1.5138626566126963,
1486
+ "grad_norm": 1.4787542819976807,
1487
+ "learning_rate": 1.6972303522253785e-05,
1488
+ "loss": 0.0284,
1489
+ "step": 105000
1490
+ },
1491
+ {
1492
+ "epoch": 1.52107152640609,
1493
+ "grad_norm": 1.719581961631775,
1494
+ "learning_rate": 1.6957885782666993e-05,
1495
+ "loss": 0.0287,
1496
+ "step": 105500
1497
+ },
1498
+ {
1499
+ "epoch": 1.528280396199484,
1500
+ "grad_norm": 0.8158332109451294,
1501
+ "learning_rate": 1.6943468043080208e-05,
1502
+ "loss": 0.029,
1503
+ "step": 106000
1504
+ },
1505
+ {
1506
+ "epoch": 1.5354892659928776,
1507
+ "grad_norm": 0.10212863981723785,
1508
+ "learning_rate": 1.692905030349342e-05,
1509
+ "loss": 0.0275,
1510
+ "step": 106500
1511
+ },
1512
+ {
1513
+ "epoch": 1.5426981357862715,
1514
+ "grad_norm": 1.0970171689987183,
1515
+ "learning_rate": 1.691463256390663e-05,
1516
+ "loss": 0.0282,
1517
+ "step": 107000
1518
+ },
1519
+ {
1520
+ "epoch": 1.5499070055796653,
1521
+ "grad_norm": 0.4221758246421814,
1522
+ "learning_rate": 1.6900214824319846e-05,
1523
+ "loss": 0.0285,
1524
+ "step": 107500
1525
+ },
1526
+ {
1527
+ "epoch": 1.557115875373059,
1528
+ "grad_norm": 1.5400525331497192,
1529
+ "learning_rate": 1.6885797084733058e-05,
1530
+ "loss": 0.0282,
1531
+ "step": 108000
1532
+ },
1533
+ {
1534
+ "epoch": 1.5643247451664528,
1535
+ "grad_norm": 1.6638318300247192,
1536
+ "learning_rate": 1.687137934514627e-05,
1537
+ "loss": 0.0301,
1538
+ "step": 108500
1539
+ },
1540
+ {
1541
+ "epoch": 1.5715336149598467,
1542
+ "grad_norm": 1.3407906293869019,
1543
+ "learning_rate": 1.685696160555948e-05,
1544
+ "loss": 0.0276,
1545
+ "step": 109000
1546
+ },
1547
+ {
1548
+ "epoch": 1.5787424847532403,
1549
+ "grad_norm": 0.8864063024520874,
1550
+ "learning_rate": 1.6842543865972693e-05,
1551
+ "loss": 0.0273,
1552
+ "step": 109500
1553
+ },
1554
+ {
1555
+ "epoch": 1.5859513545466342,
1556
+ "grad_norm": 1.5699615478515625,
1557
+ "learning_rate": 1.6828126126385905e-05,
1558
+ "loss": 0.0267,
1559
+ "step": 110000
1560
+ },
1561
+ {
1562
+ "epoch": 1.593160224340028,
1563
+ "grad_norm": 0.20337066054344177,
1564
+ "learning_rate": 1.681370838679912e-05,
1565
+ "loss": 0.0285,
1566
+ "step": 110500
1567
+ },
1568
+ {
1569
+ "epoch": 1.6003690941334217,
1570
+ "grad_norm": 0.7260587811470032,
1571
+ "learning_rate": 1.679929064721233e-05,
1572
+ "loss": 0.028,
1573
+ "step": 111000
1574
+ },
1575
+ {
1576
+ "epoch": 1.6075779639268155,
1577
+ "grad_norm": 0.434865266084671,
1578
+ "learning_rate": 1.6784872907625543e-05,
1579
+ "loss": 0.027,
1580
+ "step": 111500
1581
+ },
1582
+ {
1583
+ "epoch": 1.6147868337202094,
1584
+ "grad_norm": 1.0067859888076782,
1585
+ "learning_rate": 1.677045516803876e-05,
1586
+ "loss": 0.0276,
1587
+ "step": 112000
1588
+ },
1589
+ {
1590
+ "epoch": 1.621995703513603,
1591
+ "grad_norm": 1.7014882564544678,
1592
+ "learning_rate": 1.6756037428451967e-05,
1593
+ "loss": 0.0276,
1594
+ "step": 112500
1595
+ },
1596
+ {
1597
+ "epoch": 1.629204573306997,
1598
+ "grad_norm": 1.2809230089187622,
1599
+ "learning_rate": 1.674161968886518e-05,
1600
+ "loss": 0.0276,
1601
+ "step": 113000
1602
+ },
1603
+ {
1604
+ "epoch": 1.6364134431003907,
1605
+ "grad_norm": 1.2574232816696167,
1606
+ "learning_rate": 1.6727201949278393e-05,
1607
+ "loss": 0.0284,
1608
+ "step": 113500
1609
+ },
1610
+ {
1611
+ "epoch": 1.6436223128937844,
1612
+ "grad_norm": 1.3797274827957153,
1613
+ "learning_rate": 1.6712784209691605e-05,
1614
+ "loss": 0.0282,
1615
+ "step": 114000
1616
+ },
1617
+ {
1618
+ "epoch": 1.6508311826871784,
1619
+ "grad_norm": 0.32101693749427795,
1620
+ "learning_rate": 1.669836647010482e-05,
1621
+ "loss": 0.0274,
1622
+ "step": 114500
1623
+ },
1624
+ {
1625
+ "epoch": 1.658040052480572,
1626
+ "grad_norm": 0.41121360659599304,
1627
+ "learning_rate": 1.6683948730518032e-05,
1628
+ "loss": 0.0286,
1629
+ "step": 115000
1630
+ },
1631
+ {
1632
+ "epoch": 1.665248922273966,
1633
+ "grad_norm": 0.5161770582199097,
1634
+ "learning_rate": 1.6669530990931243e-05,
1635
+ "loss": 0.0271,
1636
+ "step": 115500
1637
+ },
1638
+ {
1639
+ "epoch": 1.6724577920673598,
1640
+ "grad_norm": 1.153785228729248,
1641
+ "learning_rate": 1.6655113251344455e-05,
1642
+ "loss": 0.0264,
1643
+ "step": 116000
1644
+ },
1645
+ {
1646
+ "epoch": 1.6796666618607534,
1647
+ "grad_norm": 1.5621336698532104,
1648
+ "learning_rate": 1.6640695511757667e-05,
1649
+ "loss": 0.0272,
1650
+ "step": 116500
1651
+ },
1652
+ {
1653
+ "epoch": 1.6868755316541473,
1654
+ "grad_norm": 2.4250948429107666,
1655
+ "learning_rate": 1.662627777217088e-05,
1656
+ "loss": 0.0282,
1657
+ "step": 117000
1658
+ },
1659
+ {
1660
+ "epoch": 1.6940844014475411,
1661
+ "grad_norm": 0.24833956360816956,
1662
+ "learning_rate": 1.6611860032584094e-05,
1663
+ "loss": 0.0279,
1664
+ "step": 117500
1665
+ },
1666
+ {
1667
+ "epoch": 1.7012932712409348,
1668
+ "grad_norm": 2.7739059925079346,
1669
+ "learning_rate": 1.6597442292997305e-05,
1670
+ "loss": 0.0283,
1671
+ "step": 118000
1672
+ },
1673
+ {
1674
+ "epoch": 1.7085021410343286,
1675
+ "grad_norm": 0.29604852199554443,
1676
+ "learning_rate": 1.6583024553410517e-05,
1677
+ "loss": 0.0271,
1678
+ "step": 118500
1679
+ },
1680
+ {
1681
+ "epoch": 1.7157110108277225,
1682
+ "grad_norm": 1.0948668718338013,
1683
+ "learning_rate": 1.6568606813823732e-05,
1684
+ "loss": 0.0269,
1685
+ "step": 119000
1686
+ },
1687
+ {
1688
+ "epoch": 1.7229198806211161,
1689
+ "grad_norm": 0.20236891508102417,
1690
+ "learning_rate": 1.655418907423694e-05,
1691
+ "loss": 0.0264,
1692
+ "step": 119500
1693
+ },
1694
+ {
1695
+ "epoch": 1.73012875041451,
1696
+ "grad_norm": 0.9090920090675354,
1697
+ "learning_rate": 1.6539771334650155e-05,
1698
+ "loss": 0.0282,
1699
+ "step": 120000
1700
+ },
1701
+ {
1702
+ "epoch": 1.7373376202079038,
1703
+ "grad_norm": 2.128474473953247,
1704
+ "learning_rate": 1.6525353595063367e-05,
1705
+ "loss": 0.0283,
1706
+ "step": 120500
1707
+ },
1708
+ {
1709
+ "epoch": 1.7445464900012975,
1710
+ "grad_norm": 1.6552634239196777,
1711
+ "learning_rate": 1.651093585547658e-05,
1712
+ "loss": 0.0272,
1713
+ "step": 121000
1714
+ },
1715
+ {
1716
+ "epoch": 1.7517553597946915,
1717
+ "grad_norm": 0.7921839356422424,
1718
+ "learning_rate": 1.6496518115889794e-05,
1719
+ "loss": 0.0301,
1720
+ "step": 121500
1721
+ },
1722
+ {
1723
+ "epoch": 1.7589642295880852,
1724
+ "grad_norm": 0.8467416763305664,
1725
+ "learning_rate": 1.6482100376303006e-05,
1726
+ "loss": 0.0266,
1727
+ "step": 122000
1728
+ },
1729
+ {
1730
+ "epoch": 1.7661730993814788,
1731
+ "grad_norm": 1.4604544639587402,
1732
+ "learning_rate": 1.6467682636716217e-05,
1733
+ "loss": 0.0253,
1734
+ "step": 122500
1735
+ },
1736
+ {
1737
+ "epoch": 1.773381969174873,
1738
+ "grad_norm": 0.677890956401825,
1739
+ "learning_rate": 1.645326489712943e-05,
1740
+ "loss": 0.0266,
1741
+ "step": 123000
1742
+ },
1743
+ {
1744
+ "epoch": 1.7805908389682665,
1745
+ "grad_norm": 0.2728472352027893,
1746
+ "learning_rate": 1.643884715754264e-05,
1747
+ "loss": 0.027,
1748
+ "step": 123500
1749
+ },
1750
+ {
1751
+ "epoch": 1.7877997087616604,
1752
+ "grad_norm": 1.2005136013031006,
1753
+ "learning_rate": 1.6424429417955852e-05,
1754
+ "loss": 0.0265,
1755
+ "step": 124000
1756
+ },
1757
+ {
1758
+ "epoch": 1.7950085785550542,
1759
+ "grad_norm": 2.1395583152770996,
1760
+ "learning_rate": 1.6410011678369067e-05,
1761
+ "loss": 0.0285,
1762
+ "step": 124500
1763
+ },
1764
+ {
1765
+ "epoch": 1.8022174483484479,
1766
+ "grad_norm": 1.5524953603744507,
1767
+ "learning_rate": 1.639559393878228e-05,
1768
+ "loss": 0.026,
1769
+ "step": 125000
1770
+ },
1771
+ {
1772
+ "epoch": 1.8094263181418417,
1773
+ "grad_norm": 1.5434062480926514,
1774
+ "learning_rate": 1.638117619919549e-05,
1775
+ "loss": 0.0272,
1776
+ "step": 125500
1777
+ },
1778
+ {
1779
+ "epoch": 1.8166351879352356,
1780
+ "grad_norm": 1.4732664823532104,
1781
+ "learning_rate": 1.6366758459608706e-05,
1782
+ "loss": 0.0264,
1783
+ "step": 126000
1784
+ },
1785
+ {
1786
+ "epoch": 1.8238440577286292,
1787
+ "grad_norm": 0.5316962599754333,
1788
+ "learning_rate": 1.6352340720021914e-05,
1789
+ "loss": 0.0262,
1790
+ "step": 126500
1791
+ },
1792
+ {
1793
+ "epoch": 1.831052927522023,
1794
+ "grad_norm": 0.09009312838315964,
1795
+ "learning_rate": 1.633792298043513e-05,
1796
+ "loss": 0.0272,
1797
+ "step": 127000
1798
+ },
1799
+ {
1800
+ "epoch": 1.838261797315417,
1801
+ "grad_norm": 1.211990475654602,
1802
+ "learning_rate": 1.632350524084834e-05,
1803
+ "loss": 0.0272,
1804
+ "step": 127500
1805
+ },
1806
+ {
1807
+ "epoch": 1.8454706671088106,
1808
+ "grad_norm": 1.1306172609329224,
1809
+ "learning_rate": 1.6309087501261552e-05,
1810
+ "loss": 0.0268,
1811
+ "step": 128000
1812
+ },
1813
+ {
1814
+ "epoch": 1.8526795369022044,
1815
+ "grad_norm": 1.8232672214508057,
1816
+ "learning_rate": 1.6294669761674768e-05,
1817
+ "loss": 0.0282,
1818
+ "step": 128500
1819
+ },
1820
+ {
1821
+ "epoch": 1.8598884066955983,
1822
+ "grad_norm": 2.736703395843506,
1823
+ "learning_rate": 1.628025202208798e-05,
1824
+ "loss": 0.0271,
1825
+ "step": 129000
1826
+ },
1827
+ {
1828
+ "epoch": 1.867097276488992,
1829
+ "grad_norm": 2.2017531394958496,
1830
+ "learning_rate": 1.626583428250119e-05,
1831
+ "loss": 0.0264,
1832
+ "step": 129500
1833
+ },
1834
+ {
1835
+ "epoch": 1.874306146282386,
1836
+ "grad_norm": 0.6630580425262451,
1837
+ "learning_rate": 1.6251416542914403e-05,
1838
+ "loss": 0.0268,
1839
+ "step": 130000
1840
+ },
1841
+ {
1842
+ "epoch": 1.8815150160757796,
1843
+ "grad_norm": 0.2576875388622284,
1844
+ "learning_rate": 1.6236998803327614e-05,
1845
+ "loss": 0.0275,
1846
+ "step": 130500
1847
+ },
1848
+ {
1849
+ "epoch": 1.8887238858691733,
1850
+ "grad_norm": 0.625859260559082,
1851
+ "learning_rate": 1.6222581063740826e-05,
1852
+ "loss": 0.0263,
1853
+ "step": 131000
1854
+ },
1855
+ {
1856
+ "epoch": 1.8959327556625674,
1857
+ "grad_norm": 2.3079171180725098,
1858
+ "learning_rate": 1.620816332415404e-05,
1859
+ "loss": 0.0266,
1860
+ "step": 131500
1861
+ },
1862
+ {
1863
+ "epoch": 1.903141625455961,
1864
+ "grad_norm": 0.8551648259162903,
1865
+ "learning_rate": 1.6193745584567253e-05,
1866
+ "loss": 0.0268,
1867
+ "step": 132000
1868
+ },
1869
+ {
1870
+ "epoch": 1.9103504952493549,
1871
+ "grad_norm": 1.2068754434585571,
1872
+ "learning_rate": 1.6179327844980464e-05,
1873
+ "loss": 0.0276,
1874
+ "step": 132500
1875
+ },
1876
+ {
1877
+ "epoch": 1.9175593650427487,
1878
+ "grad_norm": 0.4594031274318695,
1879
+ "learning_rate": 1.616491010539368e-05,
1880
+ "loss": 0.0271,
1881
+ "step": 133000
1882
+ },
1883
+ {
1884
+ "epoch": 1.9247682348361423,
1885
+ "grad_norm": 0.5821360945701599,
1886
+ "learning_rate": 1.6150492365806888e-05,
1887
+ "loss": 0.0267,
1888
+ "step": 133500
1889
+ },
1890
+ {
1891
+ "epoch": 1.9319771046295362,
1892
+ "grad_norm": 0.5188286304473877,
1893
+ "learning_rate": 1.6136074626220103e-05,
1894
+ "loss": 0.027,
1895
+ "step": 134000
1896
+ },
1897
+ {
1898
+ "epoch": 1.93918597442293,
1899
+ "grad_norm": 1.6506882905960083,
1900
+ "learning_rate": 1.6121656886633315e-05,
1901
+ "loss": 0.026,
1902
+ "step": 134500
1903
+ },
1904
+ {
1905
+ "epoch": 1.9463948442163237,
1906
+ "grad_norm": 1.5678963661193848,
1907
+ "learning_rate": 1.6107239147046526e-05,
1908
+ "loss": 0.0264,
1909
+ "step": 135000
1910
+ },
1911
+ {
1912
+ "epoch": 1.9536037140097176,
1913
+ "grad_norm": 0.3626735210418701,
1914
+ "learning_rate": 1.609282140745974e-05,
1915
+ "loss": 0.0264,
1916
+ "step": 135500
1917
+ },
1918
+ {
1919
+ "epoch": 1.9608125838031114,
1920
+ "grad_norm": 0.48542195558547974,
1921
+ "learning_rate": 1.6078403667872953e-05,
1922
+ "loss": 0.0257,
1923
+ "step": 136000
1924
+ },
1925
+ {
1926
+ "epoch": 1.968021453596505,
1927
+ "grad_norm": 0.93156498670578,
1928
+ "learning_rate": 1.6063985928286165e-05,
1929
+ "loss": 0.0274,
1930
+ "step": 136500
1931
+ },
1932
+ {
1933
+ "epoch": 1.975230323389899,
1934
+ "grad_norm": 0.6599089503288269,
1935
+ "learning_rate": 1.6049568188699376e-05,
1936
+ "loss": 0.0253,
1937
+ "step": 137000
1938
+ },
1939
+ {
1940
+ "epoch": 1.9824391931832928,
1941
+ "grad_norm": 2.511162519454956,
1942
+ "learning_rate": 1.6035150449112588e-05,
1943
+ "loss": 0.0264,
1944
+ "step": 137500
1945
+ },
1946
+ {
1947
+ "epoch": 1.9896480629766864,
1948
+ "grad_norm": 0.7365297675132751,
1949
+ "learning_rate": 1.6020732709525803e-05,
1950
+ "loss": 0.0263,
1951
+ "step": 138000
1952
+ },
1953
+ {
1954
+ "epoch": 1.9968569327700805,
1955
+ "grad_norm": 0.9106433391571045,
1956
+ "learning_rate": 1.6006314969939015e-05,
1957
+ "loss": 0.027,
1958
+ "step": 138500
1959
+ },
1960
+ {
1961
+ "epoch": 2.0,
1962
+ "eval_f1": 0.9915470627263667,
1963
+ "eval_loss": 0.02749801054596901,
1964
+ "eval_runtime": 1640.2112,
1965
+ "eval_samples_per_second": 636.802,
1966
+ "eval_steps_per_second": 19.9,
1967
+ "step": 138718
1968
+ }
1969
+ ],
1970
+ "logging_steps": 500,
1971
+ "max_steps": 693590,
1972
+ "num_input_tokens_seen": 0,
1973
+ "num_train_epochs": 10,
1974
+ "save_steps": 1000.0,
1975
+ "stateful_callbacks": {
1976
+ "EarlyStoppingCallback": {
1977
+ "args": {
1978
+ "early_stopping_patience": 5,
1979
+ "early_stopping_threshold": 0.005
1980
+ },
1981
+ "attributes": {
1982
+ "early_stopping_patience_counter": 1
1983
+ }
1984
+ },
1985
+ "TrainerControl": {
1986
+ "args": {
1987
+ "should_epoch_stop": false,
1988
+ "should_evaluate": false,
1989
+ "should_log": false,
1990
+ "should_save": true,
1991
+ "should_training_stop": false
1992
+ },
1993
+ "attributes": {}
1994
+ }
1995
+ },
1996
+ "total_flos": 2.35149119187216e+18,
1997
+ "train_batch_size": 64,
1998
+ "trial_name": null,
1999
+ "trial_params": null
2000
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:709ac35729d9783889224c2b6c62438ba17f34adeb2fd849eb815a049cead0d7
3
+ size 5841