artemis13fowl committed on
Commit
66ed7f3
·
verified ·
1 Parent(s): 47e1bcb

Training in progress, epoch 1

Browse files
Files changed (3) hide show
  1. config.json +490 -0
  2. model.safetensors +3 -0
  3. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertForSequenceClassification"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50281,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "id2label": {
24
+ "0": "Lua",
25
+ "1": "KiCad Layout",
26
+ "10": "Rust",
27
+ "100": "Unix Assembly",
28
+ "101": "OpenType Feature File",
29
+ "102": "Prolog",
30
+ "103": "Protocol Buffer Text Format",
31
+ "104": "HTML+Razor",
32
+ "105": "Fortran Free Form",
33
+ "106": "Logtalk",
34
+ "107": "Kit",
35
+ "108": "Graphviz (DOT)",
36
+ "109": "Erlang",
37
+ "11": "Scheme",
38
+ "110": "C",
39
+ "111": "LLVM",
40
+ "112": "Visual Basic .NET",
41
+ "113": "PHP",
42
+ "114": "J",
43
+ "115": "Ragel in Ruby Host",
44
+ "116": "CoffeeScript",
45
+ "117": "PlantUML",
46
+ "118": "Vim Script",
47
+ "119": "Go",
48
+ "12": "FreeBasic",
49
+ "120": "Vim Snippet",
50
+ "121": "R",
51
+ "122": "Dockerfile",
52
+ "123": "Lex",
53
+ "124": "G-code",
54
+ "125": "Scilab",
55
+ "126": "Csound",
56
+ "127": "Redcode",
57
+ "128": "VCL",
58
+ "129": "Perl",
59
+ "13": "Starlark",
60
+ "130": "Java",
61
+ "131": "Csound Document",
62
+ "132": "Julia",
63
+ "133": "Turtle",
64
+ "134": "Gherkin",
65
+ "135": "Smali",
66
+ "136": "C++",
67
+ "137": "Python",
68
+ "138": "CODEOWNERS",
69
+ "139": "Io",
70
+ "14": "D",
71
+ "140": "GDB",
72
+ "141": "Makefile",
73
+ "142": "Common Lisp",
74
+ "143": "Apex",
75
+ "144": "PostScript",
76
+ "145": "Edoid",
77
+ "146": "Unity3D Asset",
78
+ "147": "OpenEdge ABL",
79
+ "148": "Open Policy Agent",
80
+ "149": "Sass",
81
+ "15": "Raw token data",
82
+ "150": "Rascal",
83
+ "151": "Lean",
84
+ "152": "Assembly",
85
+ "153": "Inform 7",
86
+ "154": "Lua",
87
+ "155": "Chapel",
88
+ "156": "KiCad Layout",
89
+ "157": "Nim",
90
+ "158": "Ignore List",
91
+ "159": "Objective-C++",
92
+ "16": "Modelica",
93
+ "160": "Squirrel",
94
+ "161": "Ruby",
95
+ "162": "Objective-C",
96
+ "163": "Rust",
97
+ "164": "FreeBasic",
98
+ "165": "Scheme",
99
+ "166": "Starlark",
100
+ "167": "D",
101
+ "168": "Raw token data",
102
+ "169": "Modelica",
103
+ "17": "PicoLisp",
104
+ "170": "Pickle",
105
+ "171": "SCSS",
106
+ "172": "Shell",
107
+ "173": "PowerShell",
108
+ "174": "TSX",
109
+ "175": "C#",
110
+ "176": "Gnuplot",
111
+ "177": "Haxe",
112
+ "178": "Go Module",
113
+ "179": "Vue",
114
+ "18": "SCSS",
115
+ "180": "ObjDump",
116
+ "181": "SQL",
117
+ "182": "AsciiDoc",
118
+ "183": "Fish",
119
+ "184": "Wavefront Object",
120
+ "185": "DIGITAL Command Language",
121
+ "186": "Thrift",
122
+ "187": "ApacheConf",
123
+ "188": "Gerber Image",
124
+ "189": "YANG",
125
+ "19": "Shell",
126
+ "190": "Groovy",
127
+ "191": "MATLAB",
128
+ "192": "Bluespec",
129
+ "193": "POV-Ray SDL",
130
+ "194": "Solidity",
131
+ "195": "Dart",
132
+ "196": "VBScript",
133
+ "197": "Fluent",
134
+ "198": "SQF",
135
+ "199": "Gettext Catalog",
136
+ "2": "Nim",
137
+ "20": "PowerShell",
138
+ "200": "Haskell",
139
+ "201": "ImageJ Macro",
140
+ "202": "PureScript",
141
+ "203": "GDScript",
142
+ "204": "Inno Setup",
143
+ "205": "ColdFusion",
144
+ "206": "TSV",
145
+ "207": "Visual Basic",
146
+ "208": "CMake",
147
+ "209": "Eagle",
148
+ "21": "TSX",
149
+ "210": "Blade",
150
+ "211": "OCaml",
151
+ "212": "LookML",
152
+ "213": "VHDL",
153
+ "214": "HCL",
154
+ "215": "GLSL",
155
+ "216": "RDoc",
156
+ "217": "Metal",
157
+ "218": "Isabelle",
158
+ "219": "TypeScript",
159
+ "22": "C#",
160
+ "220": "Swift",
161
+ "221": "Stylus",
162
+ "222": "Ada",
163
+ "223": "Crystal",
164
+ "224": "Less",
165
+ "225": "Verilog",
166
+ "226": "XML Property List",
167
+ "227": "Hoon",
168
+ "228": "Hack",
169
+ "229": "PLpgSQL",
170
+ "23": "Gnuplot",
171
+ "230": "Scala",
172
+ "231": "FreeMarker",
173
+ "232": "Kotlin",
174
+ "233": "GAS",
175
+ "234": "Twig",
176
+ "235": "EJS",
177
+ "236": "JavaScript",
178
+ "237": "Gradle",
179
+ "238": "ECL",
180
+ "239": "Jupyter Notebook",
181
+ "24": "Haxe",
182
+ "240": "OpenType Feature File",
183
+ "241": "Protocol Buffer Text Format",
184
+ "242": "HTML+Razor",
185
+ "243": "Fortran Free Form",
186
+ "244": "Logtalk",
187
+ "245": "Kit",
188
+ "246": "Graphviz (DOT)",
189
+ "247": "Erlang",
190
+ "248": "C",
191
+ "249": "LLVM",
192
+ "25": "Go Module",
193
+ "250": "Visual Basic .NET",
194
+ "251": "PHP",
195
+ "252": "J",
196
+ "253": "Ragel in Ruby Host",
197
+ "254": "CoffeeScript",
198
+ "255": "PlantUML",
199
+ "256": "Vim Script",
200
+ "257": "Go",
201
+ "258": "Vim Snippet",
202
+ "259": "R",
203
+ "26": "Vue",
204
+ "260": "Dockerfile",
205
+ "261": "Lex",
206
+ "262": "G-code",
207
+ "263": "Scilab",
208
+ "264": "Perl",
209
+ "265": "Java",
210
+ "266": "Julia",
211
+ "267": "Turtle",
212
+ "268": "Gherkin",
213
+ "269": "C++",
214
+ "27": "ObjDump",
215
+ "270": "Python",
216
+ "271": "Io",
217
+ "272": "GDB",
218
+ "273": "Makefile",
219
+ "274": "Common Lisp",
220
+ "275": "Apex",
221
+ "276": "PostScript",
222
+ "277": "Edoid",
223
+ "278": "Unity3D Asset",
224
+ "279": "OpenEdge ABL",
225
+ "28": "SQL",
226
+ "280": "Open Policy Agent",
227
+ "281": "Sass",
228
+ "282": "Rascal",
229
+ "283": "Lean",
230
+ "284": "Assembly",
231
+ "29": "AsciiDoc",
232
+ "3": "Chapel",
233
+ "30": "Fish",
234
+ "31": "Pascal",
235
+ "32": "Wavefront Object",
236
+ "33": "DIGITAL Command Language",
237
+ "34": "Thrift",
238
+ "35": "ApacheConf",
239
+ "36": "Gerber Image",
240
+ "37": "YANG",
241
+ "38": "Mathematica",
242
+ "39": "OpenStep Property List",
243
+ "4": "Ignore List",
244
+ "40": "GAP",
245
+ "41": "Groovy",
246
+ "42": "MATLAB",
247
+ "43": "Adobe Font Metrics",
248
+ "44": "Bluespec",
249
+ "45": "POV-Ray SDL",
250
+ "46": "Solidity",
251
+ "47": "Dart",
252
+ "48": "XS",
253
+ "49": "VBScript",
254
+ "5": "Objective-C++",
255
+ "50": "Fluent",
256
+ "51": "SQF",
257
+ "52": "Gettext Catalog",
258
+ "53": "JAR Manifest",
259
+ "54": "Haskell",
260
+ "55": "ImageJ Macro",
261
+ "56": "Ioke",
262
+ "57": "PureScript",
263
+ "58": "REALbasic",
264
+ "59": "GDScript",
265
+ "6": "Squirrel",
266
+ "60": "Inno Setup",
267
+ "61": "ColdFusion",
268
+ "62": "TSV",
269
+ "63": "Visual Basic",
270
+ "64": "Kvlang",
271
+ "65": "CMake",
272
+ "66": "Eagle",
273
+ "67": "Git Config",
274
+ "68": "Blade",
275
+ "69": "OCaml",
276
+ "7": "AGS Script",
277
+ "70": "HCL",
278
+ "71": "VHDL",
279
+ "72": "LookML",
280
+ "73": "GLSL",
281
+ "74": "RDoc",
282
+ "75": "Metal",
283
+ "76": "Isabelle",
284
+ "77": "TypeScript",
285
+ "78": "Swift",
286
+ "79": "Stylus",
287
+ "8": "Ruby",
288
+ "80": "Ada",
289
+ "81": "Crystal",
290
+ "82": "Less",
291
+ "83": "JQ",
292
+ "84": "Verilog",
293
+ "85": "XML Property List",
294
+ "86": "Hoon",
295
+ "87": "Hack",
296
+ "88": "PLpgSQL",
297
+ "89": "Scala",
298
+ "9": "Objective-C",
299
+ "90": "FreeMarker",
300
+ "91": "Unknown",
301
+ "92": "Kotlin",
302
+ "93": "GAS",
303
+ "94": "Twig",
304
+ "95": "EJS",
305
+ "96": "JavaScript",
306
+ "97": "Gradle",
307
+ "98": "ECL",
308
+ "99": "Jupyter Notebook"
309
+ },
310
+ "initializer_cutoff_factor": 2.0,
311
+ "initializer_range": 0.02,
312
+ "intermediate_size": 1152,
313
+ "label2id": {
314
+ "AGS Script": "7",
315
+ "Ada": "222",
316
+ "Adobe Font Metrics": "43",
317
+ "ApacheConf": "187",
318
+ "Apex": "275",
319
+ "AsciiDoc": "182",
320
+ "Assembly": "284",
321
+ "Blade": "210",
322
+ "Bluespec": "192",
323
+ "C": "248",
324
+ "C#": "175",
325
+ "C++": "269",
326
+ "CMake": "208",
327
+ "CODEOWNERS": "138",
328
+ "Chapel": "155",
329
+ "CoffeeScript": "254",
330
+ "ColdFusion": "205",
331
+ "Common Lisp": "274",
332
+ "Crystal": "223",
333
+ "Csound": "126",
334
+ "Csound Document": "131",
335
+ "D": "167",
336
+ "DIGITAL Command Language": "185",
337
+ "Dart": "195",
338
+ "Dockerfile": "260",
339
+ "ECL": "238",
340
+ "EJS": "235",
341
+ "Eagle": "209",
342
+ "Edoid": "277",
343
+ "Erlang": "247",
344
+ "Fish": "183",
345
+ "Fluent": "197",
346
+ "Fortran Free Form": "243",
347
+ "FreeBasic": "164",
348
+ "FreeMarker": "231",
349
+ "G-code": "262",
350
+ "GAP": "40",
351
+ "GAS": "233",
352
+ "GDB": "272",
353
+ "GDScript": "203",
354
+ "GLSL": "215",
355
+ "Gerber Image": "188",
356
+ "Gettext Catalog": "199",
357
+ "Gherkin": "268",
358
+ "Git Config": "67",
359
+ "Gnuplot": "176",
360
+ "Go": "257",
361
+ "Go Module": "178",
362
+ "Gradle": "237",
363
+ "Graphviz (DOT)": "246",
364
+ "Groovy": "190",
365
+ "HCL": "214",
366
+ "HTML+Razor": "242",
367
+ "Hack": "228",
368
+ "Haskell": "200",
369
+ "Haxe": "177",
370
+ "Hoon": "227",
371
+ "Ignore List": "158",
372
+ "ImageJ Macro": "201",
373
+ "Inform 7": "153",
374
+ "Inno Setup": "204",
375
+ "Io": "271",
376
+ "Ioke": "56",
377
+ "Isabelle": "218",
378
+ "J": "252",
379
+ "JAR Manifest": "53",
380
+ "JQ": "83",
381
+ "Java": "265",
382
+ "JavaScript": "236",
383
+ "Julia": "266",
384
+ "Jupyter Notebook": "239",
385
+ "KiCad Layout": "156",
386
+ "Kit": "245",
387
+ "Kotlin": "232",
388
+ "Kvlang": "64",
389
+ "LLVM": "249",
390
+ "Lean": "283",
391
+ "Less": "224",
392
+ "Lex": "261",
393
+ "Logtalk": "244",
394
+ "LookML": "212",
395
+ "Lua": "154",
396
+ "MATLAB": "191",
397
+ "Makefile": "273",
398
+ "Mathematica": "38",
399
+ "Metal": "217",
400
+ "Modelica": "169",
401
+ "Nim": "157",
402
+ "OCaml": "211",
403
+ "ObjDump": "180",
404
+ "Objective-C": "162",
405
+ "Objective-C++": "159",
406
+ "Open Policy Agent": "280",
407
+ "OpenEdge ABL": "279",
408
+ "OpenStep Property List": "39",
409
+ "OpenType Feature File": "240",
410
+ "PHP": "251",
411
+ "PLpgSQL": "229",
412
+ "POV-Ray SDL": "193",
413
+ "Pascal": "31",
414
+ "Perl": "264",
415
+ "Pickle": "170",
416
+ "PicoLisp": "17",
417
+ "PlantUML": "255",
418
+ "PostScript": "276",
419
+ "PowerShell": "173",
420
+ "Prolog": "102",
421
+ "Protocol Buffer Text Format": "241",
422
+ "PureScript": "202",
423
+ "Python": "270",
424
+ "R": "259",
425
+ "RDoc": "216",
426
+ "REALbasic": "58",
427
+ "Ragel in Ruby Host": "253",
428
+ "Rascal": "282",
429
+ "Raw token data": "168",
430
+ "Redcode": "127",
431
+ "Ruby": "161",
432
+ "Rust": "163",
433
+ "SCSS": "171",
434
+ "SQF": "198",
435
+ "SQL": "181",
436
+ "Sass": "281",
437
+ "Scala": "230",
438
+ "Scheme": "165",
439
+ "Scilab": "263",
440
+ "Shell": "172",
441
+ "Smali": "135",
442
+ "Solidity": "194",
443
+ "Squirrel": "160",
444
+ "Starlark": "166",
445
+ "Stylus": "221",
446
+ "Swift": "220",
447
+ "TSV": "206",
448
+ "TSX": "174",
449
+ "Thrift": "186",
450
+ "Turtle": "267",
451
+ "Twig": "234",
452
+ "TypeScript": "219",
453
+ "Unity3D Asset": "278",
454
+ "Unix Assembly": "100",
455
+ "Unknown": "91",
456
+ "VBScript": "196",
457
+ "VCL": "128",
458
+ "VHDL": "213",
459
+ "Verilog": "225",
460
+ "Vim Script": "256",
461
+ "Vim Snippet": "258",
462
+ "Visual Basic": "207",
463
+ "Visual Basic .NET": "250",
464
+ "Vue": "179",
465
+ "Wavefront Object": "184",
466
+ "XML Property List": "226",
467
+ "XS": "48",
468
+ "YANG": "189"
469
+ },
470
+ "layer_norm_eps": 1e-05,
471
+ "local_attention": 128,
472
+ "local_rope_theta": 10000.0,
473
+ "max_position_embeddings": 8192,
474
+ "mlp_bias": false,
475
+ "mlp_dropout": 0.0,
476
+ "model_type": "modernbert",
477
+ "norm_bias": false,
478
+ "norm_eps": 1e-05,
479
+ "num_attention_heads": 12,
480
+ "num_hidden_layers": 22,
481
+ "pad_token_id": 50283,
482
+ "position_embedding_type": "absolute",
483
+ "problem_type": "single_label_classification",
484
+ "repad_logits_with_grad": false,
485
+ "sep_token_id": 50282,
486
+ "sparse_pred_ignore_index": -100,
487
+ "sparse_prediction": false,
488
+ "transformers_version": "4.57.3",
489
+ "vocab_size": 50368
490
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02f30a579c5e501824e05114d534b9359828ae03efb02df522869c306ef9e8a6
3
+ size 599310308
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78ada9de9dfea8d6abc067a6c0e8e2154382080a2f4def52676217d8bb1155b4
3
+ size 5905