tomercagan committed on
Commit
387b052
·
verified ·
1 Parent(s): 774199d

Training in progress, epoch 1

Browse files
Files changed (3) hide show
  1. config.json +490 -0
  2. model.safetensors +3 -0
  3. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertForSequenceClassification"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50281,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "id2label": {
24
+ "0": "Scheme",
25
+ "1": "R",
26
+ "2": "Protocol Buffer Text Format",
27
+ "3": "Java",
28
+ "4": "J",
29
+ "5": "Erlang",
30
+ "6": "Smali",
31
+ "7": "Vim Snippet",
32
+ "8": "ApacheConf",
33
+ "9": "Edoid",
34
+ "10": "Csound Document",
35
+ "11": "Gherkin",
36
+ "12": "Unknown",
37
+ "13": "GLSL",
38
+ "14": "Bluespec",
39
+ "15": "VCL",
40
+ "16": "DIGITAL Command Language",
41
+ "17": "Redcode",
42
+ "18": "POV-Ray SDL",
43
+ "19": "Objective-C",
44
+ "20": "HCL",
45
+ "21": "Inform 7",
46
+ "22": "Groovy",
47
+ "23": "TypeScript",
48
+ "24": "C#",
49
+ "25": "XML Property List",
50
+ "26": "GAP",
51
+ "27": "Csound",
52
+ "28": "Python",
53
+ "29": "Assembly",
54
+ "30": "FreeBasic",
55
+ "31": "Fish",
56
+ "32": "MATLAB",
57
+ "33": "Thrift",
58
+ "34": "G-code",
59
+ "35": "HTML+Razor",
60
+ "36": "Hoon",
61
+ "37": "Vim Script",
62
+ "38": "Scilab",
63
+ "39": "PHP",
64
+ "40": "CMake",
65
+ "41": "Ragel in Ruby Host",
66
+ "42": "Prolog",
67
+ "43": "Fortran Free Form",
68
+ "44": "PowerShell",
69
+ "45": "FreeMarker",
70
+ "46": "Metal",
71
+ "47": "PostScript",
72
+ "48": "EJS",
73
+ "49": "Vue",
74
+ "50": "VBScript",
75
+ "51": "Mathematica",
76
+ "52": "Rust",
77
+ "53": "Fluent",
78
+ "54": "Starlark",
79
+ "55": "LookML",
80
+ "56": "Crystal",
81
+ "57": "Modelica",
82
+ "58": "TSV",
83
+ "59": "PicoLisp",
84
+ "60": "Blade",
85
+ "61": "Isabelle",
86
+ "62": "Objective-C++",
87
+ "63": "Eagle",
88
+ "64": "Ruby",
89
+ "65": "Inno Setup",
90
+ "66": "Gradle",
91
+ "67": "Visual Basic",
92
+ "68": "KiCad Layout",
93
+ "69": "AsciiDoc",
94
+ "70": "Gnuplot",
95
+ "71": "CODEOWNERS",
96
+ "72": "Git Config",
97
+ "73": "XS",
98
+ "74": "OpenStep Property List",
99
+ "75": "OCaml",
100
+ "76": "Go",
101
+ "77": "Lean",
102
+ "78": "GDScript",
103
+ "79": "Ignore List",
104
+ "80": "RDoc",
105
+ "81": "Swift",
106
+ "82": "Visual Basic .NET",
107
+ "83": "GAS",
108
+ "84": "Unity3D Asset",
109
+ "85": "Gerber Image",
110
+ "86": "PureScript",
111
+ "87": "SCSS",
112
+ "88": "Scala",
113
+ "89": "C",
114
+ "90": "Verilog",
115
+ "91": "Lua",
116
+ "92": "OpenEdge ABL",
117
+ "93": "TSX",
118
+ "94": "Nim",
119
+ "95": "GDB",
120
+ "96": "Solidity",
121
+ "97": "ObjDump",
122
+ "98": "Ada",
123
+ "99": "ImageJ Macro",
124
+ "100": "REALbasic",
125
+ "101": "VHDL",
126
+ "102": "Lex",
127
+ "103": "YANG",
128
+ "104": "Hack",
129
+ "105": "Rascal",
130
+ "106": "AGS Script",
131
+ "107": "Sass",
132
+ "108": "JAR Manifest",
133
+ "109": "Kotlin",
134
+ "110": "Dockerfile",
135
+ "111": "SQL",
136
+ "112": "ColdFusion",
137
+ "113": "Chapel",
138
+ "114": "Open Policy Agent",
139
+ "115": "PLpgSQL",
140
+ "116": "Twig",
141
+ "117": "CoffeeScript",
142
+ "118": "Haskell",
143
+ "119": "JavaScript",
144
+ "120": "ECL",
145
+ "121": "Adobe Font Metrics",
146
+ "122": "OpenType Feature File",
147
+ "123": "Logtalk",
148
+ "124": "Ioke",
149
+ "125": "Haxe",
150
+ "126": "Raw token data",
151
+ "127": "Apex",
152
+ "128": "D",
153
+ "129": "Perl",
154
+ "130": "Jupyter Notebook",
155
+ "131": "LLVM",
156
+ "132": "SQF",
157
+ "133": "Less",
158
+ "134": "Makefile",
159
+ "135": "Io",
160
+ "136": "JQ",
161
+ "137": "Stylus",
162
+ "138": "C++",
163
+ "139": "PlantUML",
164
+ "140": "Kit",
165
+ "141": "Shell",
166
+ "142": "Julia",
167
+ "143": "Kvlang",
168
+ "144": "Common Lisp",
169
+ "145": "Wavefront Object",
170
+ "146": "Squirrel",
171
+ "147": "Gettext Catalog",
172
+ "148": "Dart",
173
+ "149": "Unix Assembly",
174
+ "150": "Pascal",
175
+ "151": "Turtle",
176
+ "152": "Go Module",
177
+ "153": "Graphviz (DOT)",
178
+ "154": "Scheme",
179
+ "155": "R",
180
+ "156": "Protocol Buffer Text Format",
181
+ "157": "Java",
182
+ "158": "Erlang",
183
+ "159": "J",
184
+ "160": "Vim Snippet",
185
+ "161": "ApacheConf",
186
+ "162": "Edoid",
187
+ "163": "Gherkin",
188
+ "164": "GLSL",
189
+ "165": "Bluespec",
190
+ "166": "DIGITAL Command Language",
191
+ "167": "POV-Ray SDL",
192
+ "168": "Objective-C",
193
+ "169": "HCL",
194
+ "170": "Groovy",
195
+ "171": "TypeScript",
196
+ "172": "C#",
197
+ "173": "XML Property List",
198
+ "174": "Python",
199
+ "175": "Assembly",
200
+ "176": "FreeBasic",
201
+ "177": "Fish",
202
+ "178": "MATLAB",
203
+ "179": "Thrift",
204
+ "180": "G-code",
205
+ "181": "HTML+Razor",
206
+ "182": "Hoon",
207
+ "183": "Vim Script",
208
+ "184": "Scilab",
209
+ "185": "PHP",
210
+ "186": "CMake",
211
+ "187": "Ragel in Ruby Host",
212
+ "188": "Fortran Free Form",
213
+ "189": "PowerShell",
214
+ "190": "FreeMarker",
215
+ "191": "Metal",
216
+ "192": "PostScript",
217
+ "193": "EJS",
218
+ "194": "Vue",
219
+ "195": "VBScript",
220
+ "196": "Rust",
221
+ "197": "Fluent",
222
+ "198": "Starlark",
223
+ "199": "LookML",
224
+ "200": "Crystal",
225
+ "201": "Modelica",
226
+ "202": "TSV",
227
+ "203": "Blade",
228
+ "204": "Isabelle",
229
+ "205": "Objective-C++",
230
+ "206": "Eagle",
231
+ "207": "Ruby",
232
+ "208": "Inno Setup",
233
+ "209": "Gradle",
234
+ "210": "AsciiDoc",
235
+ "211": "Visual Basic",
236
+ "212": "KiCad Layout",
237
+ "213": "Gnuplot",
238
+ "214": "OCaml",
239
+ "215": "Go",
240
+ "216": "Lean",
241
+ "217": "GDScript",
242
+ "218": "Ignore List",
243
+ "219": "RDoc",
244
+ "220": "Swift",
245
+ "221": "Visual Basic .NET",
246
+ "222": "GAS",
247
+ "223": "Unity3D Asset",
248
+ "224": "Gerber Image",
249
+ "225": "PureScript",
250
+ "226": "SCSS",
251
+ "227": "Scala",
252
+ "228": "C",
253
+ "229": "Verilog",
254
+ "230": "Lua",
255
+ "231": "OpenEdge ABL",
256
+ "232": "TSX",
257
+ "233": "Nim",
258
+ "234": "GDB",
259
+ "235": "Solidity",
260
+ "236": "ObjDump",
261
+ "237": "Ada",
262
+ "238": "ImageJ Macro",
263
+ "239": "VHDL",
264
+ "240": "Lex",
265
+ "241": "YANG",
266
+ "242": "Hack",
267
+ "243": "Rascal",
268
+ "244": "Sass",
269
+ "245": "Kotlin",
270
+ "246": "Dockerfile",
271
+ "247": "SQL",
272
+ "248": "ColdFusion",
273
+ "249": "Open Policy Agent",
274
+ "250": "Chapel",
275
+ "251": "PLpgSQL",
276
+ "252": "Twig",
277
+ "253": "CoffeeScript",
278
+ "254": "Haskell",
279
+ "255": "JavaScript",
280
+ "256": "ECL",
281
+ "257": "OpenType Feature File",
282
+ "258": "Logtalk",
283
+ "259": "Haxe",
284
+ "260": "Raw token data",
285
+ "261": "Apex",
286
+ "262": "D",
287
+ "263": "Perl",
288
+ "264": "Jupyter Notebook",
289
+ "265": "LLVM",
290
+ "266": "SQF",
291
+ "267": "Less",
292
+ "268": "Makefile",
293
+ "269": "Io",
294
+ "270": "Pickle",
295
+ "271": "Stylus",
296
+ "272": "C++",
297
+ "273": "PlantUML",
298
+ "274": "Kit",
299
+ "275": "Julia",
300
+ "276": "Shell",
301
+ "277": "Wavefront Object",
302
+ "278": "Common Lisp",
303
+ "279": "Squirrel",
304
+ "280": "Gettext Catalog",
305
+ "281": "Dart",
306
+ "282": "Turtle",
307
+ "283": "Go Module",
308
+ "284": "Graphviz (DOT)"
309
+ },
310
+ "initializer_cutoff_factor": 2.0,
311
+ "initializer_range": 0.02,
312
+ "intermediate_size": 1152,
313
+ "label2id": {
314
+ "AGS Script": 106,
315
+ "Ada": 237,
316
+ "Adobe Font Metrics": 121,
317
+ "ApacheConf": 161,
318
+ "Apex": 261,
319
+ "AsciiDoc": 210,
320
+ "Assembly": 175,
321
+ "Blade": 203,
322
+ "Bluespec": 165,
323
+ "C": 228,
324
+ "C#": 172,
325
+ "C++": 272,
326
+ "CMake": 186,
327
+ "CODEOWNERS": 71,
328
+ "Chapel": 250,
329
+ "CoffeeScript": 253,
330
+ "ColdFusion": 248,
331
+ "Common Lisp": 278,
332
+ "Crystal": 200,
333
+ "Csound": 27,
334
+ "Csound Document": 10,
335
+ "D": 262,
336
+ "DIGITAL Command Language": 166,
337
+ "Dart": 281,
338
+ "Dockerfile": 246,
339
+ "ECL": 256,
340
+ "EJS": 193,
341
+ "Eagle": 206,
342
+ "Edoid": 162,
343
+ "Erlang": 158,
344
+ "Fish": 177,
345
+ "Fluent": 197,
346
+ "Fortran Free Form": 188,
347
+ "FreeBasic": 176,
348
+ "FreeMarker": 190,
349
+ "G-code": 180,
350
+ "GAP": 26,
351
+ "GAS": 222,
352
+ "GDB": 234,
353
+ "GDScript": 217,
354
+ "GLSL": 164,
355
+ "Gerber Image": 224,
356
+ "Gettext Catalog": 280,
357
+ "Gherkin": 163,
358
+ "Git Config": 72,
359
+ "Gnuplot": 213,
360
+ "Go": 215,
361
+ "Go Module": 283,
362
+ "Gradle": 209,
363
+ "Graphviz (DOT)": 284,
364
+ "Groovy": 170,
365
+ "HCL": 169,
366
+ "HTML+Razor": 181,
367
+ "Hack": 242,
368
+ "Haskell": 254,
369
+ "Haxe": 259,
370
+ "Hoon": 182,
371
+ "Ignore List": 218,
372
+ "ImageJ Macro": 238,
373
+ "Inform 7": 21,
374
+ "Inno Setup": 208,
375
+ "Io": 269,
376
+ "Ioke": 124,
377
+ "Isabelle": 204,
378
+ "J": 159,
379
+ "JAR Manifest": 108,
380
+ "JQ": 136,
381
+ "Java": 157,
382
+ "JavaScript": 255,
383
+ "Julia": 275,
384
+ "Jupyter Notebook": 264,
385
+ "KiCad Layout": 212,
386
+ "Kit": 274,
387
+ "Kotlin": 245,
388
+ "Kvlang": 143,
389
+ "LLVM": 265,
390
+ "Lean": 216,
391
+ "Less": 267,
392
+ "Lex": 240,
393
+ "Logtalk": 258,
394
+ "LookML": 199,
395
+ "Lua": 230,
396
+ "MATLAB": 178,
397
+ "Makefile": 268,
398
+ "Mathematica": 51,
399
+ "Metal": 191,
400
+ "Modelica": 201,
401
+ "Nim": 233,
402
+ "OCaml": 214,
403
+ "ObjDump": 236,
404
+ "Objective-C": 168,
405
+ "Objective-C++": 205,
406
+ "Open Policy Agent": 249,
407
+ "OpenEdge ABL": 231,
408
+ "OpenStep Property List": 74,
409
+ "OpenType Feature File": 257,
410
+ "PHP": 185,
411
+ "PLpgSQL": 251,
412
+ "POV-Ray SDL": 167,
413
+ "Pascal": 150,
414
+ "Perl": 263,
415
+ "Pickle": 270,
416
+ "PicoLisp": 59,
417
+ "PlantUML": 273,
418
+ "PostScript": 192,
419
+ "PowerShell": 189,
420
+ "Prolog": 42,
421
+ "Protocol Buffer Text Format": 156,
422
+ "PureScript": 225,
423
+ "Python": 174,
424
+ "R": 155,
425
+ "RDoc": 219,
426
+ "REALbasic": 100,
427
+ "Ragel in Ruby Host": 187,
428
+ "Rascal": 243,
429
+ "Raw token data": 260,
430
+ "Redcode": 17,
431
+ "Ruby": 207,
432
+ "Rust": 196,
433
+ "SCSS": 226,
434
+ "SQF": 266,
435
+ "SQL": 247,
436
+ "Sass": 244,
437
+ "Scala": 227,
438
+ "Scheme": 154,
439
+ "Scilab": 184,
440
+ "Shell": 276,
441
+ "Smali": 6,
442
+ "Solidity": 235,
443
+ "Squirrel": 279,
444
+ "Starlark": 198,
445
+ "Stylus": 271,
446
+ "Swift": 220,
447
+ "TSV": 202,
448
+ "TSX": 232,
449
+ "Thrift": 179,
450
+ "Turtle": 282,
451
+ "Twig": 252,
452
+ "TypeScript": 171,
453
+ "Unity3D Asset": 223,
454
+ "Unix Assembly": 149,
455
+ "Unknown": 12,
456
+ "VBScript": 195,
457
+ "VCL": 15,
458
+ "VHDL": 239,
459
+ "Verilog": 229,
460
+ "Vim Script": 183,
461
+ "Vim Snippet": 160,
462
+ "Visual Basic": 211,
463
+ "Visual Basic .NET": 221,
464
+ "Vue": 194,
465
+ "Wavefront Object": 277,
466
+ "XML Property List": 173,
467
+ "XS": 73,
468
+ "YANG": 241
469
+ },
470
+ "layer_norm_eps": 1e-05,
471
+ "local_attention": 128,
472
+ "local_rope_theta": 10000.0,
473
+ "max_position_embeddings": 8192,
474
+ "mlp_bias": false,
475
+ "mlp_dropout": 0.0,
476
+ "model_type": "modernbert",
477
+ "norm_bias": false,
478
+ "norm_eps": 1e-05,
479
+ "num_attention_heads": 12,
480
+ "num_hidden_layers": 22,
481
+ "pad_token_id": 50283,
482
+ "position_embedding_type": "absolute",
483
+ "problem_type": "single_label_classification",
484
+ "repad_logits_with_grad": false,
485
+ "sep_token_id": 50282,
486
+ "sparse_pred_ignore_index": -100,
487
+ "sparse_prediction": false,
488
+ "transformers_version": "4.57.1",
489
+ "vocab_size": 50368
490
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb156c838ecfbcc54c7cec81a558ffd22f5d64ab2962e1c36d70e9476e0adb39
3
+ size 599310308
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3854ae2bbf84a2f321615d2418a438502179d0cb0e1fd7192de453bc8ab5658
3
+ size 5905