artemis13fowl's picture
Training in progress, epoch 1
66ed7f3 verified
raw
history blame
11.1 kB
{
"architectures": [
"ModernBertForSequenceClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 50281,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 50281,
"decoder_bias": true,
"deterministic_flash_attn": false,
"dtype": "float32",
"embedding_dropout": 0.0,
"eos_token_id": 50282,
"global_attn_every_n_layers": 3,
"global_rope_theta": 160000.0,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "Lua",
"1": "KiCad Layout",
"10": "Rust",
"100": "Unix Assembly",
"101": "OpenType Feature File",
"102": "Prolog",
"103": "Protocol Buffer Text Format",
"104": "HTML+Razor",
"105": "Fortran Free Form",
"106": "Logtalk",
"107": "Kit",
"108": "Graphviz (DOT)",
"109": "Erlang",
"11": "Scheme",
"110": "C",
"111": "LLVM",
"112": "Visual Basic .NET",
"113": "PHP",
"114": "J",
"115": "Ragel in Ruby Host",
"116": "CoffeeScript",
"117": "PlantUML",
"118": "Vim Script",
"119": "Go",
"12": "FreeBasic",
"120": "Vim Snippet",
"121": "R",
"122": "Dockerfile",
"123": "Lex",
"124": "G-code",
"125": "Scilab",
"126": "Csound",
"127": "Redcode",
"128": "VCL",
"129": "Perl",
"13": "Starlark",
"130": "Java",
"131": "Csound Document",
"132": "Julia",
"133": "Turtle",
"134": "Gherkin",
"135": "Smali",
"136": "C++",
"137": "Python",
"138": "CODEOWNERS",
"139": "Io",
"14": "D",
"140": "GDB",
"141": "Makefile",
"142": "Common Lisp",
"143": "Apex",
"144": "PostScript",
"145": "Edoid",
"146": "Unity3D Asset",
"147": "OpenEdge ABL",
"148": "Open Policy Agent",
"149": "Sass",
"15": "Raw token data",
"150": "Rascal",
"151": "Lean",
"152": "Assembly",
"153": "Inform 7",
"154": "Lua",
"155": "Chapel",
"156": "KiCad Layout",
"157": "Nim",
"158": "Ignore List",
"159": "Objective-C++",
"16": "Modelica",
"160": "Squirrel",
"161": "Ruby",
"162": "Objective-C",
"163": "Rust",
"164": "FreeBasic",
"165": "Scheme",
"166": "Starlark",
"167": "D",
"168": "Raw token data",
"169": "Modelica",
"17": "PicoLisp",
"170": "Pickle",
"171": "SCSS",
"172": "Shell",
"173": "PowerShell",
"174": "TSX",
"175": "C#",
"176": "Gnuplot",
"177": "Haxe",
"178": "Go Module",
"179": "Vue",
"18": "SCSS",
"180": "ObjDump",
"181": "SQL",
"182": "AsciiDoc",
"183": "Fish",
"184": "Wavefront Object",
"185": "DIGITAL Command Language",
"186": "Thrift",
"187": "ApacheConf",
"188": "Gerber Image",
"189": "YANG",
"19": "Shell",
"190": "Groovy",
"191": "MATLAB",
"192": "Bluespec",
"193": "POV-Ray SDL",
"194": "Solidity",
"195": "Dart",
"196": "VBScript",
"197": "Fluent",
"198": "SQF",
"199": "Gettext Catalog",
"2": "Nim",
"20": "PowerShell",
"200": "Haskell",
"201": "ImageJ Macro",
"202": "PureScript",
"203": "GDScript",
"204": "Inno Setup",
"205": "ColdFusion",
"206": "TSV",
"207": "Visual Basic",
"208": "CMake",
"209": "Eagle",
"21": "TSX",
"210": "Blade",
"211": "OCaml",
"212": "LookML",
"213": "VHDL",
"214": "HCL",
"215": "GLSL",
"216": "RDoc",
"217": "Metal",
"218": "Isabelle",
"219": "TypeScript",
"22": "C#",
"220": "Swift",
"221": "Stylus",
"222": "Ada",
"223": "Crystal",
"224": "Less",
"225": "Verilog",
"226": "XML Property List",
"227": "Hoon",
"228": "Hack",
"229": "PLpgSQL",
"23": "Gnuplot",
"230": "Scala",
"231": "FreeMarker",
"232": "Kotlin",
"233": "GAS",
"234": "Twig",
"235": "EJS",
"236": "JavaScript",
"237": "Gradle",
"238": "ECL",
"239": "Jupyter Notebook",
"24": "Haxe",
"240": "OpenType Feature File",
"241": "Protocol Buffer Text Format",
"242": "HTML+Razor",
"243": "Fortran Free Form",
"244": "Logtalk",
"245": "Kit",
"246": "Graphviz (DOT)",
"247": "Erlang",
"248": "C",
"249": "LLVM",
"25": "Go Module",
"250": "Visual Basic .NET",
"251": "PHP",
"252": "J",
"253": "Ragel in Ruby Host",
"254": "CoffeeScript",
"255": "PlantUML",
"256": "Vim Script",
"257": "Go",
"258": "Vim Snippet",
"259": "R",
"26": "Vue",
"260": "Dockerfile",
"261": "Lex",
"262": "G-code",
"263": "Scilab",
"264": "Perl",
"265": "Java",
"266": "Julia",
"267": "Turtle",
"268": "Gherkin",
"269": "C++",
"27": "ObjDump",
"270": "Python",
"271": "Io",
"272": "GDB",
"273": "Makefile",
"274": "Common Lisp",
"275": "Apex",
"276": "PostScript",
"277": "Edoid",
"278": "Unity3D Asset",
"279": "OpenEdge ABL",
"28": "SQL",
"280": "Open Policy Agent",
"281": "Sass",
"282": "Rascal",
"283": "Lean",
"284": "Assembly",
"29": "AsciiDoc",
"3": "Chapel",
"30": "Fish",
"31": "Pascal",
"32": "Wavefront Object",
"33": "DIGITAL Command Language",
"34": "Thrift",
"35": "ApacheConf",
"36": "Gerber Image",
"37": "YANG",
"38": "Mathematica",
"39": "OpenStep Property List",
"4": "Ignore List",
"40": "GAP",
"41": "Groovy",
"42": "MATLAB",
"43": "Adobe Font Metrics",
"44": "Bluespec",
"45": "POV-Ray SDL",
"46": "Solidity",
"47": "Dart",
"48": "XS",
"49": "VBScript",
"5": "Objective-C++",
"50": "Fluent",
"51": "SQF",
"52": "Gettext Catalog",
"53": "JAR Manifest",
"54": "Haskell",
"55": "ImageJ Macro",
"56": "Ioke",
"57": "PureScript",
"58": "REALbasic",
"59": "GDScript",
"6": "Squirrel",
"60": "Inno Setup",
"61": "ColdFusion",
"62": "TSV",
"63": "Visual Basic",
"64": "Kvlang",
"65": "CMake",
"66": "Eagle",
"67": "Git Config",
"68": "Blade",
"69": "OCaml",
"7": "AGS Script",
"70": "HCL",
"71": "VHDL",
"72": "LookML",
"73": "GLSL",
"74": "RDoc",
"75": "Metal",
"76": "Isabelle",
"77": "TypeScript",
"78": "Swift",
"79": "Stylus",
"8": "Ruby",
"80": "Ada",
"81": "Crystal",
"82": "Less",
"83": "JQ",
"84": "Verilog",
"85": "XML Property List",
"86": "Hoon",
"87": "Hack",
"88": "PLpgSQL",
"89": "Scala",
"9": "Objective-C",
"90": "FreeMarker",
"91": "Unknown",
"92": "Kotlin",
"93": "GAS",
"94": "Twig",
"95": "EJS",
"96": "JavaScript",
"97": "Gradle",
"98": "ECL",
"99": "Jupyter Notebook"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"AGS Script": "7",
"Ada": "222",
"Adobe Font Metrics": "43",
"ApacheConf": "187",
"Apex": "275",
"AsciiDoc": "182",
"Assembly": "284",
"Blade": "210",
"Bluespec": "192",
"C": "248",
"C#": "175",
"C++": "269",
"CMake": "208",
"CODEOWNERS": "138",
"Chapel": "155",
"CoffeeScript": "254",
"ColdFusion": "205",
"Common Lisp": "274",
"Crystal": "223",
"Csound": "126",
"Csound Document": "131",
"D": "167",
"DIGITAL Command Language": "185",
"Dart": "195",
"Dockerfile": "260",
"ECL": "238",
"EJS": "235",
"Eagle": "209",
"Edoid": "277",
"Erlang": "247",
"Fish": "183",
"Fluent": "197",
"Fortran Free Form": "243",
"FreeBasic": "164",
"FreeMarker": "231",
"G-code": "262",
"GAP": "40",
"GAS": "233",
"GDB": "272",
"GDScript": "203",
"GLSL": "215",
"Gerber Image": "188",
"Gettext Catalog": "199",
"Gherkin": "268",
"Git Config": "67",
"Gnuplot": "176",
"Go": "257",
"Go Module": "178",
"Gradle": "237",
"Graphviz (DOT)": "246",
"Groovy": "190",
"HCL": "214",
"HTML+Razor": "242",
"Hack": "228",
"Haskell": "200",
"Haxe": "177",
"Hoon": "227",
"Ignore List": "158",
"ImageJ Macro": "201",
"Inform 7": "153",
"Inno Setup": "204",
"Io": "271",
"Ioke": "56",
"Isabelle": "218",
"J": "252",
"JAR Manifest": "53",
"JQ": "83",
"Java": "265",
"JavaScript": "236",
"Julia": "266",
"Jupyter Notebook": "239",
"KiCad Layout": "156",
"Kit": "245",
"Kotlin": "232",
"Kvlang": "64",
"LLVM": "249",
"Lean": "283",
"Less": "224",
"Lex": "261",
"Logtalk": "244",
"LookML": "212",
"Lua": "154",
"MATLAB": "191",
"Makefile": "273",
"Mathematica": "38",
"Metal": "217",
"Modelica": "169",
"Nim": "157",
"OCaml": "211",
"ObjDump": "180",
"Objective-C": "162",
"Objective-C++": "159",
"Open Policy Agent": "280",
"OpenEdge ABL": "279",
"OpenStep Property List": "39",
"OpenType Feature File": "240",
"PHP": "251",
"PLpgSQL": "229",
"POV-Ray SDL": "193",
"Pascal": "31",
"Perl": "264",
"Pickle": "170",
"PicoLisp": "17",
"PlantUML": "255",
"PostScript": "276",
"PowerShell": "173",
"Prolog": "102",
"Protocol Buffer Text Format": "241",
"PureScript": "202",
"Python": "270",
"R": "259",
"RDoc": "216",
"REALbasic": "58",
"Ragel in Ruby Host": "253",
"Rascal": "282",
"Raw token data": "168",
"Redcode": "127",
"Ruby": "161",
"Rust": "163",
"SCSS": "171",
"SQF": "198",
"SQL": "181",
"Sass": "281",
"Scala": "230",
"Scheme": "165",
"Scilab": "263",
"Shell": "172",
"Smali": "135",
"Solidity": "194",
"Squirrel": "160",
"Starlark": "166",
"Stylus": "221",
"Swift": "220",
"TSV": "206",
"TSX": "174",
"Thrift": "186",
"Turtle": "267",
"Twig": "234",
"TypeScript": "219",
"Unity3D Asset": "278",
"Unix Assembly": "100",
"Unknown": "91",
"VBScript": "196",
"VCL": "128",
"VHDL": "213",
"Verilog": "225",
"Vim Script": "256",
"Vim Snippet": "258",
"Visual Basic": "207",
"Visual Basic .NET": "250",
"Vue": "179",
"Wavefront Object": "184",
"XML Property List": "226",
"XS": "48",
"YANG": "189"
},
"layer_norm_eps": 1e-05,
"local_attention": 128,
"local_rope_theta": 10000.0,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 50283,
"position_embedding_type": "absolute",
"problem_type": "single_label_classification",
"repad_logits_with_grad": false,
"sep_token_id": 50282,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"transformers_version": "4.57.3",
"vocab_size": 50368
}