{
"architectures": [
"ModernBertForSequenceClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 50281,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 50281,
"decoder_bias": true,
"deterministic_flash_attn": false,
"dtype": "float32",
"embedding_dropout": 0.0,
"eos_token_id": 50282,
"global_attn_every_n_layers": 3,
"global_rope_theta": 160000.0,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "GAP",
"1": "Squirrel",
"2": "Objective-C",
"3": "Gerber Image",
"4": "Open Policy Agent",
"5": "Jupyter Notebook",
"6": "Rascal",
"7": "Starlark",
"8": "ImageJ Macro",
"9": "PLpgSQL",
"10": "SCSS",
"11": "Eagle",
"12": "Bluespec",
"13": "C++",
"14": "PHP",
"15": "GDScript",
"16": "ApacheConf",
"17": "Modelica",
"18": "Pickle",
"19": "MATLAB",
"20": "Kit",
"21": "Erlang",
"22": "DIGITAL Command Language",
"23": "TypeScript",
"24": "Lua",
"25": "LLVM",
"26": "Stylus",
"27": "Lean",
"28": "Gradle",
"29": "Vim Snippet",
"30": "Fortran Free Form",
"31": "Vue",
"32": "Swift",
"33": "GLSL",
"34": "TSX",
"35": "Objective-C++",
"36": "Prolog",
"37": "Dockerfile",
"38": "Twig",
"39": "Wavefront Object",
"40": "Go",
"41": "Makefile",
"42": "XML Property List",
"43": "CODEOWNERS",
"44": "Inform 7",
"45": "Unix Assembly",
"46": "Gettext Catalog",
"47": "CMake",
"48": "XS",
"49": "Solidity",
"50": "Lex",
"51": "AGS Script",
"52": "Groovy",
"53": "C#",
"54": "Thrift",
"55": "Edoid",
"56": "Pascal",
"57": "Fluent",
"58": "PowerShell",
"59": "Java",
"60": "VBScript",
"61": "YANG",
"62": "Raw token data",
"63": "Haskell",
"64": "Hoon",
"65": "Mathematica",
"66": "SQF",
"67": "Inno Setup",
"68": "Ragel in Ruby Host",
"69": "Go Module",
"70": "Vim Script",
"71": "J",
"72": "ObjDump",
"73": "PicoLisp",
"74": "HTML+Razor",
"75": "Gnuplot",
"76": "FreeMarker",
"77": "Assembly",
"78": "Scala",
"79": "RDoc",
"80": "OpenType Feature File",
"81": "ColdFusion",
"82": "Julia",
"83": "OpenStep Property List",
"84": "Csound",
"85": "GDB",
"86": "Dart",
"87": "Git Config",
"88": "Gherkin",
"89": "JAR Manifest",
"90": "AsciiDoc",
"91": "Visual Basic .NET",
"92": "Csound Document",
"93": "HCL",
"94": "OCaml",
"95": "Io",
"96": "PlantUML",
"97": "Nim",
"98": "Rust",
"99": "VCL",
"100": "G-code",
"101": "Metal",
"102": "Visual Basic",
"103": "GAS",
"104": "Adobe Font Metrics",
"105": "Scilab",
"106": "Shell",
"107": "TSV",
"108": "Ignore List",
"109": "Ruby",
"110": "KiCad Layout",
"111": "LookML",
"112": "ECL",
"113": "D",
"114": "Unity3D Asset",
"115": "PostScript",
"116": "R",
"117": "JQ",
"118": "Blade",
"119": "EJS",
"120": "C",
"121": "Chapel",
"122": "Redcode",
"123": "SQL",
"124": "PureScript",
"125": "FreeBasic",
"126": "CoffeeScript",
"127": "Scheme",
"128": "Unknown",
"129": "Haxe",
"130": "Sass",
"131": "Kotlin",
"132": "Protocol Buffer Text Format",
"133": "Fish",
"134": "OpenEdge ABL",
"135": "Less",
"136": "Ioke",
"137": "Common Lisp",
"138": "Kvlang",
"139": "Python",
"140": "REALbasic",
"141": "Hack",
"142": "Logtalk",
"143": "VHDL",
"144": "Apex",
"145": "Turtle",
"146": "POV-Ray SDL",
"147": "Ada",
"148": "Crystal",
"149": "Graphviz (DOT)",
"150": "Verilog",
"151": "JavaScript",
"152": "Smali",
"153": "Perl",
"154": "Isabelle"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"AGS Script": 51,
"Ada": 147,
"Adobe Font Metrics": 104,
"ApacheConf": 16,
"Apex": 144,
"AsciiDoc": 90,
"Assembly": 77,
"Blade": 118,
"Bluespec": 12,
"C": 120,
"C#": 53,
"C++": 13,
"CMake": 47,
"CODEOWNERS": 43,
"Chapel": 121,
"CoffeeScript": 126,
"ColdFusion": 81,
"Common Lisp": 137,
"Crystal": 148,
"Csound": 84,
"Csound Document": 92,
"D": 113,
"DIGITAL Command Language": 22,
"Dart": 86,
"Dockerfile": 37,
"ECL": 112,
"EJS": 119,
"Eagle": 11,
"Edoid": 55,
"Erlang": 21,
"Fish": 133,
"Fluent": 57,
"Fortran Free Form": 30,
"FreeBasic": 125,
"FreeMarker": 76,
"G-code": 100,
"GAP": 0,
"GAS": 103,
"GDB": 85,
"GDScript": 15,
"GLSL": 33,
"Gerber Image": 3,
"Gettext Catalog": 46,
"Gherkin": 88,
"Git Config": 87,
"Gnuplot": 75,
"Go": 40,
"Go Module": 69,
"Gradle": 28,
"Graphviz (DOT)": 149,
"Groovy": 52,
"HCL": 93,
"HTML+Razor": 74,
"Hack": 141,
"Haskell": 63,
"Haxe": 129,
"Hoon": 64,
"Ignore List": 108,
"ImageJ Macro": 8,
"Inform 7": 44,
"Inno Setup": 67,
"Io": 95,
"Ioke": 136,
"Isabelle": 154,
"J": 71,
"JAR Manifest": 89,
"JQ": 117,
"Java": 59,
"JavaScript": 151,
"Julia": 82,
"Jupyter Notebook": 5,
"KiCad Layout": 110,
"Kit": 20,
"Kotlin": 131,
"Kvlang": 138,
"LLVM": 25,
"Lean": 27,
"Less": 135,
"Lex": 50,
"Logtalk": 142,
"LookML": 111,
"Lua": 24,
"MATLAB": 19,
"Makefile": 41,
"Mathematica": 65,
"Metal": 101,
"Modelica": 17,
"Nim": 97,
"OCaml": 94,
"ObjDump": 72,
"Objective-C": 2,
"Objective-C++": 35,
"Open Policy Agent": 4,
"OpenEdge ABL": 134,
"OpenStep Property List": 83,
"OpenType Feature File": 80,
"PHP": 14,
"PLpgSQL": 9,
"POV-Ray SDL": 146,
"Pascal": 56,
"Perl": 153,
"Pickle": 18,
"PicoLisp": 73,
"PlantUML": 96,
"PostScript": 115,
"PowerShell": 58,
"Prolog": 36,
"Protocol Buffer Text Format": 132,
"PureScript": 124,
"Python": 139,
"R": 116,
"RDoc": 79,
"REALbasic": 140,
"Ragel in Ruby Host": 68,
"Rascal": 6,
"Raw token data": 62,
"Redcode": 122,
"Ruby": 109,
"Rust": 98,
"SCSS": 10,
"SQF": 66,
"SQL": 123,
"Sass": 130,
"Scala": 78,
"Scheme": 127,
"Scilab": 105,
"Shell": 106,
"Smali": 152,
"Solidity": 49,
"Squirrel": 1,
"Starlark": 7,
"Stylus": 26,
"Swift": 32,
"TSV": 107,
"TSX": 34,
"Thrift": 54,
"Turtle": 145,
"Twig": 38,
"TypeScript": 23,
"Unity3D Asset": 114,
"Unix Assembly": 45,
"Unknown": 128,
"VBScript": 60,
"VCL": 99,
"VHDL": 143,
"Verilog": 150,
"Vim Script": 70,
"Vim Snippet": 29,
"Visual Basic": 102,
"Visual Basic .NET": 91,
"Vue": 31,
"Wavefront Object": 39,
"XML Property List": 42,
"XS": 48,
"YANG": 61
},
"layer_norm_eps": 1e-05,
"local_attention": 128,
"local_rope_theta": 10000.0,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 50283,
"position_embedding_type": "absolute",
"problem_type": "single_label_classification",
"repad_logits_with_grad": false,
"sep_token_id": 50282,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"transformers_version": "4.57.3",
"vocab_size": 50368
}