{
"architectures": [
"ModernBertForSequenceClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 50281,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 50281,
"decoder_bias": true,
"deterministic_flash_attn": false,
"embedding_dropout": 0.0,
"eos_token_id": 50282,
"global_attn_every_n_layers": 3,
"global_rope_theta": 160000.0,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "T1001",
"1": "T1005",
"2": "T1020",
"3": "T1021",
"4": "T1027",
"5": "T1033",
"6": "T1036",
"7": "T1040",
"8": "T1041",
"9": "T1046",
"10": "T1049",
"11": "T1053",
"12": "T1056",
"13": "T1059",
"14": "T1068",
"15": "T1069",
"16": "T1074",
"17": "T1078",
"18": "T1080",
"19": "T1082",
"20": "T1087",
"21": "T1098",
"22": "T1105",
"23": "T1106",
"24": "T1119",
"25": "T1132",
"26": "T1133",
"27": "T1134",
"28": "T1136",
"29": "T1187",
"30": "T1189",
"31": "T1190",
"32": "T1195",
"33": "T1200",
"34": "T1203",
"35": "T1204",
"36": "T1213",
"37": "T1485",
"38": "T1486",
"39": "T1489",
"40": "T1491",
"41": "T1495",
"42": "T1498",
"43": "T1499",
"44": "T1518",
"45": "T1529",
"46": "T1531",
"47": "T1548",
"48": "T1552",
"49": "T1557",
"50": "T1559",
"51": "T1560",
"52": "T1562",
"53": "T1565",
"54": "T1566",
"55": "T1595",
"56": "T1614",
"57": "T1652",
"58": "T1654",
"59": "T1657",
"60": "T1659"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"T1001": 0,
"T1005": 1,
"T1020": 2,
"T1021": 3,
"T1027": 4,
"T1033": 5,
"T1036": 6,
"T1040": 7,
"T1041": 8,
"T1046": 9,
"T1049": 10,
"T1053": 11,
"T1056": 12,
"T1059": 13,
"T1068": 14,
"T1069": 15,
"T1074": 16,
"T1078": 17,
"T1080": 18,
"T1082": 19,
"T1087": 20,
"T1098": 21,
"T1105": 22,
"T1106": 23,
"T1119": 24,
"T1132": 25,
"T1133": 26,
"T1134": 27,
"T1136": 28,
"T1187": 29,
"T1189": 30,
"T1190": 31,
"T1195": 32,
"T1200": 33,
"T1203": 34,
"T1204": 35,
"T1213": 36,
"T1485": 37,
"T1486": 38,
"T1489": 39,
"T1491": 40,
"T1495": 41,
"T1498": 42,
"T1499": 43,
"T1518": 44,
"T1529": 45,
"T1531": 46,
"T1548": 47,
"T1552": 48,
"T1557": 49,
"T1559": 50,
"T1560": 51,
"T1562": 52,
"T1565": 53,
"T1566": 54,
"T1595": 55,
"T1614": 56,
"T1652": 57,
"T1654": 58,
"T1657": 59,
"T1659": 60
},
"layer_norm_eps": 1e-05,
"local_attention": 128,
"local_rope_theta": 10000.0,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 50283,
"position_embedding_type": "absolute",
"problem_type": "multi_label_classification",
"repad_logits_with_grad": false,
"sep_token_id": 50282,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"torch_dtype": "float32",
"transformers_version": "4.55.0",
"vocab_size": 50368
}