{ "architectures": [ "ModernBertForMaskedLM" ], "attention_bias": false, "attention_dropout": 0.0, "auto_map": { "AutoModel": "modeling_havelock.HavelockTokenClassifier" }, "bos_token_id": 50281, "classifier_activation": "gelu", "classifier_bias": false, "classifier_dropout": 0.0, "classifier_pooling": "mean", "cls_token_id": 50281, "decoder_bias": true, "deterministic_flash_attn": false, "dtype": "float32", "embedding_dropout": 0.0, "eos_token_id": 50282, "global_attn_every_n_layers": 3, "gradient_checkpointing": false, "hidden_activation": "gelu", "hidden_size": 768, "id2label": { "0": "O-literate_abstract_noun", "1": "B-literate_abstract_noun", "10": "B-literate_agentless_passive", "100": "B-oral_discourse_formula", "101": "I-oral_discourse_formula", "102": "O-oral_embodied_action", "103": "B-oral_embodied_action", "104": "I-oral_embodied_action", "105": "O-oral_everyday_example", "106": "B-oral_everyday_example", "107": "I-oral_everyday_example", "108": "O-oral_imperative", "109": "B-oral_imperative", "11": "I-literate_agentless_passive", "110": "I-oral_imperative", "111": "O-oral_inclusive_we", "112": "B-oral_inclusive_we", "113": "I-oral_inclusive_we", "114": "O-oral_intensifier_doubling", "115": "B-oral_intensifier_doubling", "116": "I-oral_intensifier_doubling", "117": "O-oral_lexical_repetition", "118": "B-oral_lexical_repetition", "119": "I-oral_lexical_repetition", "12": "O-literate_aside", "120": "O-oral_named_individual", "121": "B-oral_named_individual", "122": "I-oral_named_individual", "123": "O-oral_parallelism", "124": "B-oral_parallelism", "125": "I-oral_parallelism", "126": "O-oral_phatic_check", "127": "B-oral_phatic_check", "128": "I-oral_phatic_check", "129": "O-oral_phatic_filler", "13": "B-literate_aside", "130": "B-oral_phatic_filler", "131": "I-oral_phatic_filler", "132": "O-oral_rhetorical_question", "133": "B-oral_rhetorical_question", "134": "I-oral_rhetorical_question", "135": "O-oral_second_person", "136": "B-oral_second_person", "137": "I-oral_second_person", "138": "O-oral_self_correction", "139": "B-oral_self_correction", "14": "I-literate_aside", "140": "I-oral_self_correction", "141": "O-oral_sensory_detail", "142": "B-oral_sensory_detail", "143": "I-oral_sensory_detail", "144": "O-oral_simple_conjunction", "145": "B-oral_simple_conjunction", "146": "I-oral_simple_conjunction", "147": "O-oral_specific_place", "148": "B-oral_specific_place", "149": "I-oral_specific_place", "15": "O-literate_categorical_statement", "150": "O-oral_temporal_anchor", "151": "B-oral_temporal_anchor", "152": "I-oral_temporal_anchor", "153": "O-oral_tricolon", "154": "B-oral_tricolon", "155": "I-oral_tricolon", "156": "O-oral_vocative", "157": "B-oral_vocative", "158": "I-oral_vocative", "16": "B-literate_categorical_statement", "17": "I-literate_categorical_statement", "18": "O-literate_causal_explicit", "19": "B-literate_causal_explicit", "2": "I-literate_abstract_noun", "20": "I-literate_causal_explicit", "21": "O-literate_citation", "22": "B-literate_citation", "23": "I-literate_citation", "24": "O-literate_conceptual_metaphor", "25": "B-literate_conceptual_metaphor", "26": "I-literate_conceptual_metaphor", "27": "O-literate_concessive", "28": "B-literate_concessive", "29": "I-literate_concessive", "3": "O-literate_additive_formal", "30": "O-literate_concessive_connector", "31": "B-literate_concessive_connector", "32": "I-literate_concessive_connector", "33": "O-literate_concrete_setting", "34": "B-literate_concrete_setting", "35": "I-literate_concrete_setting", "36": "O-literate_conditional", "37": "B-literate_conditional", "38": "I-literate_conditional", "39": "O-literate_contrastive", "4": "B-literate_additive_formal", "40": "B-literate_contrastive", "41": "I-literate_contrastive", "42": "O-literate_cross_reference", "43": "B-literate_cross_reference", "44": "I-literate_cross_reference", "45": "O-literate_definitional_move", "46": "B-literate_definitional_move", "47": "I-literate_definitional_move", "48": "O-literate_enumeration", "49": "B-literate_enumeration", "5": "I-literate_additive_formal", "50": "I-literate_enumeration", "51": "O-literate_epistemic_hedge", "52": "B-literate_epistemic_hedge", "53": "I-literate_epistemic_hedge", "54": "O-literate_evidential", "55": "B-literate_evidential", "56": "I-literate_evidential", "57": "O-literate_institutional_subject", "58": "B-literate_institutional_subject", "59": "I-literate_institutional_subject", "6": "O-literate_agent_demoted", "60": "O-literate_list_structure", "61": "B-literate_list_structure", "62": "I-literate_list_structure", "63": "O-literate_metadiscourse", "64": "B-literate_metadiscourse", "65": "I-literate_metadiscourse", "66": "O-literate_nested_clauses", "67": "B-literate_nested_clauses", "68": "I-literate_nested_clauses", "69": "O-literate_nominalization", "7": "B-literate_agent_demoted", "70": "B-literate_nominalization", "71": "I-literate_nominalization", "72": "O-literate_objectifying_stance", "73": "B-literate_objectifying_stance", "74": "I-literate_objectifying_stance", "75": "O-literate_probability", "76": "B-literate_probability", "77": "I-literate_probability", "78": "O-literate_qualified_assertion", "79": "B-literate_qualified_assertion", "8": "I-literate_agent_demoted", "80": "I-literate_qualified_assertion", "81": "O-literate_relative_chain", "82": "B-literate_relative_chain", "83": "I-literate_relative_chain", "84": "O-literate_technical_abbreviation", "85": "B-literate_technical_abbreviation", "86": "I-literate_technical_abbreviation", "87": "O-literate_technical_term", "88": "B-literate_technical_term", "89": "I-literate_technical_term", "9": "O-literate_agentless_passive", "90": "O-literate_temporal_embedding", "91": "B-literate_temporal_embedding", "92": "I-literate_temporal_embedding", "93": "O-oral_anaphora", "94": "B-oral_anaphora", "95": "I-oral_anaphora", "96": "O-oral_antithesis", "97": "B-oral_antithesis", "98": "I-oral_antithesis", "99": "O-oral_discourse_formula" }, "initializer_cutoff_factor": 2.0, "initializer_range": 0.02, "intermediate_size": 1152, "label2id": { "B-literate_abstract_noun": 1, "B-literate_additive_formal": 4, "B-literate_agent_demoted": 7, "B-literate_agentless_passive": 10, "B-literate_aside": 13, "B-literate_categorical_statement": 16, "B-literate_causal_explicit": 19, "B-literate_citation": 22, "B-literate_conceptual_metaphor": 25, "B-literate_concessive": 28, "B-literate_concessive_connector": 31, "B-literate_concrete_setting": 34, "B-literate_conditional": 37, "B-literate_contrastive": 40, "B-literate_cross_reference": 43, "B-literate_definitional_move": 46, "B-literate_enumeration": 49, "B-literate_epistemic_hedge": 52, "B-literate_evidential": 55, "B-literate_institutional_subject": 58, "B-literate_list_structure": 61, "B-literate_metadiscourse": 64, "B-literate_nested_clauses": 67, "B-literate_nominalization": 70, "B-literate_objectifying_stance": 73, "B-literate_probability": 76, "B-literate_qualified_assertion": 79, "B-literate_relative_chain": 82, "B-literate_technical_abbreviation": 85, "B-literate_technical_term": 88, "B-literate_temporal_embedding": 91, "B-oral_anaphora": 94, "B-oral_antithesis": 97, "B-oral_discourse_formula": 100, "B-oral_embodied_action": 103, "B-oral_everyday_example": 106, "B-oral_imperative": 109, "B-oral_inclusive_we": 112, "B-oral_intensifier_doubling": 115, "B-oral_lexical_repetition": 118, "B-oral_named_individual": 121, "B-oral_parallelism": 124, "B-oral_phatic_check": 127, "B-oral_phatic_filler": 130, "B-oral_rhetorical_question": 133, "B-oral_second_person": 136, "B-oral_self_correction": 139, "B-oral_sensory_detail": 142, "B-oral_simple_conjunction": 145, "B-oral_specific_place": 148, "B-oral_temporal_anchor": 151, "B-oral_tricolon": 154, "B-oral_vocative": 157, "I-literate_abstract_noun": 2, "I-literate_additive_formal": 5, "I-literate_agent_demoted": 8, "I-literate_agentless_passive": 11, "I-literate_aside": 14, "I-literate_categorical_statement": 17, "I-literate_causal_explicit": 20, "I-literate_citation": 23, "I-literate_conceptual_metaphor": 26, "I-literate_concessive": 29, "I-literate_concessive_connector": 32, "I-literate_concrete_setting": 35, "I-literate_conditional": 38, "I-literate_contrastive": 41, "I-literate_cross_reference": 44, "I-literate_definitional_move": 47, "I-literate_enumeration": 50, "I-literate_epistemic_hedge": 53, "I-literate_evidential": 56, "I-literate_institutional_subject": 59, "I-literate_list_structure": 62, "I-literate_metadiscourse": 65, "I-literate_nested_clauses": 68, "I-literate_nominalization": 71, "I-literate_objectifying_stance": 74, "I-literate_probability": 77, "I-literate_qualified_assertion": 80, "I-literate_relative_chain": 83, "I-literate_technical_abbreviation": 86, "I-literate_technical_term": 89, "I-literate_temporal_embedding": 92, "I-oral_anaphora": 95, "I-oral_antithesis": 98, "I-oral_discourse_formula": 101, "I-oral_embodied_action": 104, "I-oral_everyday_example": 107, "I-oral_imperative": 110, "I-oral_inclusive_we": 113, "I-oral_intensifier_doubling": 116, "I-oral_lexical_repetition": 119, "I-oral_named_individual": 122, "I-oral_parallelism": 125, "I-oral_phatic_check": 128, "I-oral_phatic_filler": 131, "I-oral_rhetorical_question": 134, "I-oral_second_person": 137, "I-oral_self_correction": 140, "I-oral_sensory_detail": 143, "I-oral_simple_conjunction": 146, "I-oral_specific_place": 149, "I-oral_temporal_anchor": 152, "I-oral_tricolon": 155, "I-oral_vocative": 158, "O-literate_abstract_noun": 0, "O-literate_additive_formal": 3, "O-literate_agent_demoted": 6, "O-literate_agentless_passive": 9, "O-literate_aside": 12, "O-literate_categorical_statement": 15, "O-literate_causal_explicit": 18, "O-literate_citation": 21, "O-literate_conceptual_metaphor": 24, "O-literate_concessive": 27, "O-literate_concessive_connector": 30, "O-literate_concrete_setting": 33, "O-literate_conditional": 36, "O-literate_contrastive": 39, "O-literate_cross_reference": 42, "O-literate_definitional_move": 45, "O-literate_enumeration": 48, "O-literate_epistemic_hedge": 51, "O-literate_evidential": 54, "O-literate_institutional_subject": 57, "O-literate_list_structure": 60, "O-literate_metadiscourse": 63, "O-literate_nested_clauses": 66, "O-literate_nominalization": 69, "O-literate_objectifying_stance": 72, "O-literate_probability": 75, "O-literate_qualified_assertion": 78, "O-literate_relative_chain": 81, "O-literate_technical_abbreviation": 84, "O-literate_technical_term": 87, "O-literate_temporal_embedding": 90, "O-oral_anaphora": 93, "O-oral_antithesis": 96, "O-oral_discourse_formula": 99, "O-oral_embodied_action": 102, "O-oral_everyday_example": 105, "O-oral_imperative": 108, "O-oral_inclusive_we": 111, "O-oral_intensifier_doubling": 114, "O-oral_lexical_repetition": 117, "O-oral_named_individual": 120, "O-oral_parallelism": 123, "O-oral_phatic_check": 126, "O-oral_phatic_filler": 129, "O-oral_rhetorical_question": 132, "O-oral_second_person": 135, "O-oral_self_correction": 138, "O-oral_sensory_detail": 141, "O-oral_simple_conjunction": 144, "O-oral_specific_place": 147, "O-oral_temporal_anchor": 150, "O-oral_tricolon": 153, "O-oral_vocative": 156 }, "layer_norm_eps": 1e-05, "layer_types": [ "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention" ], "local_attention": 128, "max_position_embeddings": 8192, "mlp_bias": false, "mlp_dropout": 0.0, "model_type": "modernbert", "norm_bias": false, "norm_eps": 1e-05, "num_attention_heads": 12, "num_hidden_layers": 22, "num_types": 53, "pad_token_id": 50283, "position_embedding_type": "absolute", "repad_logits_with_grad": false, "rope_parameters": { "full_attention": { "rope_theta": 160000.0, "rope_type": "default" }, "sliding_attention": { "rope_theta": 10000.0, "rope_type": "default" } }, "sep_token_id": 50282, "sparse_pred_ignore_index": -100, "sparse_prediction": false, "tie_word_embeddings": true, "transformers_version": "5.0.0", "use_crf": true, "vocab_size": 50368 }