| { |
| "_name_or_path": "microsoft/deberta-v3-base", |
| "architectures": [ |
| "DebertaV2ForSequenceClassification" |
| ], |
| "attention_probs_dropout_prob": 0.1, |
| "hidden_act": "gelu", |
| "hidden_dropout_prob": 0.1, |
| "hidden_size": 768, |
| "id2label": { |
| "0": "astro-ph", |
| "1": "cond-mat", |
| "2": "cs", |
| "3": "eess", |
| "4": "hep-ph", |
| "5": "hep-th", |
| "6": "math", |
| "7": "physics", |
| "8": "quant-ph", |
| "9": "stat" |
| }, |
| "initializer_range": 0.02, |
| "intermediate_size": 3072, |
| "label2id": { |
| "astro-ph": 0, |
| "cond-mat": 1, |
| "cs": 2, |
| "eess": 3, |
| "hep-ph": 4, |
| "hep-th": 5, |
| "math": 6, |
| "physics": 7, |
| "quant-ph": 8, |
| "stat": 9 |
| }, |
| "layer_norm_eps": 1e-07, |
| "max_position_embeddings": 512, |
| "max_relative_positions": -1, |
| "model_type": "deberta-v2", |
| "norm_rel_ebd": "layer_norm", |
| "num_attention_heads": 12, |
| "num_hidden_layers": 12, |
| "pad_token_id": 0, |
| "pooler_dropout": 0, |
| "pooler_hidden_act": "gelu", |
| "pooler_hidden_size": 768, |
| "pos_att_type": [ |
| "p2c", |
| "c2p" |
| ], |
| "position_biased_input": false, |
| "position_buckets": 256, |
| "relative_attention": true, |
| "share_att_key": true, |
| "torch_dtype": "float32", |
| "transformers_version": "4.44.0", |
| "type_vocab_size": 0, |
| "vocab_size": 128100 |
| } |
|
|