| { |
| "_name_or_path": "microsoft/mdeberta-v3-base", |
| "architectures": [ |
| "DebertaV2ForSequenceClassification" |
| ], |
| "attention_probs_dropout_prob": 0.1, |
| "classifiers_size": [ |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 11, |
| 15, |
| 31, |
| 12, |
| 31, |
| 2, |
| 2, |
| 3, |
| 5, |
| 6, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 2, |
| 2, |
| 2, |
| 2, |
| 6, |
| 6, |
| 6, |
| 69, |
| 2, |
| 1, |
| 8, |
| 10, |
| 2, |
| 2, |
| 5, |
| 2, |
| 2, |
| 2, |
| 2, |
| 1, |
| 1, |
| 1, |
| 20, |
| 235, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 1, |
| 9, |
| 17 |
| ], |
| "hidden_act": "gelu", |
| "hidden_dropout_prob": 0.1, |
| "hidden_size": 768, |
| "id2label": { |
| "0": "entailment", |
| "1": "neutral", |
| "2": "contradiction" |
| }, |
| "initializer_range": 0.02, |
| "intermediate_size": 3072, |
| "label2id": { |
| "contradiction": 2, |
| "entailment": 0, |
| "neutral": 1 |
| }, |
| "layer_norm_eps": 1e-07, |
| "max_position_embeddings": 512, |
| "max_relative_positions": -1, |
| "model_type": "deberta-v2", |
| "norm_rel_ebd": "layer_norm", |
| "num_attention_heads": 12, |
| "num_hidden_layers": 12, |
| "pad_token_id": 0, |
| "pooler_dropout": 0, |
| "pooler_hidden_act": "gelu", |
| "pooler_hidden_size": 768, |
| "pos_att_type": [ |
| "p2c", |
| "c2p" |
| ], |
| "position_biased_input": false, |
| "position_buckets": 256, |
| "relative_attention": true, |
| "share_att_key": true, |
| "tasks": [ |
| "xnli/ur", |
| "xnli/ar", |
| "xnli/bg", |
| "xnli/de", |
| "xnli/el", |
| "xnli/zh", |
| "xnli/vi", |
| "xnli/tr", |
| "xnli/th", |
| "xnli/sw", |
| "xnli/en", |
| "xnli/ru", |
| "xnli/es", |
| "xnli/fr", |
| "xnli/hi", |
| "americas_nli/all_languages", |
| "multilingual-NLI-26lang-2mil7", |
| "stsb_multi_mt/pl", |
| "stsb_multi_mt/it", |
| "stsb_multi_mt/fr", |
| "stsb_multi_mt/nl", |
| "stsb_multi_mt/es", |
| "stsb_multi_mt/de", |
| "stsb_multi_mt/en", |
| "stsb_multi_mt/pt", |
| "stsb_multi_mt/ru", |
| "stsb_multi_mt/zh", |
| "paws-x/zh", |
| "paws-x/ko", |
| "paws-x/fr", |
| "paws-x/es", |
| "paws-x/de", |
| "paws-x/en", |
| "paws-x/ja", |
| "miam/dihana", |
| "miam/ilisten", |
| "miam/loria", |
| "miam/maptask", |
| "miam/vm2", |
| "x-stance/de", |
| "x-stance/fr", |
| "multilingual-sentiments/all", |
| "universal-joy", |
| "amazon_reviews_multi/all_languages", |
| "tweet_sentiment_multilingual/all", |
| "tweet_sentiment_multilingual/portuguese", |
| "tweet_sentiment_multilingual/italian", |
| "tweet_sentiment_multilingual/spanish", |
| "tweet_sentiment_multilingual/german", |
| "tweet_sentiment_multilingual/french", |
| "tweet_sentiment_multilingual/english", |
| "tweet_sentiment_multilingual/arabic", |
| "tweet_sentiment_multilingual/hindi", |
| "offenseval_2020/tr", |
| "offenseval_2020/ar", |
| "offenseval_2020/da", |
| "offenseval_2020/gr", |
| "offenseval_dravidian/tamil", |
| "offenseval_dravidian/malayalam", |
| "offenseval_dravidian/kannada", |
| "MLMA_hate_speech", |
| "xglue/qam", |
| "xsum_factuality", |
| "x-fact", |
| "xglue/nc", |
| "xglue/qadsm", |
| "xglue/qam", |
| "xglue/wpr", |
| "xlwic/xlwic_en_ko", |
| "xlwic/xlwic_fr_fr", |
| "xlwic/xlwic_it_it", |
| "xlwic/xlwic_de_de", |
| "oasst1_dense_flat/quality", |
| "oasst1_dense_flat/toxicity", |
| "oasst1_dense_flat/helpfulness", |
| "language-identification", |
| "wili_2018", |
| "exams/multilingual", |
| "xcsr/X-CSQA-es", |
| "xcsr/X-CSQA-en", |
| "xcsr/X-CODAH-ur", |
| "xcsr/X-CODAH-sw", |
| "xcsr/X-CODAH-hi", |
| "xcsr/X-CODAH-vi", |
| "xcsr/X-CODAH-ar", |
| "xcsr/X-CODAH-ru", |
| "xcsr/X-CODAH-pt", |
| "xcsr/X-CODAH-pl", |
| "xcsr/X-CODAH-nl", |
| "xcsr/X-CODAH-jap", |
| "xcsr/X-CODAH-it", |
| "xcsr/X-CODAH-fr", |
| "xcsr/X-CODAH-es", |
| "xcsr/X-CODAH-de", |
| "xcsr/X-CODAH-zh", |
| "xcsr/X-CODAH-en", |
| "xcsr/X-CSQA-ur", |
| "xcsr/X-CSQA-sw", |
| "xcsr/X-CSQA-zh", |
| "xcsr/X-CSQA-vi", |
| "xcsr/X-CSQA-ar", |
| "xcsr/X-CSQA-ru", |
| "xcsr/X-CSQA-pt", |
| "xcsr/X-CSQA-pl", |
| "xcsr/X-CSQA-nl", |
| "xcsr/X-CSQA-jap", |
| "xcsr/X-CSQA-it", |
| "xcsr/X-CSQA-fr", |
| "xcsr/X-CSQA-de", |
| "xcsr/X-CSQA-hi", |
| "xcopa/translation-th", |
| "xcopa/sw", |
| "xcopa/et", |
| "xcopa/ht", |
| "xcopa/it", |
| "xcopa/id", |
| "xcopa/qu", |
| "xcopa/translation-vi", |
| "xcopa/zh", |
| "xcopa/ta", |
| "xcopa/th", |
| "xcopa/vi", |
| "xcopa/tr", |
| "xcopa/translation-ht", |
| "xcopa/translation-it", |
| "xcopa/translation-id", |
| "xcopa/translation-sw", |
| "xcopa/translation-zh", |
| "xcopa/translation-ta", |
| "xcopa/translation-tr", |
| "xcopa/translation-et", |
| "xstory_cloze/ar", |
| "xstory_cloze/te", |
| "xstory_cloze/sw", |
| "xstory_cloze/id", |
| "xstory_cloze/hi", |
| "xstory_cloze/es", |
| "xstory_cloze/my", |
| "xstory_cloze/ru", |
| "xstory_cloze/en", |
| "xstory_cloze/eu", |
| "xstory_cloze/zh", |
| "hh-rlhf", |
| "xglue/ner", |
| "xglue/pos" |
| ], |
| "torch_dtype": "float32", |
| "transformers_version": "4.26.1", |
| "type_vocab_size": 0, |
| "vocab_size": 251000 |
| } |
|
|