DerivedFunction's picture
Training in progress, step 2500
2dd241a verified
{
"add_cross_attention": false,
"architectures": [
"XLMRobertaForSequenceClassification"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"classifier_dropout": null,
"dtype": "float32",
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "EN",
"1": "ES",
"2": "FR",
"3": "DE",
"4": "IT",
"5": "PT",
"6": "NL",
"7": "VI",
"8": "TR",
"9": "LA",
"10": "ID",
"11": "MS",
"12": "AF",
"13": "SQ",
"14": "IS",
"15": "NO",
"16": "SV",
"17": "DA",
"18": "FI",
"19": "HU",
"20": "PL",
"21": "CS",
"22": "RO",
"23": "RU",
"24": "BG",
"25": "UK",
"26": "SR",
"27": "BE",
"28": "KK",
"29": "MK",
"30": "MN",
"31": "ZH",
"32": "JA",
"33": "KO",
"34": "HI",
"35": "UR",
"36": "BN",
"37": "TA",
"38": "TE",
"39": "MR",
"40": "GU",
"41": "KN",
"42": "ML",
"43": "PA",
"44": "AS",
"45": "OR",
"46": "AR",
"47": "FA",
"48": "PS",
"49": "SD",
"50": "UG",
"51": "EL",
"52": "HE",
"53": "YI",
"54": "HY",
"55": "KA",
"56": "AM",
"57": "KM",
"58": "LO",
"59": "MY",
"60": "TH",
"61": "SI",
"62": "BO",
"63": "DV",
"64": "TI",
"65": "SW",
"66": "EU",
"67": "TL",
"68": "XH",
"69": "CA",
"70": "GL",
"71": "OC",
"72": "BR",
"73": "GA",
"74": "GD",
"75": "CY",
"76": "SCO",
"77": "BS",
"78": "HR",
"79": "SL",
"80": "SK",
"81": "ET",
"82": "LV",
"83": "LT",
"84": "EO",
"85": "JV",
"86": "MG",
"87": "OM",
"88": "SO",
"89": "SU",
"90": "UZ",
"91": "KU",
"92": "CKB",
"93": "NE",
"94": "MT",
"95": "LB",
"96": "RM",
"97": "TT",
"98": "KY",
"99": "TG",
"100": "BA",
"101": "YO",
"102": "ZU",
"103": "NY",
"104": "CE"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"is_decoder": false,
"label2id": {
"AF": 12,
"AM": 56,
"AR": 46,
"AS": 44,
"BA": 100,
"BE": 27,
"BG": 24,
"BN": 36,
"BO": 62,
"BR": 72,
"BS": 77,
"CA": 69,
"CE": 104,
"CKB": 92,
"CS": 21,
"CY": 75,
"DA": 17,
"DE": 3,
"DV": 63,
"EL": 51,
"EN": 0,
"EO": 84,
"ES": 1,
"ET": 81,
"EU": 66,
"FA": 47,
"FI": 18,
"FR": 2,
"GA": 73,
"GD": 74,
"GL": 70,
"GU": 40,
"HE": 52,
"HI": 34,
"HR": 78,
"HU": 19,
"HY": 54,
"ID": 10,
"IS": 14,
"IT": 4,
"JA": 32,
"JV": 85,
"KA": 55,
"KK": 28,
"KM": 57,
"KN": 41,
"KO": 33,
"KU": 91,
"KY": 98,
"LA": 9,
"LB": 95,
"LO": 58,
"LT": 83,
"LV": 82,
"MG": 86,
"MK": 29,
"ML": 42,
"MN": 30,
"MR": 39,
"MS": 11,
"MT": 94,
"MY": 59,
"NE": 93,
"NL": 6,
"NO": 15,
"NY": 103,
"OC": 71,
"OM": 87,
"OR": 45,
"PA": 43,
"PL": 20,
"PS": 48,
"PT": 5,
"RM": 96,
"RO": 22,
"RU": 23,
"SCO": 76,
"SD": 49,
"SI": 61,
"SK": 80,
"SL": 79,
"SO": 88,
"SQ": 13,
"SR": 26,
"SU": 89,
"SV": 16,
"SW": 65,
"TA": 37,
"TE": 38,
"TG": 99,
"TH": 60,
"TI": 64,
"TL": 67,
"TR": 8,
"TT": 97,
"UG": 50,
"UK": 25,
"UR": 35,
"UZ": 90,
"VI": 7,
"XH": 68,
"YI": 53,
"YO": 101,
"ZH": 31,
"ZU": 102
},
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "xlm-roberta",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"output_past": true,
"pad_token_id": 1,
"position_embedding_type": "absolute",
"problem_type": "multi_label_classification",
"tie_word_embeddings": true,
"transformers_version": "5.5.4",
"type_vocab_size": 1,
"use_cache": false,
"vocab_size": 250002
}