{
"architectures": [
"T5ForSequenceClassification"
],
"classifier_dropout": 0.0,
"d_ff": 1024,
"d_kv": 64,
"d_model": 256,
"decoder_start_token_id": 0,
"dense_act_fn": "relu",
"dropout_rate": 0.1,
"dtype": "float32",
"eos_token_id": 1,
"feed_forward_proj": "relu",
"id2label": {
"0": "ach",
"1": "adh",
"2": "afr",
"3": "aka",
"4": "alz",
"5": "amh",
"6": "bam",
"7": "bem",
"8": "ber",
"9": "bfa",
"10": "cgg",
"11": "dag",
"12": "dga",
"13": "din",
"14": "eng",
"15": "ewe",
"16": "fra",
"17": "ful",
"18": "gwr",
"19": "hau",
"20": "ibo",
"21": "ikx",
"22": "kab",
"23": "kau",
"24": "kdi",
"25": "kdj",
"26": "keo",
"27": "kik",
"28": "kin",
"29": "koo",
"30": "kpz",
"31": "laj",
"32": "led",
"33": "lgg",
"34": "lin",
"35": "lsm",
"36": "luc",
"37": "lug",
"38": "luo",
"39": "luy",
"40": "mhi",
"41": "mlg",
"42": "myx",
"43": "nbl",
"44": "nuj",
"45": "nya",
"46": "nyn",
"47": "nyo",
"48": "orm",
"49": "pcm",
"50": "pok",
"51": "rub",
"52": "ruc",
"53": "run",
"54": "rwm",
"55": "sna",
"56": "som",
"57": "sot",
"58": "swa",
"59": "teo",
"60": "tlj",
"61": "tsn",
"62": "ttj",
"63": "wol",
"64": "xho",
"65": "xog",
"66": "yor",
"67": "zul"
},
"initializer_factor": 1.0,
"is_decoder": false,
"is_encoder_decoder": true,
"is_gated_act": false,
"label2id": {
"ach": 0,
"adh": 1,
"afr": 2,
"aka": 3,
"alz": 4,
"amh": 5,
"bam": 6,
"bem": 7,
"ber": 8,
"bfa": 9,
"cgg": 10,
"dag": 11,
"dga": 12,
"din": 13,
"eng": 14,
"ewe": 15,
"fra": 16,
"ful": 17,
"gwr": 18,
"hau": 19,
"ibo": 20,
"ikx": 21,
"kab": 22,
"kau": 23,
"kdi": 24,
"kdj": 25,
"keo": 26,
"kik": 27,
"kin": 28,
"koo": 29,
"kpz": 30,
"laj": 31,
"led": 32,
"lgg": 33,
"lin": 34,
"lsm": 35,
"luc": 36,
"lug": 37,
"luo": 38,
"luy": 39,
"mhi": 40,
"mlg": 41,
"myx": 42,
"nbl": 43,
"nuj": 44,
"nya": 45,
"nyn": 46,
"nyo": 47,
"orm": 48,
"pcm": 49,
"pok": 50,
"rub": 51,
"ruc": 52,
"run": 53,
"rwm": 54,
"sna": 55,
"som": 56,
"sot": 57,
"swa": 58,
"teo": 59,
"tlj": 60,
"tsn": 61,
"ttj": 62,
"wol": 63,
"xho": 64,
"xog": 65,
"yor": 66,
"zul": 67
},
"layer_norm_epsilon": 1e-06,
"model_type": "t5",
"n_positions": 512,
"num_decoder_layers": 4,
"num_heads": 4,
"num_layers": 4,
"pad_token_id": 0,
"problem_type": "single_label_classification",
"relative_attention_max_distance": 128,
"relative_attention_num_buckets": 32,
"scale_decoder_outputs": true,
"tie_word_embeddings": true,
"transformers_version": "5.8.0",
"use_cache": false,
"vocab_size": 32128
}