yigagilbert's picture
End of training
d642f75 verified
{
"architectures": [
"T5ForSequenceClassification"
],
"classifier_dropout": 0.0,
"d_ff": 2048,
"d_kv": 64,
"d_model": 512,
"decoder_start_token_id": 0,
"dense_act_fn": "relu",
"dropout_rate": 0.1,
"dtype": "float32",
"eos_token_id": 1,
"feed_forward_proj": "relu",
"id2label": {
"0": "ach",
"1": "adh",
"2": "bfa",
"3": "cgg",
"4": "eng",
"5": "gwr",
"6": "kdi",
"7": "kdj",
"8": "keo",
"9": "kin",
"10": "kpz",
"11": "laj",
"12": "lgg",
"13": "lsm",
"14": "luc",
"15": "lug",
"16": "mhi",
"17": "myx",
"18": "ndp",
"19": "nuj",
"20": "nyn",
"21": "nyo",
"22": "rub",
"23": "swa",
"24": "teo",
"25": "tlj",
"26": "ttj",
"27": "xog"
},
"initializer_factor": 1.0,
"is_encoder_decoder": true,
"is_gated_act": false,
"label2id": {
"ach": 0,
"adh": 1,
"bfa": 2,
"cgg": 3,
"eng": 4,
"gwr": 5,
"kdi": 6,
"kdj": 7,
"keo": 8,
"kin": 9,
"kpz": 10,
"laj": 11,
"lgg": 12,
"lsm": 13,
"luc": 14,
"lug": 15,
"mhi": 16,
"myx": 17,
"ndp": 18,
"nuj": 19,
"nyn": 20,
"nyo": 21,
"rub": 22,
"swa": 23,
"teo": 24,
"tlj": 25,
"ttj": 26,
"xog": 27
},
"layer_norm_epsilon": 1e-06,
"model_type": "t5",
"n_positions": 512,
"num_decoder_layers": 6,
"num_heads": 8,
"num_layers": 6,
"pad_token_id": 0,
"problem_type": "single_label_classification",
"relative_attention_max_distance": 128,
"relative_attention_num_buckets": 32,
"transformers_version": "4.57.1",
"use_cache": true,
"vocab_size": 32128
}