tjayant
/

bloom-560m

Text Classification

Model card Files Files and versions

bloom-560m / config.json

tjayant's picture

Training in progress, epoch 1

2d4e5cf almost 3 years ago

history blame contribute delete

1.26 kB

	{
	"_name_or_path": "bigscience/bloom-560m",
	"apply_residual_connection_post_layernorm": false,
	"architectures": [
	"BloomForSequenceClassification"
	],
	"attention_dropout": 0.0,
	"attention_softmax_in_fp32": true,
	"bias_dropout_fusion": true,
	"bos_token_id": 1,
	"eos_token_id": 2,
	"hidden_dropout": 0.0,
	"hidden_size": 1024,
	"id2label": {
	"0": "A",
	"1": "B",
	"2": "C",
	"3": "D",
	"4": "E",
	"5": "F",
	"6": "G",
	"7": "H",
	"8": "I",
	"9": "J",
	"10": "L",
	"11": "M",
	"12": "N",
	"13": "Z"
	},
	"initializer_range": 0.02,
	"label2id": {
	"A": 0,
	"B": 1,
	"C": 2,
	"D": 3,
	"E": 4,
	"F": 5,
	"G": 6,
	"H": 7,
	"I": 8,
	"J": 9,
	"L": 10,
	"M": 11,
	"N": 12,
	"Z": 13
	},
	"layer_norm_epsilon": 1e-05,
	"masked_softmax_fusion": true,
	"model_type": "bloom",
	"n_head": 16,
	"n_inner": null,
	"n_layer": 24,
	"offset_alibi": 100,
	"pad_token_id": 3,
	"pretraining_tp": 1,
	"problem_type": "multi_label_classification",
	"skip_bias_add": true,
	"skip_bias_add_qkv": false,
	"slow_but_exact": false,
	"torch_dtype": "float32",
	"transformers_version": "4.28.1",
	"unk_token_id": 0,
	"use_cache": true,
	"vocab_size": 250880
	}