dvssr
/

unipelt

text2text-generation

Generated from Trainer

Model card Files Files and versions

Metrics Training metrics Community

unipelt / config.json

dvssr's picture

Model save

5abd34e verified almost 2 years ago

history blame contribute delete

3.94 kB

	{
	"_name_or_path": "mse30/bart-base-finetuned-pubmed",
	"activation_dropout": 0.1,
	"activation_function": "gelu",
	"adapters": {
	"adapters": {
	"unipelt": "0815dd75d5ff5808"
	},
	"config_map": {
	"0815dd75d5ff5808": {
	"architecture": "union",
	"configs": [
	{
	"alpha": 8,
	"architecture": "lora",
	"attn_matrices": [
	"q",
	"v"
	],
	"composition_mode": "add",
	"dropout": 0.0,
	"init_weights": "lora",
	"intermediate_lora": false,
	"leave_out": [],
	"output_lora": false,
	"r": 8,
	"selfattn_lora": true,
	"use_gating": true
	},
	{
	"architecture": "prefix_tuning",
	"bottleneck_size": 512,
	"cross_prefix": true,
	"dropout": 0.0,
	"encoder_prefix": true,
	"flat": false,
	"leave_out": [],
	"non_linearity": "tanh",
	"prefix_length": 10,
	"shared_gating": true,
	"use_gating": true
	},
	{
	"adapter_residual_before_ln": false,
	"cross_adapter": false,
	"factorized_phm_W": true,
	"factorized_phm_rule": false,
	"hypercomplex_nonlinearity": "glorot-uniform",
	"init_weights": "bert",
	"inv_adapter": null,
	"inv_adapter_reduction_factor": null,
	"is_parallel": false,
	"learn_phm": true,
	"leave_out": [],
	"ln_after": false,
	"ln_before": false,
	"mh_adapter": false,
	"non_linearity": "relu",
	"original_ln_after": true,
	"original_ln_before": true,
	"output_adapter": true,
	"phm_bias": true,
	"phm_c_init": "normal",
	"phm_dim": 4,
	"phm_init_range": 0.0001,
	"phm_layer": false,
	"phm_rank": 1,
	"reduction_factor": 16,
	"residual_before_ln": true,
	"scaling": 1.0,
	"shared_W_phm": false,
	"shared_phm_rule": true,
	"use_gating": true
	}
	]
	}
	},
	"fusion_config_map": {},
	"fusions": {}
	},
	"add_bias_logits": false,
	"add_final_layer_norm": false,
	"architectures": [
	"BartForConditionalGeneration"
	],
	"attention_dropout": 0.1,
	"bos_token_id": 0,
	"classif_dropout": 0.1,
	"classifier_dropout": 0.0,
	"d_model": 768,
	"decoder_attention_heads": 12,
	"decoder_ffn_dim": 3072,
	"decoder_layerdrop": 0.0,
	"decoder_layers": 6,
	"decoder_start_token_id": 2,
	"dropout": 0.1,
	"early_stopping": true,
	"encoder_attention_heads": 12,
	"encoder_ffn_dim": 3072,
	"encoder_layerdrop": 0.0,
	"encoder_layers": 6,
	"eos_token_id": 2,
	"forced_eos_token_id": 2,
	"gradient_checkpointing": false,
	"id2label": {
	"0": "LABEL_0",
	"1": "LABEL_1",
	"2": "LABEL_2"
	},
	"init_std": 0.02,
	"is_encoder_decoder": true,
	"label2id": {
	"LABEL_0": 0,
	"LABEL_1": 1,
	"LABEL_2": 2
	},
	"max_position_embeddings": 1024,
	"model_type": "bart",
	"no_repeat_ngram_size": 3,
	"normalize_before": false,
	"normalize_embedding": true,
	"num_beams": 4,
	"num_hidden_layers": 6,
	"pad_token_id": 1,
	"scale_embedding": false,
	"task_specific_params": {
	"summarization": {
	"length_penalty": 1.0,
	"max_length": 128,
	"min_length": 12,
	"num_beams": 4
	},
	"summarization_cnn": {
	"length_penalty": 2.0,
	"max_length": 142,
	"min_length": 56,
	"num_beams": 4
	},
	"summarization_xsum": {
	"length_penalty": 1.0,
	"max_length": 62,
	"min_length": 11,
	"num_beams": 6
	}
	},
	"torch_dtype": "float32",
	"transformers_version": "4.36.2",
	"use_cache": true,
	"vocab_size": 50265
	}