{
  "ae_mode": "token",
  "attn_implementation": null,
  "auto_map": {
    "AutoConfig": "modeling_clara.CLaRaConfig",
    "AutoModel": "modeling_clara.CLaRa"
  },
  "compr_base_model_name": "/mnt/ceph_rbd/model/Mistral-7B-Instruct-v0.2",
  "compr_every_n_layer": null,
  "compr_linear_type": "concat",
  "compr_mlp_hidden_dim": 8096,
  "compr_model_name": null,
  "compr_n_layers": 5,
  "compr_rate": 128,
  "compr_rms_norm": false,
  "compr_use_mlp": false,
  "decoder_model_name": "/mnt/conductor_data/data/hf_models/Mistral-7B-Instruct-v0.2",
  "device_map": null,
  "different_mem_tokens": true,
  "doc_max_length": 256,
  "generation_top_k": 5,
  "kbtc_training": false,
  "load_adapters": true,
  "load_pretrained_checkpoint": false,
  "lora": true,
  "lora_compressor": false,
  "lora_r": 16,
  "lora_r_compressor": 16,
  "max_new_tokens": 128,
  "model_type": "CLaRa",
  "optimize_mem_tokens": true,
  "pad_token_id": 2,
  "pure_inference": false,
  "quantization": "no",
  "sep": true,
  "stage2_retrieval_top_n": 1,
  "training_form": "both_separately",
  "training_stage": "stage1_2",
  "transformers_version": "4.53.3"
}
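Because the `auto_map` entries point `AutoConfig` and `AutoModel` at the custom classes `modeling_clara.CLaRaConfig` and `modeling_clara.CLaRa`, loading this checkpoint through `transformers` requires `trust_remote_code=True`. A minimal loading sketch follows; the checkpoint path is a placeholder, and the attribute access assumes `CLaRaConfig` exposes the extra fields from this file as config attributes, which is the usual behavior for custom configs:

```python
from transformers import AutoConfig, AutoModel

# Placeholder path: point this at the directory containing config.json
# and modeling_clara.py (or the corresponding Hub repo id).
checkpoint = "/path/to/clara-checkpoint"

# trust_remote_code=True is required so transformers can import the
# custom CLaRaConfig / CLaRa classes named in auto_map.
config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
print(config.model_type)  # "CLaRa"
print(config.compr_rate)  # 128, per this config file (assumed attribute)

model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True)
```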