{
  "ae_mode": "token",
  "attn_implementation": null,
  "auto_map": {
    "AutoConfig": "modeling_clara.CLaRaConfig",
    "AutoModel": "modeling_clara.CLaRa"
  },
  "compr_base_model_name": "/mnt/ceph_rbd/model/Mistral-7B-Instruct-v0.2",
  "compr_every_n_layer": null,
  "compr_linear_type": "concat",
  "compr_mlp_hidden_dim": 8096,
  "compr_model_name": null,
  "compr_n_layers": 5,
  "compr_rate": 128,
  "compr_rms_norm": false,
  "compr_use_mlp": false,
  "decoder_model_name": "/mnt/conductor_data/data/hf_models/Mistral-7B-Instruct-v0.2",
  "device_map": null,
  "different_mem_tokens": true,
  "doc_max_length": 256,
  "generation_top_k": 5,
  "kbtc_training": false,
  "load_adapters": true,
  "load_pretrained_checkpoint": false,
  "lora": true,
  "lora_compressor": false,
  "lora_r": 16,
  "lora_r_compressor": 16,
  "max_new_tokens": 128,
  "model_type": "CLaRa",
  "optimize_mem_tokens": true,
  "pad_token_id": 2,
  "pure_inference": false,
  "quantization": "no",
  "sep": true,
  "stage2_retrieval_top_n": 1,
  "training_form": "both_separately",
  "training_stage": "stage1_2",
  "transformers_version": "4.53.3"
}
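Because the `auto_map` entries point `AutoConfig` and `AutoModel` at the custom classes `modeling_clara.CLaRaConfig` and `modeling_clara.CLaRa`, loading this checkpoint through `transformers` requires `trust_remote_code=True`. A minimal loading sketch follows; the checkpoint path is a placeholder, and the attribute access assumes `CLaRaConfig` exposes the extra fields from this file as config attributes, which is the usual behavior for custom configs:

```python
from transformers import AutoConfig, AutoModel

# Placeholder path: point this at the directory containing config.json
# and modeling_clara.py (or the corresponding Hub repo id).
checkpoint = "/path/to/clara-checkpoint"

# trust_remote_code=True is required so transformers can import the
# custom CLaRaConfig / CLaRa classes named in auto_map.
config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
print(config.model_type)  # "CLaRa"
print(config.compr_rate)  # 128, per this config file (assumed attribute)

model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True)
```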