{ "adaptor_config": { "alpha_pattern": {}, "auto_mapping": null, "base_model_name_or_path": "", "bias": "none", "corda_config": null, "eva_config": null, "exclude_modules": null, "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "layer_replication": null, "layers_pattern": null, "layers_to_transform": null, "loftq_config": {}, "lora_alpha": 16, "lora_bias": false, "lora_dropout": 0.1, "megatron_config": null, "megatron_core": "megatron.core", "modules_to_save": null, "peft_type": "LORA", "qalora_group_size": 16, "r": 8, "rank_pattern": {}, "revision": null, "target_modules": [ "up_proj_k", "out_proj", "fc2", "up_proj_v", "q_proj", "dkv_proj", "fc1" ], "task_type": "CAUSAL_LM", "trainable_token_indices": null, "use_dora": false, "use_qalora": false, "use_rslora": false }, "balance_loss_weight": 0.01, "bos_token_id": 2, "dropout": 0.1, "embed_dim": 640, "eos_token_id": 3, "experts_per_token": 2, "latent_dim": 160, "max_seq_len": 256, "mlp_dim": 1536, "model_type": "latent_moe_shim", "num_experts": 6, "num_heads": 8, "num_layers": 8, "pad_token_id": 0, "transformers_version": "4.53.2", "vocab_size": 30000 }