{
  "alpha": 64,
  "architectures": [
    "ParameterGenerator"
  ],
  "d_model": 4096,
  "dim_accumulation": 4,
  "dtype": "float32",
  "head_dim": 128,
  "input_dim": 4096,
  "model_type": "parameter_generator",
  "num_base_model_layers": 32,
  "num_pg_layers": 24,
  "output_dim": 1024,
  "pg_mapping": {
    "mlp.experts": {
      "num_experts": 64,
      "sub_weights": {
        "w1": {
          "lora_A_dim": 4096,
          "lora_B_dim": 3072
        },
        "w2": {
          "lora_A_dim": 3072,
          "lora_B_dim": 4096
        },
        "w3": {
          "lora_A_dim": 4096,
          "lora_B_dim": 3072
        }
      },
      "type": "grouped"
    },
    "mlp.shared_mlp.down_proj": {
      "lora_A_dim": 3072,
      "lora_B_dim": 4096
    },
    "mlp.shared_mlp.gate_and_up_proj": {
      "lora_A_dim": 4096,
      "lora_B_dim": 6144
    },
    "self_attn.o_proj": {
      "lora_A_dim": 4096,
      "lora_B_dim": 4096
    },
    "self_attn.qkv_proj": {
      "lora_A_dim": 4096,
      "lora_B_dim": 6144
    }
  },
  "prefix": "model.layers.",
  "rank": 16,
  "token_dim": 1024,
  "transformers_version": "4.57.1"
}