{"trainable_params": 24520288, "trainable_params_kv": [["llama.layers.0.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.0.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.0.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.0.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.0.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.0.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.0.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.0.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.0.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.0.attention.prompt.weight", [10, 4096]], ["llama.layers.0.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.0.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.0.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.0.p_adapter.down_proj.bias", [16]], ["llama.layers.0.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.0.p_adapter.up_proj.bias", [4096]], ["llama.layers.0.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.0.adapter_type_router.w1.bias", [28]], ["llama.layers.0.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.0.adapter_type_router.w2.bias", [7]], ["llama.layers.0.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.0.adapter_type_router.w3.bias", [28]], ["llama.layers.1.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.1.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.1.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.1.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.1.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.1.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.1.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.1.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.1.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.1.attention.prompt.weight", [10, 4096]], ["llama.layers.1.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.1.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.1.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.1.p_adapter.down_proj.bias", [16]], ["llama.layers.1.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.1.p_adapter.up_proj.bias", [4096]], ["llama.layers.1.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.1.adapter_type_router.w1.bias", [28]], ["llama.layers.1.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.1.adapter_type_router.w2.bias", [7]], ["llama.layers.1.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.1.adapter_type_router.w3.bias", [28]], ["llama.layers.2.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.2.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.2.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.2.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.2.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.2.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.2.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.2.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.2.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.2.attention.prompt.weight", [10, 4096]], ["llama.layers.2.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.2.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.2.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.2.p_adapter.down_proj.bias", [16]], ["llama.layers.2.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.2.p_adapter.up_proj.bias", [4096]], ["llama.layers.2.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.2.adapter_type_router.w1.bias", [28]], ["llama.layers.2.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.2.adapter_type_router.w2.bias", [7]], ["llama.layers.2.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.2.adapter_type_router.w3.bias", [28]], ["llama.layers.3.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.3.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.3.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.3.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.3.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.3.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.3.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.3.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.3.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.3.attention.prompt.weight", [10, 4096]], ["llama.layers.3.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.3.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.3.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.3.p_adapter.down_proj.bias", [16]], ["llama.layers.3.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.3.p_adapter.up_proj.bias", [4096]], ["llama.layers.3.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.3.adapter_type_router.w1.bias", [28]], ["llama.layers.3.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.3.adapter_type_router.w2.bias", [7]], ["llama.layers.3.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.3.adapter_type_router.w3.bias", [28]], ["llama.layers.4.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.4.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.4.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.4.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.4.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.4.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.4.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.4.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.4.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.4.attention.prompt.weight", [10, 4096]], ["llama.layers.4.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.4.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.4.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.4.p_adapter.down_proj.bias", [16]], ["llama.layers.4.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.4.p_adapter.up_proj.bias", [4096]], ["llama.layers.4.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.4.adapter_type_router.w1.bias", [28]], ["llama.layers.4.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.4.adapter_type_router.w2.bias", [7]], ["llama.layers.4.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.4.adapter_type_router.w3.bias", [28]], ["llama.layers.5.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.5.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.5.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.5.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.5.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.5.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.5.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.5.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.5.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.5.attention.prompt.weight", [10, 4096]], ["llama.layers.5.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.5.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.5.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.5.p_adapter.down_proj.bias", [16]], ["llama.layers.5.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.5.p_adapter.up_proj.bias", [4096]], ["llama.layers.5.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.5.adapter_type_router.w1.bias", [28]], ["llama.layers.5.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.5.adapter_type_router.w2.bias", [7]], ["llama.layers.5.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.5.adapter_type_router.w3.bias", [28]], ["llama.layers.6.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.6.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.6.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.6.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.6.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.6.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.6.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.6.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.6.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.6.attention.prompt.weight", [10, 4096]], ["llama.layers.6.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.6.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.6.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.6.p_adapter.down_proj.bias", [16]], ["llama.layers.6.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.6.p_adapter.up_proj.bias", [4096]], ["llama.layers.6.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.6.adapter_type_router.w1.bias", [28]], ["llama.layers.6.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.6.adapter_type_router.w2.bias", [7]], ["llama.layers.6.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.6.adapter_type_router.w3.bias", [28]], ["llama.layers.7.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.7.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.7.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.7.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.7.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.7.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.7.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.7.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.7.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.7.attention.prompt.weight", [10, 4096]], ["llama.layers.7.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.7.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.7.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.7.p_adapter.down_proj.bias", [16]], ["llama.layers.7.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.7.p_adapter.up_proj.bias", [4096]], ["llama.layers.7.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.7.adapter_type_router.w1.bias", [28]], ["llama.layers.7.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.7.adapter_type_router.w2.bias", [7]], ["llama.layers.7.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.7.adapter_type_router.w3.bias", [28]], ["llama.layers.8.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.8.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.8.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.8.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.8.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.8.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.8.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.8.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.8.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.8.attention.prompt.weight", [10, 4096]], ["llama.layers.8.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.8.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.8.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.8.p_adapter.down_proj.bias", [16]], ["llama.layers.8.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.8.p_adapter.up_proj.bias", [4096]], ["llama.layers.8.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.8.adapter_type_router.w1.bias", [28]], ["llama.layers.8.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.8.adapter_type_router.w2.bias", [7]], ["llama.layers.8.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.8.adapter_type_router.w3.bias", [28]], ["llama.layers.9.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.9.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.9.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.9.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.9.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.9.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.9.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.9.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.9.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.9.attention.prompt.weight", [10, 4096]], ["llama.layers.9.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.9.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.9.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.9.p_adapter.down_proj.bias", [16]], ["llama.layers.9.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.9.p_adapter.up_proj.bias", [4096]], ["llama.layers.9.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.9.adapter_type_router.w1.bias", [28]], ["llama.layers.9.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.9.adapter_type_router.w2.bias", [7]], ["llama.layers.9.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.9.adapter_type_router.w3.bias", [28]], ["llama.layers.10.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.10.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.10.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.10.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.10.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.10.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.10.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.10.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.10.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.10.attention.prompt.weight", [10, 4096]], ["llama.layers.10.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.10.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.10.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.10.p_adapter.down_proj.bias", [16]], ["llama.layers.10.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.10.p_adapter.up_proj.bias", [4096]], ["llama.layers.10.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.10.adapter_type_router.w1.bias", [28]], ["llama.layers.10.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.10.adapter_type_router.w2.bias", [7]], ["llama.layers.10.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.10.adapter_type_router.w3.bias", [28]], ["llama.layers.11.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.11.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.11.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.11.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.11.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.11.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.11.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.11.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.11.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.11.attention.prompt.weight", [10, 4096]], ["llama.layers.11.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.11.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.11.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.11.p_adapter.down_proj.bias", [16]], ["llama.layers.11.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.11.p_adapter.up_proj.bias", [4096]], ["llama.layers.11.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.11.adapter_type_router.w1.bias", [28]], ["llama.layers.11.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.11.adapter_type_router.w2.bias", [7]], ["llama.layers.11.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.11.adapter_type_router.w3.bias", [28]], ["llama.layers.12.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.12.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.12.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.12.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.12.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.12.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.12.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.12.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.12.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.12.attention.prompt.weight", [10, 4096]], ["llama.layers.12.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.12.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.12.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.12.p_adapter.down_proj.bias", [16]], ["llama.layers.12.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.12.p_adapter.up_proj.bias", [4096]], ["llama.layers.12.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.12.adapter_type_router.w1.bias", [28]], ["llama.layers.12.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.12.adapter_type_router.w2.bias", [7]], ["llama.layers.12.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.12.adapter_type_router.w3.bias", [28]], ["llama.layers.13.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.13.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.13.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.13.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.13.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.13.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.13.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.13.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.13.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.13.attention.prompt.weight", [10, 4096]], ["llama.layers.13.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.13.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.13.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.13.p_adapter.down_proj.bias", [16]], ["llama.layers.13.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.13.p_adapter.up_proj.bias", [4096]], ["llama.layers.13.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.13.adapter_type_router.w1.bias", [28]], ["llama.layers.13.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.13.adapter_type_router.w2.bias", [7]], ["llama.layers.13.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.13.adapter_type_router.w3.bias", [28]], ["llama.layers.14.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.14.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.14.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.14.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.14.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.14.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.14.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.14.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.14.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.14.attention.prompt.weight", [10, 4096]], ["llama.layers.14.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.14.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.14.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.14.p_adapter.down_proj.bias", [16]], ["llama.layers.14.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.14.p_adapter.up_proj.bias", [4096]], ["llama.layers.14.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.14.adapter_type_router.w1.bias", [28]], ["llama.layers.14.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.14.adapter_type_router.w2.bias", [7]], ["llama.layers.14.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.14.adapter_type_router.w3.bias", [28]], ["llama.layers.15.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.15.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.15.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.15.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.15.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.15.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.15.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.15.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.15.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.15.attention.prompt.weight", [10, 4096]], ["llama.layers.15.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.15.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.15.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.15.p_adapter.down_proj.bias", [16]], ["llama.layers.15.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.15.p_adapter.up_proj.bias", [4096]], ["llama.layers.15.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.15.adapter_type_router.w1.bias", [28]], ["llama.layers.15.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.15.adapter_type_router.w2.bias", [7]], ["llama.layers.15.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.15.adapter_type_router.w3.bias", [28]], ["llama.layers.16.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.16.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.16.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.16.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.16.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.16.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.16.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.16.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.16.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.16.attention.prompt.weight", [10, 4096]], ["llama.layers.16.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.16.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.16.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.16.p_adapter.down_proj.bias", [16]], ["llama.layers.16.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.16.p_adapter.up_proj.bias", [4096]], ["llama.layers.16.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.16.adapter_type_router.w1.bias", [28]], ["llama.layers.16.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.16.adapter_type_router.w2.bias", [7]], ["llama.layers.16.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.16.adapter_type_router.w3.bias", [28]], ["llama.layers.17.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.17.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.17.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.17.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.17.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.17.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.17.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.17.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.17.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.17.attention.prompt.weight", [10, 4096]], ["llama.layers.17.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.17.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.17.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.17.p_adapter.down_proj.bias", [16]], ["llama.layers.17.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.17.p_adapter.up_proj.bias", [4096]], ["llama.layers.17.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.17.adapter_type_router.w1.bias", [28]], ["llama.layers.17.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.17.adapter_type_router.w2.bias", [7]], ["llama.layers.17.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.17.adapter_type_router.w3.bias", [28]], ["llama.layers.18.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.18.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.18.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.18.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.18.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.18.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.18.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.18.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.18.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.18.attention.prompt.weight", [10, 4096]], ["llama.layers.18.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.18.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.18.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.18.p_adapter.down_proj.bias", [16]], ["llama.layers.18.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.18.p_adapter.up_proj.bias", [4096]], ["llama.layers.18.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.18.adapter_type_router.w1.bias", [28]], ["llama.layers.18.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.18.adapter_type_router.w2.bias", [7]], ["llama.layers.18.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.18.adapter_type_router.w3.bias", [28]], ["llama.layers.19.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.19.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.19.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.19.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.19.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.19.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.19.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.19.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.19.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.19.attention.prompt.weight", [10, 4096]], ["llama.layers.19.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.19.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.19.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.19.p_adapter.down_proj.bias", [16]], ["llama.layers.19.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.19.p_adapter.up_proj.bias", [4096]], ["llama.layers.19.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.19.adapter_type_router.w1.bias", [28]], ["llama.layers.19.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.19.adapter_type_router.w2.bias", [7]], ["llama.layers.19.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.19.adapter_type_router.w3.bias", [28]], ["llama.layers.20.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.20.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.20.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.20.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.20.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.20.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.20.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.20.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.20.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.20.attention.prompt.weight", [10, 4096]], ["llama.layers.20.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.20.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.20.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.20.p_adapter.down_proj.bias", [16]], ["llama.layers.20.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.20.p_adapter.up_proj.bias", [4096]], ["llama.layers.20.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.20.adapter_type_router.w1.bias", [28]], ["llama.layers.20.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.20.adapter_type_router.w2.bias", [7]], ["llama.layers.20.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.20.adapter_type_router.w3.bias", [28]], ["llama.layers.21.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.21.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.21.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.21.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.21.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.21.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.21.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.21.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.21.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.21.attention.prompt.weight", [10, 4096]], ["llama.layers.21.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.21.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.21.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.21.p_adapter.down_proj.bias", [16]], ["llama.layers.21.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.21.p_adapter.up_proj.bias", [4096]], ["llama.layers.21.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.21.adapter_type_router.w1.bias", [28]], ["llama.layers.21.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.21.adapter_type_router.w2.bias", [7]], ["llama.layers.21.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.21.adapter_type_router.w3.bias", [28]], ["llama.layers.22.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.22.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.22.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.22.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.22.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.22.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.22.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.22.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.22.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.22.attention.prompt.weight", [10, 4096]], ["llama.layers.22.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.22.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.22.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.22.p_adapter.down_proj.bias", [16]], ["llama.layers.22.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.22.p_adapter.up_proj.bias", [4096]], ["llama.layers.22.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.22.adapter_type_router.w1.bias", [28]], ["llama.layers.22.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.22.adapter_type_router.w2.bias", [7]], ["llama.layers.22.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.22.adapter_type_router.w3.bias", [28]], ["llama.layers.23.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.23.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.23.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.23.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.23.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.23.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.23.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.23.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.23.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.23.attention.prompt.weight", [10, 4096]], ["llama.layers.23.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.23.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.23.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.23.p_adapter.down_proj.bias", [16]], ["llama.layers.23.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.23.p_adapter.up_proj.bias", [4096]], ["llama.layers.23.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.23.adapter_type_router.w1.bias", [28]], ["llama.layers.23.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.23.adapter_type_router.w2.bias", [7]], ["llama.layers.23.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.23.adapter_type_router.w3.bias", [28]], ["llama.layers.24.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.24.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.24.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.24.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.24.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.24.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.24.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.24.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.24.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.24.attention.prompt.weight", [10, 4096]], ["llama.layers.24.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.24.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.24.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.24.p_adapter.down_proj.bias", [16]], ["llama.layers.24.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.24.p_adapter.up_proj.bias", [4096]], ["llama.layers.24.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.24.adapter_type_router.w1.bias", [28]], ["llama.layers.24.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.24.adapter_type_router.w2.bias", [7]], ["llama.layers.24.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.24.adapter_type_router.w3.bias", [28]], ["llama.layers.25.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.25.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.25.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.25.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.25.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.25.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.25.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.25.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.25.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.25.attention.prompt.weight", [10, 4096]], ["llama.layers.25.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.25.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.25.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.25.p_adapter.down_proj.bias", [16]], ["llama.layers.25.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.25.p_adapter.up_proj.bias", [4096]], ["llama.layers.25.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.25.adapter_type_router.w1.bias", [28]], ["llama.layers.25.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.25.adapter_type_router.w2.bias", [7]], ["llama.layers.25.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.25.adapter_type_router.w3.bias", [28]], ["llama.layers.26.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.26.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.26.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.26.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.26.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.26.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.26.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.26.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.26.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.26.attention.prompt.weight", [10, 4096]], ["llama.layers.26.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.26.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.26.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.26.p_adapter.down_proj.bias", [16]], ["llama.layers.26.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.26.p_adapter.up_proj.bias", [4096]], ["llama.layers.26.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.26.adapter_type_router.w1.bias", [28]], ["llama.layers.26.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.26.adapter_type_router.w2.bias", [7]], ["llama.layers.26.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.26.adapter_type_router.w3.bias", [28]], ["llama.layers.27.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.27.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.27.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.27.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.27.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.27.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.27.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.27.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.27.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.27.attention.prompt.weight", [10, 4096]], ["llama.layers.27.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.27.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.27.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.27.p_adapter.down_proj.bias", [16]], ["llama.layers.27.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.27.p_adapter.up_proj.bias", [4096]], ["llama.layers.27.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.27.adapter_type_router.w1.bias", [28]], ["llama.layers.27.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.27.adapter_type_router.w2.bias", [7]], ["llama.layers.27.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.27.adapter_type_router.w3.bias", [28]], ["llama.layers.28.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.28.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.28.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.28.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.28.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.28.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.28.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.28.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.28.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.28.attention.prompt.weight", [10, 4096]], ["llama.layers.28.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.28.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.28.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.28.p_adapter.down_proj.bias", [16]], ["llama.layers.28.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.28.p_adapter.up_proj.bias", [4096]], ["llama.layers.28.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.28.adapter_type_router.w1.bias", [28]], ["llama.layers.28.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.28.adapter_type_router.w2.bias", [7]], ["llama.layers.28.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.28.adapter_type_router.w3.bias", [28]], ["llama.layers.29.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.29.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.29.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.29.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.29.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.29.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.29.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.29.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.29.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.29.attention.prompt.weight", [10, 4096]], ["llama.layers.29.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.29.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.29.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.29.p_adapter.down_proj.bias", [16]], ["llama.layers.29.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.29.p_adapter.up_proj.bias", [4096]], ["llama.layers.29.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.29.adapter_type_router.w1.bias", [28]], ["llama.layers.29.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.29.adapter_type_router.w2.bias", [7]], ["llama.layers.29.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.29.adapter_type_router.w3.bias", [28]], ["llama.layers.30.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.30.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.30.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.30.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.30.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.30.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.30.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.30.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.30.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.30.attention.prompt.weight", [10, 4096]], ["llama.layers.30.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.30.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.30.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.30.p_adapter.down_proj.bias", [16]], ["llama.layers.30.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.30.p_adapter.up_proj.bias", [4096]], ["llama.layers.30.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.30.adapter_type_router.w1.bias", [28]], ["llama.layers.30.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.30.adapter_type_router.w2.bias", [7]], ["llama.layers.30.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.30.adapter_type_router.w3.bias", [28]], ["llama.layers.31.attention.prompt_gate", [1, 32, 1, 1]], ["llama.layers.31.attention.lora_Q.lora_A.weight", [8, 4096]], ["llama.layers.31.attention.lora_Q.lora_B.weight", [4096, 8]], ["llama.layers.31.attention.lora_K.lora_A.weight", [8, 4096]], ["llama.layers.31.attention.lora_K.lora_B.weight", [1024, 8]], ["llama.layers.31.attention.lora_V.lora_A.weight", [8, 4096]], ["llama.layers.31.attention.lora_V.lora_B.weight", [1024, 8]], ["llama.layers.31.attention.lora_O.lora_A.weight", [8, 4096]], ["llama.layers.31.attention.lora_O.lora_B.weight", [4096, 8]], ["llama.layers.31.attention.prompt.weight", [10, 4096]], ["llama.layers.31.feed_forward.lora_DOWN.lora_A.weight", [8, 14336]], ["llama.layers.31.feed_forward.lora_DOWN.lora_B.weight", [4096, 8]], ["llama.layers.31.p_adapter.down_proj.weight", [16, 4096]], ["llama.layers.31.p_adapter.down_proj.bias", [16]], ["llama.layers.31.p_adapter.up_proj.weight", [4096, 16]], ["llama.layers.31.p_adapter.up_proj.bias", [4096]], ["llama.layers.31.adapter_type_router.w1.weight", [28, 4096]], ["llama.layers.31.adapter_type_router.w1.bias", [28]], ["llama.layers.31.adapter_type_router.w2.weight", [7, 28]], ["llama.layers.31.adapter_type_router.w2.bias", [7]], ["llama.layers.31.adapter_type_router.w3.weight", [28, 4096]], ["llama.layers.31.adapter_type_router.w3.bias", [28]]]}