{
  "_module_configs": {
    ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))": {
      "add_zero_init_conv_layer": false,
      "batch_first": true,
      "discriminator_cfg": {
        "batch_first": true,
        "feature_dim": 720,
        "feature_fusion": false,
        "fused_feature_dim": null,
        "hidden_dim": 128,
        "max_batches_tracked": 2000,
        "momentum": 0.1,
        "num_tokens": 16,
        "type": "autoencoder_small",
        "use_momentum": true
      },
      "feature_dim": 720,
      "func_adapter_cfg": {
        "hidden_dim": null,
        "lora_alpha": 32,
        "lora_rank": 32,
        "use_lora": true
      },
      "out_feature_dim": 320,
      "pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))",
      "use_trainable_copy": false
    },
    ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))": {
      "add_zero_init_conv_layer": false,
      "batch_first": true,
      "discriminator_cfg": {
        "batch_first": true,
        "feature_dim": 320,
        "feature_fusion": false,
        "fused_feature_dim": null,
        "hidden_dim": 64,
        "max_batches_tracked": 2000,
        "momentum": 0.1,
        "num_tokens": 16,
        "type": "autoencoder_small",
        "use_momentum": true
      },
      "feature_dim": 320,
      "func_adapter_cfg": {
        "hidden_dim": null,
        "lora_alpha": 32,
        "lora_rank": 32,
        "use_lora": true
      },
      "out_feature_dim": 320,
      "pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))",
      "use_trainable_copy": false
    },
    ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)": {
      "add_zero_init_conv_layer": false,
      "batch_first": true,
      "discriminator_cfg": {
        "batch_first": true,
        "feature_dim": 720,
        "feature_fusion": false,
        "fused_feature_dim": null,
        "hidden_dim": 128,
        "max_batches_tracked": 2000,
        "momentum": 0.1,
        "num_tokens": 16,
        "type": "autoencoder_small",
        "use_momentum": true
      },
      "feature_dim": 720,
      "func_adapter_cfg": {
        "hidden_dim": null,
        "lora_alpha": 32,
        "lora_rank": 32,
        "use_lora": true
      },
      "out_feature_dim": 960,
      "pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)",
      "use_trainable_copy": false
    }
  },
  "add_zero_init_conv_layer": false,
  "auto_mapping": {
    "base_model_class": "PeftWrapperPolicy",
    "parent_library": "__main__"
  },
  "base_model_name_or_path": null,
  "batch_first": true,
  "discriminator_cfg": null,
  "feature_dim": null,
  "func_adapter_cfg": null,
  "inference_mode": true,
  "num_learned_task": 1,
  "out_feature_dim": null,
  "peft_type": "CLARE",
  "revision": null,
  "structure": {
    "lm_expert.layers.0.self_attn.k_proj.0": [1, 1],
    "lm_expert.layers.0.self_attn.q_proj.0": [1, 1],
    "lm_expert.layers.0.self_attn.v_proj.0": [1, 1],
    "lm_expert.layers.1.self_attn.k_proj.1": [1, 1],
    "lm_expert.layers.1.self_attn.q_proj.1": [1, 1],
    "lm_expert.layers.1.self_attn.v_proj.1": [1, 1],
    "lm_expert.layers.10.self_attn.k_proj.10": [1, 1],
    "lm_expert.layers.10.self_attn.q_proj.10": [1, 1],
    "lm_expert.layers.10.self_attn.v_proj.10": [1, 1],
    "lm_expert.layers.11.self_attn.k_proj.11": [1, 1],
    "lm_expert.layers.11.self_attn.q_proj.11": [1, 1],
    "lm_expert.layers.11.self_attn.v_proj.11": [1, 1],
    "lm_expert.layers.12.self_attn.k_proj.12": [1, 1],
    "lm_expert.layers.12.self_attn.q_proj.12": [1, 1],
    "lm_expert.layers.12.self_attn.v_proj.12": [1, 1],
    "lm_expert.layers.13.self_attn.k_proj.13": [1, 1],
    "lm_expert.layers.13.self_attn.q_proj.13": [1, 1],
    "lm_expert.layers.13.self_attn.v_proj.13": [1, 1],
    "lm_expert.layers.14.self_attn.k_proj.14": [1, 1],
    "lm_expert.layers.14.self_attn.q_proj.14": [1, 1],
    "lm_expert.layers.14.self_attn.v_proj.14": [1, 1],
    "lm_expert.layers.15.self_attn.k_proj.15": [1, 1],
    "lm_expert.layers.15.self_attn.q_proj.15": [1, 1],
    "lm_expert.layers.15.self_attn.v_proj.15": [1, 1],
    "lm_expert.layers.2.self_attn.k_proj.2": [1, 1],
    "lm_expert.layers.2.self_attn.q_proj.2": [1, 1],
    "lm_expert.layers.2.self_attn.v_proj.2": [1, 1],
    "lm_expert.layers.3.self_attn.k_proj.3": [1, 1],
    "lm_expert.layers.3.self_attn.q_proj.3": [1, 1],
    "lm_expert.layers.3.self_attn.v_proj.3": [1, 1],
    "lm_expert.layers.4.self_attn.k_proj.4": [1, 1],
    "lm_expert.layers.4.self_attn.q_proj.4": [1, 1],
    "lm_expert.layers.4.self_attn.v_proj.4": [1, 1],
    "lm_expert.layers.5.self_attn.k_proj.5": [1, 1],
    "lm_expert.layers.5.self_attn.q_proj.5": [1, 1],
    "lm_expert.layers.5.self_attn.v_proj.5": [1, 1],
    "lm_expert.layers.6.self_attn.k_proj.6": [1, 1],
    "lm_expert.layers.6.self_attn.q_proj.6": [1, 1],
    "lm_expert.layers.6.self_attn.v_proj.6": [1, 1],
    "lm_expert.layers.7.self_attn.k_proj.7": [1, 1],
    "lm_expert.layers.7.self_attn.q_proj.7": [1, 1],
    "lm_expert.layers.7.self_attn.v_proj.7": [1, 1],
    "lm_expert.layers.8.self_attn.k_proj.8": [1, 1],
    "lm_expert.layers.8.self_attn.q_proj.8": [1, 1],
    "lm_expert.layers.8.self_attn.v_proj.8": [1, 1],
    "lm_expert.layers.9.self_attn.k_proj.9": [1, 1],
    "lm_expert.layers.9.self_attn.q_proj.9": [1, 1],
    "lm_expert.layers.9.self_attn.v_proj.9": [1, 1]
  },
  "target_modules": {
    ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))": {
      "add_zero_init_conv_layer": false,
      "batch_first": true,
      "discriminator_cfg": {
        "batch_first": true,
        "feature_dim": 720,
        "feature_fusion": false,
        "fused_feature_dim": null,
        "hidden_dim": 128,
        "max_batches_tracked": 2000,
        "momentum": 0.1,
        "num_tokens": 16,
        "type": "autoencoder_small",
        "use_momentum": true
      },
      "feature_dim": 720,
      "func_adapter_cfg": {
        "hidden_dim": null,
        "lora_alpha": 32,
        "lora_rank": 32,
        "use_lora": true
      },
      "out_feature_dim": 320,
      "use_trainable_copy": false
    },
    ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))": {
      "add_zero_init_conv_layer": false,
      "batch_first": true,
      "discriminator_cfg": {
        "batch_first": true,
        "feature_dim": 320,
        "feature_fusion": false,
        "fused_feature_dim": null,
        "hidden_dim": 64,
        "max_batches_tracked": 2000,
        "momentum": 0.1,
        "num_tokens": 16,
        "type": "autoencoder_small",
        "use_momentum": true
      },
      "feature_dim": 320,
      "func_adapter_cfg": {
        "hidden_dim": null,
        "lora_alpha": 32,
        "lora_rank": 32,
        "use_lora": true
      },
      "out_feature_dim": 320,
      "use_trainable_copy": false
    },
    ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)": {
      "add_zero_init_conv_layer": false,
      "batch_first": true,
      "discriminator_cfg": {
        "batch_first": true,
        "feature_dim": 720,
        "feature_fusion": false,
        "fused_feature_dim": null,
        "hidden_dim": 128,
        "max_batches_tracked": 2000,
        "momentum": 0.1,
        "num_tokens": 16,
        "type": "autoencoder_small",
        "use_momentum": true
      },
      "feature_dim": 720,
      "func_adapter_cfg": {
        "hidden_dim": null,
        "lora_alpha": 32,
        "lora_rank": 32,
        "use_lora": true
      },
      "out_feature_dim": 960,
      "use_trainable_copy": false
    }
  },
  "task_type": null,
  "use_trainable_copy": false
}