{
"_module_configs": {
".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))": {
"add_zero_init_conv_layer": false,
"batch_first": true,
"discriminator_cfg": {
"batch_first": true,
"feature_dim": 720,
"feature_fusion": false,
"fused_feature_dim": null,
"hidden_dim": 128,
"max_batches_tracked": 2000,
"momentum": 0.1,
"num_tokens": 16,
"type": "autoencoder_small",
"use_momentum": true
},
"feature_dim": 720,
"func_adapter_cfg": {
"hidden_dim": null,
"lora_alpha": 32,
"lora_rank": 32,
"use_lora": true
},
"out_feature_dim": 320,
"pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))",
"use_trainable_copy": false
},
".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))": {
"add_zero_init_conv_layer": false,
"batch_first": true,
"discriminator_cfg": {
"batch_first": true,
"feature_dim": 320,
"feature_fusion": false,
"fused_feature_dim": null,
"hidden_dim": 64,
"max_batches_tracked": 2000,
"momentum": 0.1,
"num_tokens": 16,
"type": "autoencoder_small",
"use_momentum": true
},
"feature_dim": 320,
"func_adapter_cfg": {
"hidden_dim": null,
"lora_alpha": 32,
"lora_rank": 32,
"use_lora": true
},
"out_feature_dim": 320,
"pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))",
"use_trainable_copy": false
},
".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)": {
"add_zero_init_conv_layer": false,
"batch_first": true,
"discriminator_cfg": {
"batch_first": true,
"feature_dim": 720,
"feature_fusion": false,
"fused_feature_dim": null,
"hidden_dim": 128,
"max_batches_tracked": 2000,
"momentum": 0.1,
"num_tokens": 16,
"type": "autoencoder_small",
"use_momentum": true
},
"feature_dim": 720,
"func_adapter_cfg": {
"hidden_dim": null,
"lora_alpha": 32,
"lora_rank": 32,
"use_lora": true
},
"out_feature_dim": 960,
"pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)",
"use_trainable_copy": false
}
},
"add_zero_init_conv_layer": false,
"auto_mapping": {
"base_model_class": "PeftWrapperPolicy",
"parent_library": "__main__"
},
"base_model_name_or_path": null,
"batch_first": true,
"discriminator_cfg": null,
"feature_dim": null,
"func_adapter_cfg": null,
"inference_mode": true,
"num_learned_task": 1,
"out_feature_dim": null,
"peft_type": "CLARE",
"revision": null,
"structure": {
"lm_expert.layers.0.self_attn.k_proj.0": [
1,
1
],
"lm_expert.layers.0.self_attn.q_proj.0": [
1,
1
],
"lm_expert.layers.0.self_attn.v_proj.0": [
1,
1
],
"lm_expert.layers.1.self_attn.k_proj.1": [
1,
1
],
"lm_expert.layers.1.self_attn.q_proj.1": [
1,
1
],
"lm_expert.layers.1.self_attn.v_proj.1": [
1,
1
],
"lm_expert.layers.10.self_attn.k_proj.10": [
1,
1
],
"lm_expert.layers.10.self_attn.q_proj.10": [
1,
1
],
"lm_expert.layers.10.self_attn.v_proj.10": [
1,
1
],
"lm_expert.layers.11.self_attn.k_proj.11": [
1,
1
],
"lm_expert.layers.11.self_attn.q_proj.11": [
1,
1
],
"lm_expert.layers.11.self_attn.v_proj.11": [
1,
1
],
"lm_expert.layers.12.self_attn.k_proj.12": [
1,
1
],
"lm_expert.layers.12.self_attn.q_proj.12": [
1,
1
],
"lm_expert.layers.12.self_attn.v_proj.12": [
1,
1
],
"lm_expert.layers.13.self_attn.k_proj.13": [
1,
1
],
"lm_expert.layers.13.self_attn.q_proj.13": [
1,
1
],
"lm_expert.layers.13.self_attn.v_proj.13": [
1,
1
],
"lm_expert.layers.14.self_attn.k_proj.14": [
1,
1
],
"lm_expert.layers.14.self_attn.q_proj.14": [
1,
1
],
"lm_expert.layers.14.self_attn.v_proj.14": [
1,
1
],
"lm_expert.layers.15.self_attn.k_proj.15": [
1,
1
],
"lm_expert.layers.15.self_attn.q_proj.15": [
1,
1
],
"lm_expert.layers.15.self_attn.v_proj.15": [
1,
1
],
"lm_expert.layers.2.self_attn.k_proj.2": [
1,
1
],
"lm_expert.layers.2.self_attn.q_proj.2": [
1,
1
],
"lm_expert.layers.2.self_attn.v_proj.2": [
1,
1
],
"lm_expert.layers.3.self_attn.k_proj.3": [
1,
1
],
"lm_expert.layers.3.self_attn.q_proj.3": [
1,
1
],
"lm_expert.layers.3.self_attn.v_proj.3": [
1,
1
],
"lm_expert.layers.4.self_attn.k_proj.4": [
1,
1
],
"lm_expert.layers.4.self_attn.q_proj.4": [
1,
1
],
"lm_expert.layers.4.self_attn.v_proj.4": [
1,
1
],
"lm_expert.layers.5.self_attn.k_proj.5": [
1,
1
],
"lm_expert.layers.5.self_attn.q_proj.5": [
1,
1
],
"lm_expert.layers.5.self_attn.v_proj.5": [
1,
1
],
"lm_expert.layers.6.self_attn.k_proj.6": [
1,
1
],
"lm_expert.layers.6.self_attn.q_proj.6": [
1,
1
],
"lm_expert.layers.6.self_attn.v_proj.6": [
1,
1
],
"lm_expert.layers.7.self_attn.k_proj.7": [
1,
1
],
"lm_expert.layers.7.self_attn.q_proj.7": [
1,
1
],
"lm_expert.layers.7.self_attn.v_proj.7": [
1,
1
],
"lm_expert.layers.8.self_attn.k_proj.8": [
1,
1
],
"lm_expert.layers.8.self_attn.q_proj.8": [
1,
1
],
"lm_expert.layers.8.self_attn.v_proj.8": [
1,
1
],
"lm_expert.layers.9.self_attn.k_proj.9": [
1,
1
],
"lm_expert.layers.9.self_attn.q_proj.9": [
1,
1
],
"lm_expert.layers.9.self_attn.v_proj.9": [
1,
1
]
},
"target_modules": {
".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))": {
"add_zero_init_conv_layer": false,
"batch_first": true,
"discriminator_cfg": {
"batch_first": true,
"feature_dim": 720,
"feature_fusion": false,
"fused_feature_dim": null,
"hidden_dim": 128,
"max_batches_tracked": 2000,
"momentum": 0.1,
"num_tokens": 16,
"type": "autoencoder_small",
"use_momentum": true
},
"feature_dim": 720,
"func_adapter_cfg": {
"hidden_dim": null,
"lora_alpha": 32,
"lora_rank": 32,
"use_lora": true
},
"out_feature_dim": 320,
"use_trainable_copy": false
},
".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))": {
"add_zero_init_conv_layer": false,
"batch_first": true,
"discriminator_cfg": {
"batch_first": true,
"feature_dim": 320,
"feature_fusion": false,
"fused_feature_dim": null,
"hidden_dim": 64,
"max_batches_tracked": 2000,
"momentum": 0.1,
"num_tokens": 16,
"type": "autoencoder_small",
"use_momentum": true
},
"feature_dim": 320,
"func_adapter_cfg": {
"hidden_dim": null,
"lora_alpha": 32,
"lora_rank": 32,
"use_lora": true
},
"out_feature_dim": 320,
"use_trainable_copy": false
},
".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)": {
"add_zero_init_conv_layer": false,
"batch_first": true,
"discriminator_cfg": {
"batch_first": true,
"feature_dim": 720,
"feature_fusion": false,
"fused_feature_dim": null,
"hidden_dim": 128,
"max_batches_tracked": 2000,
"momentum": 0.1,
"num_tokens": 16,
"type": "autoencoder_small",
"use_momentum": true
},
"feature_dim": 720,
"func_adapter_cfg": {
"hidden_dim": null,
"lora_alpha": 32,
"lora_rank": 32,
"use_lora": true
},
"out_feature_dim": 960,
"use_trainable_copy": false
}
},
"task_type": null,
"use_trainable_copy": false
}