continuallearning committed
Commit 4b4ad2c · verified · 1 Parent(s): 31ee0f1

Upload folder using huggingface_hub

Files changed (3)
  1. README.md +201 -0
  2. adapter_config.json +370 -0
  3. adapter_model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,201 @@
+ ---
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+ ### Framework versions
+
+ - PEFT 0.17.1.dev0
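The README above leaves its "How to Get Started with the Model" code as a placeholder. As a minimal, hedged sketch (the repo id below is a placeholder, and because the adapter's `peft_type` is the custom `CLARE` saved from a `PeftWrapperPolicy` class defined in `__main__`, stock `peft` loading may not apply), the files in this commit can at least be fetched and inspected:

```python
import json

from huggingface_hub import snapshot_download  # downloads a snapshot of a Hub repo
from safetensors import safe_open              # lazily reads safetensors files

# Placeholder repo id; substitute the actual Hub repository for this adapter.
local_dir = snapshot_download(repo_id="continuallearning/<adapter-repo>")

# Inspect the adapter configuration added in this commit.
with open(f"{local_dir}/adapter_config.json") as f:
    cfg = json.load(f)
print(cfg["peft_type"], cfg["auto_mapping"]["base_model_class"])  # CLARE, PeftWrapperPolicy

# Peek at the adapter weights without loading everything into memory.
with safe_open(f"{local_dir}/adapter_model.safetensors", framework="pt") as st:
    for name in list(st.keys())[:5]:
        print(name, tuple(st.get_tensor(name).shape))
```

Fully restoring the adapter presumably also requires the authors' training code and PEFT 0.17.1.dev0 with the CLARE tuner registered, per the Framework versions section above.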
adapter_config.json ADDED
@@ -0,0 +1,370 @@
+ {
+   "_module_configs": {
+     ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))": {
+       "add_zero_init_conv_layer": false,
+       "batch_first": true,
+       "discriminator_cfg": {
+         "batch_first": true,
+         "feature_dim": 720,
+         "feature_fusion": false,
+         "fused_feature_dim": null,
+         "hidden_dim": 128,
+         "max_batches_tracked": 2000,
+         "momentum": 0.1,
+         "num_tokens": 16,
+         "type": "autoencoder_small",
+         "use_momentum": true
+       },
+       "feature_dim": 720,
+       "func_adapter_cfg": {
+         "hidden_dim": null,
+         "lora_alpha": 32,
+         "lora_rank": 32,
+         "use_lora": true
+       },
+       "out_feature_dim": 320,
+       "pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))",
+       "use_trainable_copy": false
+     },
+     ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))": {
+       "add_zero_init_conv_layer": false,
+       "batch_first": true,
+       "discriminator_cfg": {
+         "batch_first": true,
+         "feature_dim": 320,
+         "feature_fusion": false,
+         "fused_feature_dim": null,
+         "hidden_dim": 64,
+         "max_batches_tracked": 2000,
+         "momentum": 0.1,
+         "num_tokens": 16,
+         "type": "autoencoder_small",
+         "use_momentum": true
+       },
+       "feature_dim": 320,
+       "func_adapter_cfg": {
+         "hidden_dim": null,
+         "lora_alpha": 32,
+         "lora_rank": 32,
+         "use_lora": true
+       },
+       "out_feature_dim": 320,
+       "pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))",
+       "use_trainable_copy": false
+     },
+     ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)": {
+       "add_zero_init_conv_layer": false,
+       "batch_first": true,
+       "discriminator_cfg": {
+         "batch_first": true,
+         "feature_dim": 720,
+         "feature_fusion": false,
+         "fused_feature_dim": null,
+         "hidden_dim": 128,
+         "max_batches_tracked": 2000,
+         "momentum": 0.1,
+         "num_tokens": 16,
+         "type": "autoencoder_small",
+         "use_momentum": true
+       },
+       "feature_dim": 720,
+       "func_adapter_cfg": {
+         "hidden_dim": null,
+         "lora_alpha": 32,
+         "lora_rank": 32,
+         "use_lora": true
+       },
+       "out_feature_dim": 960,
+       "pattern": ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)",
+       "use_trainable_copy": false
+     }
+   },
+   "add_zero_init_conv_layer": false,
+   "auto_mapping": {
+     "base_model_class": "PeftWrapperPolicy",
+     "parent_library": "__main__"
+   },
+   "base_model_name_or_path": null,
+   "batch_first": true,
+   "discriminator_cfg": null,
+   "feature_dim": null,
+   "func_adapter_cfg": null,
+   "inference_mode": true,
+   "num_learned_task": 1,
+   "out_feature_dim": null,
+   "peft_type": "CLARE",
+   "revision": null,
+   "structure": {
+     "lm_expert.layers.0.self_attn.k_proj.0": [
+       1,
+       1
+     ],
+     "lm_expert.layers.0.self_attn.q_proj.0": [
+       1,
+       1
+     ],
+     "lm_expert.layers.0.self_attn.v_proj.0": [
+       1,
+       1
+     ],
+     "lm_expert.layers.1.self_attn.k_proj.1": [
+       1,
+       1
+     ],
+     "lm_expert.layers.1.self_attn.q_proj.1": [
+       1,
+       1
+     ],
+     "lm_expert.layers.1.self_attn.v_proj.1": [
+       1,
+       1
+     ],
+     "lm_expert.layers.10.self_attn.k_proj.10": [
+       1,
+       1
+     ],
+     "lm_expert.layers.10.self_attn.q_proj.10": [
+       1,
+       1
+     ],
+     "lm_expert.layers.10.self_attn.v_proj.10": [
+       1,
+       1
+     ],
+     "lm_expert.layers.11.self_attn.k_proj.11": [
+       1,
+       1
+     ],
+     "lm_expert.layers.11.self_attn.q_proj.11": [
+       1,
+       1
+     ],
+     "lm_expert.layers.11.self_attn.v_proj.11": [
+       1,
+       1
+     ],
+     "lm_expert.layers.12.self_attn.k_proj.12": [
+       1,
+       1
+     ],
+     "lm_expert.layers.12.self_attn.q_proj.12": [
+       1,
+       1
+     ],
+     "lm_expert.layers.12.self_attn.v_proj.12": [
+       1,
+       1
+     ],
+     "lm_expert.layers.13.self_attn.k_proj.13": [
+       1,
+       1
+     ],
+     "lm_expert.layers.13.self_attn.q_proj.13": [
+       1,
+       1
+     ],
+     "lm_expert.layers.13.self_attn.v_proj.13": [
+       1,
+       1
+     ],
+     "lm_expert.layers.14.self_attn.k_proj.14": [
+       1,
+       1
+     ],
+     "lm_expert.layers.14.self_attn.q_proj.14": [
+       1,
+       1
+     ],
+     "lm_expert.layers.14.self_attn.v_proj.14": [
+       1,
+       1
+     ],
+     "lm_expert.layers.15.self_attn.k_proj.15": [
+       1,
+       1
+     ],
+     "lm_expert.layers.15.self_attn.q_proj.15": [
+       1,
+       1
+     ],
+     "lm_expert.layers.15.self_attn.v_proj.15": [
+       1,
+       1
+     ],
+     "lm_expert.layers.2.self_attn.k_proj.2": [
+       1,
+       1
+     ],
+     "lm_expert.layers.2.self_attn.q_proj.2": [
+       1,
+       1
+     ],
+     "lm_expert.layers.2.self_attn.v_proj.2": [
+       1,
+       1
+     ],
+     "lm_expert.layers.3.self_attn.k_proj.3": [
+       1,
+       1
+     ],
+     "lm_expert.layers.3.self_attn.q_proj.3": [
+       1,
+       1
+     ],
+     "lm_expert.layers.3.self_attn.v_proj.3": [
+       1,
+       1
+     ],
+     "lm_expert.layers.4.self_attn.k_proj.4": [
+       1,
+       1
+     ],
+     "lm_expert.layers.4.self_attn.q_proj.4": [
+       1,
+       1
+     ],
+     "lm_expert.layers.4.self_attn.v_proj.4": [
+       1,
+       1
+     ],
+     "lm_expert.layers.5.self_attn.k_proj.5": [
+       1,
+       1
+     ],
+     "lm_expert.layers.5.self_attn.q_proj.5": [
+       1,
+       1
+     ],
+     "lm_expert.layers.5.self_attn.v_proj.5": [
+       1,
+       1
+     ],
+     "lm_expert.layers.6.self_attn.k_proj.6": [
+       1,
+       1
+     ],
+     "lm_expert.layers.6.self_attn.q_proj.6": [
+       1,
+       1
+     ],
+     "lm_expert.layers.6.self_attn.v_proj.6": [
+       1,
+       1
+     ],
+     "lm_expert.layers.7.self_attn.k_proj.7": [
+       1,
+       1
+     ],
+     "lm_expert.layers.7.self_attn.q_proj.7": [
+       1,
+       1
+     ],
+     "lm_expert.layers.7.self_attn.v_proj.7": [
+       1,
+       1
+     ],
+     "lm_expert.layers.8.self_attn.k_proj.8": [
+       1,
+       1
+     ],
+     "lm_expert.layers.8.self_attn.q_proj.8": [
+       1,
+       1
+     ],
+     "lm_expert.layers.8.self_attn.v_proj.8": [
+       1,
+       1
+     ],
+     "lm_expert.layers.9.self_attn.k_proj.9": [
+       1,
+       1
+     ],
+     "lm_expert.layers.9.self_attn.q_proj.9": [
+       1,
+       1
+     ],
+     "lm_expert.layers.9.self_attn.v_proj.9": [
+       1,
+       1
+     ]
+   },
+   "target_modules": {
+     ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))": {
+       "add_zero_init_conv_layer": false,
+       "batch_first": true,
+       "discriminator_cfg": {
+         "batch_first": true,
+         "feature_dim": 720,
+         "feature_fusion": false,
+         "fused_feature_dim": null,
+         "hidden_dim": 128,
+         "max_batches_tracked": 2000,
+         "momentum": 0.1,
+         "num_tokens": 16,
+         "type": "autoencoder_small",
+         "use_momentum": true
+       },
+       "feature_dim": 720,
+       "func_adapter_cfg": {
+         "hidden_dim": null,
+         "lora_alpha": 32,
+         "lora_rank": 32,
+         "use_lora": true
+       },
+       "out_feature_dim": 320,
+       "use_trainable_copy": false
+     },
+     ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))": {
+       "add_zero_init_conv_layer": false,
+       "batch_first": true,
+       "discriminator_cfg": {
+         "batch_first": true,
+         "feature_dim": 320,
+         "feature_fusion": false,
+         "fused_feature_dim": null,
+         "hidden_dim": 64,
+         "max_batches_tracked": 2000,
+         "momentum": 0.1,
+         "num_tokens": 16,
+         "type": "autoencoder_small",
+         "use_momentum": true
+       },
+       "feature_dim": 320,
+       "func_adapter_cfg": {
+         "hidden_dim": null,
+         "lora_alpha": 32,
+         "lora_rank": 32,
+         "use_lora": true
+       },
+       "out_feature_dim": 320,
+       "use_trainable_copy": false
+     },
+     ".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\\d+).self_attn.q_proj)": {
+       "add_zero_init_conv_layer": false,
+       "batch_first": true,
+       "discriminator_cfg": {
+         "batch_first": true,
+         "feature_dim": 720,
+         "feature_fusion": false,
+         "fused_feature_dim": null,
+         "hidden_dim": 128,
+         "max_batches_tracked": 2000,
+         "momentum": 0.1,
+         "num_tokens": 16,
+         "type": "autoencoder_small",
+         "use_momentum": true
+       },
+       "feature_dim": 720,
+       "func_adapter_cfg": {
+         "hidden_dim": null,
+         "lora_alpha": 32,
+         "lora_rank": 32,
+         "use_lora": true
+       },
+       "out_feature_dim": 960,
+       "use_trainable_copy": false
+     }
+   },
+   "task_type": null,
+   "use_trainable_copy": false
+ }
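The regex keys under `_module_configs` and `target_modules` select modules by pattern rather than exact name: k/v projections in even-numbered `lm_expert` layers (0, 2, ..., 14) get the 720-dim discriminator config, those in odd-numbered layers (1, 3, ..., 15) get the 320-dim one, and `q_proj` in every layer gets the config with `out_feature_dim` 960; all three use rank-32, alpha-32 LoRA function adapters. A small sketch of how such patterns match module paths (the `model.` prefix in the example paths is an assumption for illustration only):

```python
import re

# Regex keys copied from the "_module_configs" section of adapter_config.json above.
EVEN_KV = r".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(0|2|4|6|8|10|12|14)).self_attn.(k_proj|v_proj))"
ODD_KV = r".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>(1|3|5|7|9|11|13|15)).self_attn.(k_proj|v_proj))"
ANY_Q = r".*vlm_with_expert.(?P<layer_name>lm_expert.layers.(?P<layer_id>\d+).self_attn.q_proj)"

# Hypothetical module paths; the "model." prefix is assumed for illustration.
examples = [
    "model.vlm_with_expert.lm_expert.layers.0.self_attn.k_proj",  # even layer k_proj
    "model.vlm_with_expert.lm_expert.layers.3.self_attn.v_proj",  # odd layer v_proj
    "model.vlm_with_expert.lm_expert.layers.7.self_attn.q_proj",  # q_proj, any layer
]

for path in examples:
    for label, pattern in (("even k/v", EVEN_KV), ("odd k/v", ODD_KV), ("q_proj", ANY_Q)):
        match = re.fullmatch(pattern, path)
        if match:
            print(f"{path} -> {label} config (layer {match.group('layer_id')})")
```

The `structure` map then lists one entry per matched projection (q/k/v across all 16 expert layers, 48 in total), each holding `[1, 1]`, presumably per-module bookkeeping for the single learned task (`num_learned_task: 1`).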
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:44c34437d162b7ae597b22ee3fd6c991cf1d72ed93891355c0d89a99411f8bed
+ size 18711872
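`adapter_model.safetensors` is committed as a Git LFS pointer: the actual ~18.7 MB weight file lives in LFS and is identified by the SHA-256 above. A rough sketch for checking a locally downloaded copy against the pointer (the local file path is an assumption):

```python
import hashlib
import os

# Path to the resolved (downloaded) weights file; adjust as needed.
path = "adapter_model.safetensors"

# Values taken from the LFS pointer in this commit.
expected_oid = "44c34437d162b7ae597b22ee3fd6c991cf1d72ed93891355c0d89a99411f8bed"
expected_size = 18711872

# Hash the file in 1 MiB chunks to avoid loading it all at once.
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

print("size ok:", os.path.getsize(path) == expected_size)
print("oid ok:", digest.hexdigest() == expected_oid)
```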