34245A0131EAF5C7@tum.de commited on
Commit
f156ce5
·
1 Parent(s): aa1bfa9

MoReS-rank4-r4l5 for tinyllava_phi2

Browse files
Files changed (47) hide show
  1. added_tokens.json +40 -0
  2. config.json +64 -0
  3. connector/pytorch_model.bin +3 -0
  4. language_model/config.json +30 -0
  5. language_model/pytorch_model.bin +3 -0
  6. merges.txt +0 -0
  7. mores/language_model/config.json +622 -0
  8. mores/language_model/intervention_config.json +356 -0
  9. mores/language_model/intkey_layer.0.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  10. mores/language_model/intkey_layer.1.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  11. mores/language_model/intkey_layer.10.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  12. mores/language_model/intkey_layer.11.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  13. mores/language_model/intkey_layer.12.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  14. mores/language_model/intkey_layer.13.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  15. mores/language_model/intkey_layer.14.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  16. mores/language_model/intkey_layer.15.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  17. mores/language_model/intkey_layer.16.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  18. mores/language_model/intkey_layer.17.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  19. mores/language_model/intkey_layer.18.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  20. mores/language_model/intkey_layer.19.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  21. mores/language_model/intkey_layer.2.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  22. mores/language_model/intkey_layer.20.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  23. mores/language_model/intkey_layer.21.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  24. mores/language_model/intkey_layer.22.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  25. mores/language_model/intkey_layer.23.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  26. mores/language_model/intkey_layer.24.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  27. mores/language_model/intkey_layer.25.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  28. mores/language_model/intkey_layer.26.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  29. mores/language_model/intkey_layer.27.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  30. mores/language_model/intkey_layer.28.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  31. mores/language_model/intkey_layer.29.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  32. mores/language_model/intkey_layer.3.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  33. mores/language_model/intkey_layer.30.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  34. mores/language_model/intkey_layer.31.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  35. mores/language_model/intkey_layer.4.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  36. mores/language_model/intkey_layer.5.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  37. mores/language_model/intkey_layer.6.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  38. mores/language_model/intkey_layer.7.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  39. mores/language_model/intkey_layer.8.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  40. mores/language_model/intkey_layer.9.comp.block_output.unit.pos.nunit.1#0.bin +3 -0
  41. mores/language_model/mores_pos_configs.json +10 -0
  42. special_tokens_map.json +24 -0
  43. tokenizer_config.json +327 -0
  44. trainer_state.json +0 -0
  45. vision_tower/config.json +16 -0
  46. vision_tower/pytorch_model.bin +3 -0
  47. vocab.json +0 -0
added_tokens.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "\t\t": 50294,
3
+ "\t\t\t": 50293,
4
+ "\t\t\t\t": 50292,
5
+ "\t\t\t\t\t": 50291,
6
+ "\t\t\t\t\t\t": 50290,
7
+ "\t\t\t\t\t\t\t": 50289,
8
+ "\t\t\t\t\t\t\t\t": 50288,
9
+ "\t\t\t\t\t\t\t\t\t": 50287,
10
+ " ": 50286,
11
+ " ": 50285,
12
+ " ": 50284,
13
+ " ": 50283,
14
+ " ": 50282,
15
+ " ": 50281,
16
+ " ": 50280,
17
+ " ": 50279,
18
+ " ": 50278,
19
+ " ": 50277,
20
+ " ": 50276,
21
+ " ": 50275,
22
+ " ": 50274,
23
+ " ": 50273,
24
+ " ": 50272,
25
+ " ": 50271,
26
+ " ": 50270,
27
+ " ": 50269,
28
+ " ": 50268,
29
+ " ": 50267,
30
+ " ": 50266,
31
+ " ": 50265,
32
+ " ": 50264,
33
+ " ": 50263,
34
+ " ": 50262,
35
+ " ": 50261,
36
+ " ": 50260,
37
+ " ": 50259,
38
+ " ": 50258,
39
+ " ": 50257
40
+ }
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cache_dir": null,
3
+ "connector_type": "mlp2x_gelu",
4
+ "hidden_size": 2560,
5
+ "ignore_index": -100,
6
+ "image_aspect_ratio": "square",
7
+ "image_token_index": -200,
8
+ "llm_model_name_or_path": "microsoft/phi-2",
9
+ "model_type": "tinyllava",
10
+ "num_queries": 128,
11
+ "num_resampler_layers": 3,
12
+ "pad_token": "<|endoftext|>",
13
+ "pad_token_id": 50256,
14
+ "resampler_hidden_size": 768,
15
+ "text_config": {
16
+ "_name_or_path": "microsoft/phi-2",
17
+ "architectures": [
18
+ "PhiForCausalLM"
19
+ ],
20
+ "bos_token_id": 50256,
21
+ "embd_pdrop": 0.0,
22
+ "eos_token_id": 50256,
23
+ "hidden_act": "gelu_new",
24
+ "hidden_size": 2560,
25
+ "intermediate_size": 10240,
26
+ "layer_norm_eps": 1e-05,
27
+ "model_type": "phi",
28
+ "num_hidden_layers": 32,
29
+ "partial_rotary_factor": 0.4,
30
+ "qk_layernorm": false,
31
+ "resid_pdrop": 0.1,
32
+ "torch_dtype": "float16",
33
+ "vocab_size": 51200
34
+ },
35
+ "tokenizer_model_max_length": 3072,
36
+ "tokenizer_name_or_path": "microsoft/phi-2",
37
+ "tokenizer_padding_side": "right",
38
+ "tokenizer_use_fast": false,
39
+ "transformers_version": "4.39.3",
40
+ "tune_type_connector": "full",
41
+ "tune_type_llm": "mores",
42
+ "tune_type_vision_tower": "frozen",
43
+ "tune_vision_tower_from_layer": 0,
44
+ "use_cache": true,
45
+ "vision_config": {
46
+ "hidden_act": "gelu_pytorch_tanh",
47
+ "hidden_size": 1152,
48
+ "image_size": 384,
49
+ "intermediate_size": 4304,
50
+ "layer_norm_eps": 1e-06,
51
+ "model_name_or_path": "google/siglip-so400m-patch14-384",
52
+ "model_name_or_path2": "",
53
+ "model_type": "siglip_vision_model",
54
+ "num_attention_heads": 16,
55
+ "num_hidden_layers": 27,
56
+ "patch_size": 14
57
+ },
58
+ "vision_feature_layer": -2,
59
+ "vision_feature_select_strategy": "patch",
60
+ "vision_hidden_size": 1152,
61
+ "vision_model_name_or_path": "google/siglip-so400m-patch14-384",
62
+ "vision_model_name_or_path2": "",
63
+ "vocab_size": 51200
64
+ }
connector/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5756ac422d84ce28962625b0d96d4e819a4e0bbe570d4c96aba11f7d166bac1
3
+ size 19017728
language_model/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/phi-2",
3
+ "architectures": [
4
+ "PhiForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50256,
8
+ "embd_pdrop": 0.0,
9
+ "eos_token_id": 50256,
10
+ "hidden_act": "gelu_new",
11
+ "hidden_size": 2560,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 10240,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "phi",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 32,
19
+ "num_key_value_heads": 32,
20
+ "partial_rotary_factor": 0.4,
21
+ "qk_layernorm": false,
22
+ "resid_pdrop": 0.1,
23
+ "rope_scaling": null,
24
+ "rope_theta": 10000.0,
25
+ "tie_word_embeddings": false,
26
+ "torch_dtype": "float16",
27
+ "transformers_version": "4.39.3",
28
+ "use_cache": true,
29
+ "vocab_size": 51200
30
+ }
language_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d9fddc56d9983857268436105a3544842bb8dcd6cb82bf0387bc88781c7da85
3
+ size 5559512874
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mores/language_model/config.json ADDED
@@ -0,0 +1,622 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intervention_constant_sources": [
3
+ true,
4
+ true,
5
+ true,
6
+ true,
7
+ true,
8
+ true,
9
+ true,
10
+ true,
11
+ true,
12
+ true,
13
+ true,
14
+ true,
15
+ true,
16
+ true,
17
+ true,
18
+ true,
19
+ true,
20
+ true,
21
+ true,
22
+ true,
23
+ true,
24
+ true,
25
+ true,
26
+ true,
27
+ true,
28
+ true,
29
+ true,
30
+ true,
31
+ true,
32
+ true,
33
+ true,
34
+ true
35
+ ],
36
+ "intervention_dimensions": [
37
+ 2560,
38
+ 2560,
39
+ 2560,
40
+ 2560,
41
+ 2560,
42
+ 2560,
43
+ 2560,
44
+ 2560,
45
+ 2560,
46
+ 2560,
47
+ 2560,
48
+ 2560,
49
+ 2560,
50
+ 2560,
51
+ 2560,
52
+ 2560,
53
+ 2560,
54
+ 2560,
55
+ 2560,
56
+ 2560,
57
+ 2560,
58
+ 2560,
59
+ 2560,
60
+ 2560,
61
+ 2560,
62
+ 2560,
63
+ 2560,
64
+ 2560,
65
+ 2560,
66
+ 2560,
67
+ 2560,
68
+ 2560
69
+ ],
70
+ "intervention_types": [
71
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
72
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
73
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
74
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
75
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
76
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
77
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
78
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
79
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
80
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
81
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
82
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
83
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
84
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
85
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
86
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
87
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
88
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
89
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
90
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
91
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
92
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
93
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
94
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
95
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
96
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
97
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
98
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
99
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
100
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
101
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>",
102
+ "<class 'tinyllava.model.intervention_models.MoReSIntervention'>"
103
+ ],
104
+ "mode": "parallel",
105
+ "representations": [
106
+ [
107
+ 0,
108
+ "block_output",
109
+ "pos",
110
+ 1,
111
+ 4,
112
+ null,
113
+ null,
114
+ null,
115
+ null,
116
+ null,
117
+ null,
118
+ null,
119
+ null
120
+ ],
121
+ [
122
+ 1,
123
+ "block_output",
124
+ "pos",
125
+ 1,
126
+ 4,
127
+ null,
128
+ null,
129
+ null,
130
+ null,
131
+ null,
132
+ null,
133
+ null,
134
+ null
135
+ ],
136
+ [
137
+ 2,
138
+ "block_output",
139
+ "pos",
140
+ 1,
141
+ 4,
142
+ null,
143
+ null,
144
+ null,
145
+ null,
146
+ null,
147
+ null,
148
+ null,
149
+ null
150
+ ],
151
+ [
152
+ 3,
153
+ "block_output",
154
+ "pos",
155
+ 1,
156
+ 4,
157
+ null,
158
+ null,
159
+ null,
160
+ null,
161
+ null,
162
+ null,
163
+ null,
164
+ null
165
+ ],
166
+ [
167
+ 4,
168
+ "block_output",
169
+ "pos",
170
+ 1,
171
+ 4,
172
+ null,
173
+ null,
174
+ null,
175
+ null,
176
+ null,
177
+ null,
178
+ null,
179
+ null
180
+ ],
181
+ [
182
+ 5,
183
+ "block_output",
184
+ "pos",
185
+ 1,
186
+ 4,
187
+ null,
188
+ null,
189
+ null,
190
+ null,
191
+ null,
192
+ null,
193
+ null,
194
+ null
195
+ ],
196
+ [
197
+ 6,
198
+ "block_output",
199
+ "pos",
200
+ 1,
201
+ 4,
202
+ null,
203
+ null,
204
+ null,
205
+ null,
206
+ null,
207
+ null,
208
+ null,
209
+ null
210
+ ],
211
+ [
212
+ 7,
213
+ "block_output",
214
+ "pos",
215
+ 1,
216
+ 4,
217
+ null,
218
+ null,
219
+ null,
220
+ null,
221
+ null,
222
+ null,
223
+ null,
224
+ null
225
+ ],
226
+ [
227
+ 8,
228
+ "block_output",
229
+ "pos",
230
+ 1,
231
+ 4,
232
+ null,
233
+ null,
234
+ null,
235
+ null,
236
+ null,
237
+ null,
238
+ null,
239
+ null
240
+ ],
241
+ [
242
+ 9,
243
+ "block_output",
244
+ "pos",
245
+ 1,
246
+ 4,
247
+ null,
248
+ null,
249
+ null,
250
+ null,
251
+ null,
252
+ null,
253
+ null,
254
+ null
255
+ ],
256
+ [
257
+ 10,
258
+ "block_output",
259
+ "pos",
260
+ 1,
261
+ 4,
262
+ null,
263
+ null,
264
+ null,
265
+ null,
266
+ null,
267
+ null,
268
+ null,
269
+ null
270
+ ],
271
+ [
272
+ 11,
273
+ "block_output",
274
+ "pos",
275
+ 1,
276
+ 4,
277
+ null,
278
+ null,
279
+ null,
280
+ null,
281
+ null,
282
+ null,
283
+ null,
284
+ null
285
+ ],
286
+ [
287
+ 12,
288
+ "block_output",
289
+ "pos",
290
+ 1,
291
+ 4,
292
+ null,
293
+ null,
294
+ null,
295
+ null,
296
+ null,
297
+ null,
298
+ null,
299
+ null
300
+ ],
301
+ [
302
+ 13,
303
+ "block_output",
304
+ "pos",
305
+ 1,
306
+ 4,
307
+ null,
308
+ null,
309
+ null,
310
+ null,
311
+ null,
312
+ null,
313
+ null,
314
+ null
315
+ ],
316
+ [
317
+ 14,
318
+ "block_output",
319
+ "pos",
320
+ 1,
321
+ 4,
322
+ null,
323
+ null,
324
+ null,
325
+ null,
326
+ null,
327
+ null,
328
+ null,
329
+ null
330
+ ],
331
+ [
332
+ 15,
333
+ "block_output",
334
+ "pos",
335
+ 1,
336
+ 4,
337
+ null,
338
+ null,
339
+ null,
340
+ null,
341
+ null,
342
+ null,
343
+ null,
344
+ null
345
+ ],
346
+ [
347
+ 16,
348
+ "block_output",
349
+ "pos",
350
+ 1,
351
+ 4,
352
+ null,
353
+ null,
354
+ null,
355
+ null,
356
+ null,
357
+ null,
358
+ null,
359
+ null
360
+ ],
361
+ [
362
+ 17,
363
+ "block_output",
364
+ "pos",
365
+ 1,
366
+ 4,
367
+ null,
368
+ null,
369
+ null,
370
+ null,
371
+ null,
372
+ null,
373
+ null,
374
+ null
375
+ ],
376
+ [
377
+ 18,
378
+ "block_output",
379
+ "pos",
380
+ 1,
381
+ 4,
382
+ null,
383
+ null,
384
+ null,
385
+ null,
386
+ null,
387
+ null,
388
+ null,
389
+ null
390
+ ],
391
+ [
392
+ 19,
393
+ "block_output",
394
+ "pos",
395
+ 1,
396
+ 4,
397
+ null,
398
+ null,
399
+ null,
400
+ null,
401
+ null,
402
+ null,
403
+ null,
404
+ null
405
+ ],
406
+ [
407
+ 20,
408
+ "block_output",
409
+ "pos",
410
+ 1,
411
+ 4,
412
+ null,
413
+ null,
414
+ null,
415
+ null,
416
+ null,
417
+ null,
418
+ null,
419
+ null
420
+ ],
421
+ [
422
+ 21,
423
+ "block_output",
424
+ "pos",
425
+ 1,
426
+ 4,
427
+ null,
428
+ null,
429
+ null,
430
+ null,
431
+ null,
432
+ null,
433
+ null,
434
+ null
435
+ ],
436
+ [
437
+ 22,
438
+ "block_output",
439
+ "pos",
440
+ 1,
441
+ 4,
442
+ null,
443
+ null,
444
+ null,
445
+ null,
446
+ null,
447
+ null,
448
+ null,
449
+ null
450
+ ],
451
+ [
452
+ 23,
453
+ "block_output",
454
+ "pos",
455
+ 1,
456
+ 4,
457
+ null,
458
+ null,
459
+ null,
460
+ null,
461
+ null,
462
+ null,
463
+ null,
464
+ null
465
+ ],
466
+ [
467
+ 24,
468
+ "block_output",
469
+ "pos",
470
+ 1,
471
+ 4,
472
+ null,
473
+ null,
474
+ null,
475
+ null,
476
+ null,
477
+ null,
478
+ null,
479
+ null
480
+ ],
481
+ [
482
+ 25,
483
+ "block_output",
484
+ "pos",
485
+ 1,
486
+ 4,
487
+ null,
488
+ null,
489
+ null,
490
+ null,
491
+ null,
492
+ null,
493
+ null,
494
+ null
495
+ ],
496
+ [
497
+ 26,
498
+ "block_output",
499
+ "pos",
500
+ 1,
501
+ 4,
502
+ null,
503
+ null,
504
+ null,
505
+ null,
506
+ null,
507
+ null,
508
+ null,
509
+ null
510
+ ],
511
+ [
512
+ 27,
513
+ "block_output",
514
+ "pos",
515
+ 1,
516
+ 4,
517
+ null,
518
+ null,
519
+ null,
520
+ null,
521
+ null,
522
+ null,
523
+ null,
524
+ null
525
+ ],
526
+ [
527
+ 28,
528
+ "block_output",
529
+ "pos",
530
+ 1,
531
+ 4,
532
+ null,
533
+ null,
534
+ null,
535
+ null,
536
+ null,
537
+ null,
538
+ null,
539
+ null
540
+ ],
541
+ [
542
+ 29,
543
+ "block_output",
544
+ "pos",
545
+ 1,
546
+ 4,
547
+ null,
548
+ null,
549
+ null,
550
+ null,
551
+ null,
552
+ null,
553
+ null,
554
+ null
555
+ ],
556
+ [
557
+ 30,
558
+ "block_output",
559
+ "pos",
560
+ 1,
561
+ 4,
562
+ null,
563
+ null,
564
+ null,
565
+ null,
566
+ null,
567
+ null,
568
+ null,
569
+ null
570
+ ],
571
+ [
572
+ 31,
573
+ "block_output",
574
+ "pos",
575
+ 1,
576
+ 4,
577
+ null,
578
+ null,
579
+ null,
580
+ null,
581
+ null,
582
+ null,
583
+ null,
584
+ null
585
+ ]
586
+ ],
587
+ "sorted_keys": [
588
+ "layer.0.comp.block_output.unit.pos.nunit.1#0",
589
+ "layer.1.comp.block_output.unit.pos.nunit.1#0",
590
+ "layer.2.comp.block_output.unit.pos.nunit.1#0",
591
+ "layer.3.comp.block_output.unit.pos.nunit.1#0",
592
+ "layer.4.comp.block_output.unit.pos.nunit.1#0",
593
+ "layer.5.comp.block_output.unit.pos.nunit.1#0",
594
+ "layer.6.comp.block_output.unit.pos.nunit.1#0",
595
+ "layer.7.comp.block_output.unit.pos.nunit.1#0",
596
+ "layer.8.comp.block_output.unit.pos.nunit.1#0",
597
+ "layer.9.comp.block_output.unit.pos.nunit.1#0",
598
+ "layer.10.comp.block_output.unit.pos.nunit.1#0",
599
+ "layer.11.comp.block_output.unit.pos.nunit.1#0",
600
+ "layer.12.comp.block_output.unit.pos.nunit.1#0",
601
+ "layer.13.comp.block_output.unit.pos.nunit.1#0",
602
+ "layer.14.comp.block_output.unit.pos.nunit.1#0",
603
+ "layer.15.comp.block_output.unit.pos.nunit.1#0",
604
+ "layer.16.comp.block_output.unit.pos.nunit.1#0",
605
+ "layer.17.comp.block_output.unit.pos.nunit.1#0",
606
+ "layer.18.comp.block_output.unit.pos.nunit.1#0",
607
+ "layer.19.comp.block_output.unit.pos.nunit.1#0",
608
+ "layer.20.comp.block_output.unit.pos.nunit.1#0",
609
+ "layer.21.comp.block_output.unit.pos.nunit.1#0",
610
+ "layer.22.comp.block_output.unit.pos.nunit.1#0",
611
+ "layer.23.comp.block_output.unit.pos.nunit.1#0",
612
+ "layer.24.comp.block_output.unit.pos.nunit.1#0",
613
+ "layer.25.comp.block_output.unit.pos.nunit.1#0",
614
+ "layer.26.comp.block_output.unit.pos.nunit.1#0",
615
+ "layer.27.comp.block_output.unit.pos.nunit.1#0",
616
+ "layer.28.comp.block_output.unit.pos.nunit.1#0",
617
+ "layer.29.comp.block_output.unit.pos.nunit.1#0",
618
+ "layer.30.comp.block_output.unit.pos.nunit.1#0",
619
+ "layer.31.comp.block_output.unit.pos.nunit.1#0"
620
+ ],
621
+ "transformers_version": "4.39.3"
622
+ }
mores/language_model/intervention_config.json ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "representations": [
3
+ {
4
+ "layer": 0,
5
+ "component": "block_output",
6
+ "low_rank_dimension": 4,
7
+ "intervention": {
8
+ "type": "mores",
9
+ "embed_dim": 2560,
10
+ "low_rank_dimension": 4,
11
+ "dropout": 0.05
12
+ }
13
+ },
14
+ {
15
+ "layer": 1,
16
+ "component": "block_output",
17
+ "low_rank_dimension": 4,
18
+ "intervention": {
19
+ "type": "mores",
20
+ "embed_dim": 2560,
21
+ "low_rank_dimension": 4,
22
+ "dropout": 0.05
23
+ }
24
+ },
25
+ {
26
+ "layer": 2,
27
+ "component": "block_output",
28
+ "low_rank_dimension": 4,
29
+ "intervention": {
30
+ "type": "mores",
31
+ "embed_dim": 2560,
32
+ "low_rank_dimension": 4,
33
+ "dropout": 0.05
34
+ }
35
+ },
36
+ {
37
+ "layer": 3,
38
+ "component": "block_output",
39
+ "low_rank_dimension": 4,
40
+ "intervention": {
41
+ "type": "mores",
42
+ "embed_dim": 2560,
43
+ "low_rank_dimension": 4,
44
+ "dropout": 0.05
45
+ }
46
+ },
47
+ {
48
+ "layer": 4,
49
+ "component": "block_output",
50
+ "low_rank_dimension": 4,
51
+ "intervention": {
52
+ "type": "mores",
53
+ "embed_dim": 2560,
54
+ "low_rank_dimension": 4,
55
+ "dropout": 0.05
56
+ }
57
+ },
58
+ {
59
+ "layer": 5,
60
+ "component": "block_output",
61
+ "low_rank_dimension": 4,
62
+ "intervention": {
63
+ "type": "mores",
64
+ "embed_dim": 2560,
65
+ "low_rank_dimension": 4,
66
+ "dropout": 0.05
67
+ }
68
+ },
69
+ {
70
+ "layer": 6,
71
+ "component": "block_output",
72
+ "low_rank_dimension": 4,
73
+ "intervention": {
74
+ "type": "mores",
75
+ "embed_dim": 2560,
76
+ "low_rank_dimension": 4,
77
+ "dropout": 0.05
78
+ }
79
+ },
80
+ {
81
+ "layer": 7,
82
+ "component": "block_output",
83
+ "low_rank_dimension": 4,
84
+ "intervention": {
85
+ "type": "mores",
86
+ "embed_dim": 2560,
87
+ "low_rank_dimension": 4,
88
+ "dropout": 0.05
89
+ }
90
+ },
91
+ {
92
+ "layer": 8,
93
+ "component": "block_output",
94
+ "low_rank_dimension": 4,
95
+ "intervention": {
96
+ "type": "mores",
97
+ "embed_dim": 2560,
98
+ "low_rank_dimension": 4,
99
+ "dropout": 0.05
100
+ }
101
+ },
102
+ {
103
+ "layer": 9,
104
+ "component": "block_output",
105
+ "low_rank_dimension": 4,
106
+ "intervention": {
107
+ "type": "mores",
108
+ "embed_dim": 2560,
109
+ "low_rank_dimension": 4,
110
+ "dropout": 0.05
111
+ }
112
+ },
113
+ {
114
+ "layer": 10,
115
+ "component": "block_output",
116
+ "low_rank_dimension": 4,
117
+ "intervention": {
118
+ "type": "mores",
119
+ "embed_dim": 2560,
120
+ "low_rank_dimension": 4,
121
+ "dropout": 0.05
122
+ }
123
+ },
124
+ {
125
+ "layer": 11,
126
+ "component": "block_output",
127
+ "low_rank_dimension": 4,
128
+ "intervention": {
129
+ "type": "mores",
130
+ "embed_dim": 2560,
131
+ "low_rank_dimension": 4,
132
+ "dropout": 0.05
133
+ }
134
+ },
135
+ {
136
+ "layer": 12,
137
+ "component": "block_output",
138
+ "low_rank_dimension": 4,
139
+ "intervention": {
140
+ "type": "mores",
141
+ "embed_dim": 2560,
142
+ "low_rank_dimension": 4,
143
+ "dropout": 0.05
144
+ }
145
+ },
146
+ {
147
+ "layer": 13,
148
+ "component": "block_output",
149
+ "low_rank_dimension": 4,
150
+ "intervention": {
151
+ "type": "mores",
152
+ "embed_dim": 2560,
153
+ "low_rank_dimension": 4,
154
+ "dropout": 0.05
155
+ }
156
+ },
157
+ {
158
+ "layer": 14,
159
+ "component": "block_output",
160
+ "low_rank_dimension": 4,
161
+ "intervention": {
162
+ "type": "mores",
163
+ "embed_dim": 2560,
164
+ "low_rank_dimension": 4,
165
+ "dropout": 0.05
166
+ }
167
+ },
168
+ {
169
+ "layer": 15,
170
+ "component": "block_output",
171
+ "low_rank_dimension": 4,
172
+ "intervention": {
173
+ "type": "mores",
174
+ "embed_dim": 2560,
175
+ "low_rank_dimension": 4,
176
+ "dropout": 0.05
177
+ }
178
+ },
179
+ {
180
+ "layer": 16,
181
+ "component": "block_output",
182
+ "low_rank_dimension": 4,
183
+ "intervention": {
184
+ "type": "mores",
185
+ "embed_dim": 2560,
186
+ "low_rank_dimension": 4,
187
+ "dropout": 0.05
188
+ }
189
+ },
190
+ {
191
+ "layer": 17,
192
+ "component": "block_output",
193
+ "low_rank_dimension": 4,
194
+ "intervention": {
195
+ "type": "mores",
196
+ "embed_dim": 2560,
197
+ "low_rank_dimension": 4,
198
+ "dropout": 0.05
199
+ }
200
+ },
201
+ {
202
+ "layer": 18,
203
+ "component": "block_output",
204
+ "low_rank_dimension": 4,
205
+ "intervention": {
206
+ "type": "mores",
207
+ "embed_dim": 2560,
208
+ "low_rank_dimension": 4,
209
+ "dropout": 0.05
210
+ }
211
+ },
212
+ {
213
+ "layer": 19,
214
+ "component": "block_output",
215
+ "low_rank_dimension": 4,
216
+ "intervention": {
217
+ "type": "mores",
218
+ "embed_dim": 2560,
219
+ "low_rank_dimension": 4,
220
+ "dropout": 0.05
221
+ }
222
+ },
223
+ {
224
+ "layer": 20,
225
+ "component": "block_output",
226
+ "low_rank_dimension": 4,
227
+ "intervention": {
228
+ "type": "mores",
229
+ "embed_dim": 2560,
230
+ "low_rank_dimension": 4,
231
+ "dropout": 0.05
232
+ }
233
+ },
234
+ {
235
+ "layer": 21,
236
+ "component": "block_output",
237
+ "low_rank_dimension": 4,
238
+ "intervention": {
239
+ "type": "mores",
240
+ "embed_dim": 2560,
241
+ "low_rank_dimension": 4,
242
+ "dropout": 0.05
243
+ }
244
+ },
245
+ {
246
+ "layer": 22,
247
+ "component": "block_output",
248
+ "low_rank_dimension": 4,
249
+ "intervention": {
250
+ "type": "mores",
251
+ "embed_dim": 2560,
252
+ "low_rank_dimension": 4,
253
+ "dropout": 0.05
254
+ }
255
+ },
256
+ {
257
+ "layer": 23,
258
+ "component": "block_output",
259
+ "low_rank_dimension": 4,
260
+ "intervention": {
261
+ "type": "mores",
262
+ "embed_dim": 2560,
263
+ "low_rank_dimension": 4,
264
+ "dropout": 0.05
265
+ }
266
+ },
267
+ {
268
+ "layer": 24,
269
+ "component": "block_output",
270
+ "low_rank_dimension": 4,
271
+ "intervention": {
272
+ "type": "mores",
273
+ "embed_dim": 2560,
274
+ "low_rank_dimension": 4,
275
+ "dropout": 0.05
276
+ }
277
+ },
278
+ {
279
+ "layer": 25,
280
+ "component": "block_output",
281
+ "low_rank_dimension": 4,
282
+ "intervention": {
283
+ "type": "mores",
284
+ "embed_dim": 2560,
285
+ "low_rank_dimension": 4,
286
+ "dropout": 0.05
287
+ }
288
+ },
289
+ {
290
+ "layer": 26,
291
+ "component": "block_output",
292
+ "low_rank_dimension": 4,
293
+ "intervention": {
294
+ "type": "mores",
295
+ "embed_dim": 2560,
296
+ "low_rank_dimension": 4,
297
+ "dropout": 0.05
298
+ }
299
+ },
300
+ {
301
+ "layer": 27,
302
+ "component": "block_output",
303
+ "low_rank_dimension": 4,
304
+ "intervention": {
305
+ "type": "mores",
306
+ "embed_dim": 2560,
307
+ "low_rank_dimension": 4,
308
+ "dropout": 0.05
309
+ }
310
+ },
311
+ {
312
+ "layer": 28,
313
+ "component": "block_output",
314
+ "low_rank_dimension": 4,
315
+ "intervention": {
316
+ "type": "mores",
317
+ "embed_dim": 2560,
318
+ "low_rank_dimension": 4,
319
+ "dropout": 0.05
320
+ }
321
+ },
322
+ {
323
+ "layer": 29,
324
+ "component": "block_output",
325
+ "low_rank_dimension": 4,
326
+ "intervention": {
327
+ "type": "mores",
328
+ "embed_dim": 2560,
329
+ "low_rank_dimension": 4,
330
+ "dropout": 0.05
331
+ }
332
+ },
333
+ {
334
+ "layer": 30,
335
+ "component": "block_output",
336
+ "low_rank_dimension": 4,
337
+ "intervention": {
338
+ "type": "mores",
339
+ "embed_dim": 2560,
340
+ "low_rank_dimension": 4,
341
+ "dropout": 0.05
342
+ }
343
+ },
344
+ {
345
+ "layer": 31,
346
+ "component": "block_output",
347
+ "low_rank_dimension": 4,
348
+ "intervention": {
349
+ "type": "mores",
350
+ "embed_dim": 2560,
351
+ "low_rank_dimension": 4,
352
+ "dropout": 0.05
353
+ }
354
+ }
355
+ ]
356
+ }
mores/language_model/intkey_layer.0.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30b4edb4cfb1d7ae68ea3f8a44d228ca922b03a076de72891addc5db58d64f1a
3
+ size 63560
mores/language_model/intkey_layer.1.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55d0594ea27ff3f1cc867e023fc4f41183069aa6374edb50803a6c5877809eb0
3
+ size 63560
mores/language_model/intkey_layer.10.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35d45b3fa179107f9ba5cf078632bb166bc963a615e7d2adc64067bc8b7aaaf2
3
+ size 63567
mores/language_model/intkey_layer.11.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd985ce425b214118fe5ad16c05a34a8d3ba5cab13b1dcd562f48fe57dbfc9f4
3
+ size 63567
mores/language_model/intkey_layer.12.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f53348905931df778d6233387d574dbd29f76f35aa83391fd625ac122d2339eb
3
+ size 63567
mores/language_model/intkey_layer.13.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c64c4b39b9906f468f8559170c4f13520ea8a648539331d028e6e4c97ddd6321
3
+ size 63567
mores/language_model/intkey_layer.14.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:588e000b9be651851fdc193e306f7c490a2e5cad65a35a5a54dad56eb77cc205
3
+ size 63567
mores/language_model/intkey_layer.15.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c967032cf751551b1c9636d5f75f0c98b54dfc3494a2eb7026869ef05abe7c15
3
+ size 63567
mores/language_model/intkey_layer.16.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88efd22fc040096081b3e9ed724eb7da00e1e581f6a0481724e200163991192c
3
+ size 63567
mores/language_model/intkey_layer.17.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fc0e2115bc3b0ee4faddded685376b04959b7a11fe5e671f00407bede431285
3
+ size 63567
mores/language_model/intkey_layer.18.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:131513a60a46fc0eebba00f0588bd758f06c59c76e6c585b4b749a91e6eb2569
3
+ size 63567
mores/language_model/intkey_layer.19.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b7b34e009459e649a626f6c2b0fb955541d4e0394a2b8be57d7b90616ca479d
3
+ size 63567
mores/language_model/intkey_layer.2.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa73fb595ee4a665896d70cc1fab76e80b04e4aed57f293e07ada4b0805c9cca
3
+ size 63560
mores/language_model/intkey_layer.20.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dde9fb488feacf834e565d328f6374bac87c9876d4bd9820fe4dd0d31cb4d1e
3
+ size 63567
mores/language_model/intkey_layer.21.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edfc425d2b072f409996e8784669ca7a9015bbeb550ac31b9db44b82d43c9761
3
+ size 63567
mores/language_model/intkey_layer.22.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20fc574bc77e0d608e5cf57c9b4e0c4f3938563716f4cc84b11387eb2a074005
3
+ size 63567
mores/language_model/intkey_layer.23.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60f7066bfe093d517487357ddb8bb606b8bc77868d97d3a320cca017ce0cfe95
3
+ size 63567
mores/language_model/intkey_layer.24.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ef25fd4ceb7af5fb800c414cb8771e2f1c0e9daf3bf2610d722175e7ad3b596
3
+ size 63567
mores/language_model/intkey_layer.25.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d71386217bba105b416fdaafa5839fbf040702a4aeccbc76b894bebb417fa389
3
+ size 63567
mores/language_model/intkey_layer.26.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caee12bc5322ff67c5934e603022951360b73ee9201b143e44568e078f5363de
3
+ size 63567
mores/language_model/intkey_layer.27.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f96c58801aa0a2a96e303de19a9058edbc298d01911aa687caa11a2ef420064d
3
+ size 63567
mores/language_model/intkey_layer.28.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdd1a79ebd43d2f4f986e7fdba1191cd0cadfbdf897efebae7b6028609c3201
3
+ size 63567
mores/language_model/intkey_layer.29.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f175213cf683885c34215ba7b485247d298ed4b17b9180fdccc758a88a3632d3
3
+ size 63567
mores/language_model/intkey_layer.3.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59dcce6a95b4b4bdeb5241c3cf5c818c5b396d54fb39e5e238fac6bfa61bcde7
3
+ size 63560
mores/language_model/intkey_layer.30.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea5cec01073e24afd3edd6033f1395e18d2517b43abd0ac504a5af184bd24cff
3
+ size 63567
mores/language_model/intkey_layer.31.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:765dfa71b1391df34b7cc32b7f6258f7f956d593ea90f463ba0a87c4e8b91a4d
3
+ size 63567
mores/language_model/intkey_layer.4.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efe004dacfffa763f24e31ff8c96ec706392e6a7cae0a4b4da4284fd1d836902
3
+ size 63560
mores/language_model/intkey_layer.5.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:986d1290c2e427da2364ee1587d7ad7f620d91329989178195b55d4c8d92ddb3
3
+ size 63560
mores/language_model/intkey_layer.6.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:152cfb8cf8dd3f6813c71e7d9b5d95b558ce6eef2904a2a9d87f6d170374ba72
3
+ size 63560
mores/language_model/intkey_layer.7.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f07d469d2bd76a423db4b2033d1a709569af0d1e2a091660da6d66a77eb08d6
3
+ size 63560
mores/language_model/intkey_layer.8.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a09e04bc1a7d0f7cfa961f9a24980e754da0e5dc36a69e56ec8b6b0e4c1095de
3
+ size 63560
mores/language_model/intkey_layer.9.comp.block_output.unit.pos.nunit.1#0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99388787564c281181e68dc1c026ea46bab2993a5a5db9520a9f8c4ada5c54f2
3
+ size 63560
mores/language_model/mores_pos_configs.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intervention_positions": "f4+l5",
3
+ "mores_share_weights": true,
4
+ "intervened_prompt_part": "first_round",
5
+ "intervene_modality": "vis",
6
+ "num_interventions": {
7
+ "llm": 32
8
+ },
9
+ "img_embed_token_len": 728
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "50257": {
14
+ "content": " ",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": false
20
+ },
21
+ "50258": {
22
+ "content": " ",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": false
28
+ },
29
+ "50259": {
30
+ "content": " ",
31
+ "lstrip": false,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": false
36
+ },
37
+ "50260": {
38
+ "content": " ",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": false
44
+ },
45
+ "50261": {
46
+ "content": " ",
47
+ "lstrip": false,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": false
52
+ },
53
+ "50262": {
54
+ "content": " ",
55
+ "lstrip": false,
56
+ "normalized": true,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": false
60
+ },
61
+ "50263": {
62
+ "content": " ",
63
+ "lstrip": false,
64
+ "normalized": true,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": false
68
+ },
69
+ "50264": {
70
+ "content": " ",
71
+ "lstrip": false,
72
+ "normalized": true,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": false
76
+ },
77
+ "50265": {
78
+ "content": " ",
79
+ "lstrip": false,
80
+ "normalized": true,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": false
84
+ },
85
+ "50266": {
86
+ "content": " ",
87
+ "lstrip": false,
88
+ "normalized": true,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": false
92
+ },
93
+ "50267": {
94
+ "content": " ",
95
+ "lstrip": false,
96
+ "normalized": true,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": false
100
+ },
101
+ "50268": {
102
+ "content": " ",
103
+ "lstrip": false,
104
+ "normalized": true,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": false
108
+ },
109
+ "50269": {
110
+ "content": " ",
111
+ "lstrip": false,
112
+ "normalized": true,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": false
116
+ },
117
+ "50270": {
118
+ "content": " ",
119
+ "lstrip": false,
120
+ "normalized": true,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "50271": {
126
+ "content": " ",
127
+ "lstrip": false,
128
+ "normalized": true,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "50272": {
134
+ "content": " ",
135
+ "lstrip": false,
136
+ "normalized": true,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "50273": {
142
+ "content": " ",
143
+ "lstrip": false,
144
+ "normalized": true,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "50274": {
150
+ "content": " ",
151
+ "lstrip": false,
152
+ "normalized": true,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "50275": {
158
+ "content": " ",
159
+ "lstrip": false,
160
+ "normalized": true,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "50276": {
166
+ "content": " ",
167
+ "lstrip": false,
168
+ "normalized": true,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "50277": {
174
+ "content": " ",
175
+ "lstrip": false,
176
+ "normalized": true,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "50278": {
182
+ "content": " ",
183
+ "lstrip": false,
184
+ "normalized": true,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "50279": {
190
+ "content": " ",
191
+ "lstrip": false,
192
+ "normalized": true,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "50280": {
198
+ "content": " ",
199
+ "lstrip": false,
200
+ "normalized": true,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "50281": {
206
+ "content": " ",
207
+ "lstrip": false,
208
+ "normalized": true,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ },
213
+ "50282": {
214
+ "content": " ",
215
+ "lstrip": false,
216
+ "normalized": true,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": false
220
+ },
221
+ "50283": {
222
+ "content": " ",
223
+ "lstrip": false,
224
+ "normalized": true,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": false
228
+ },
229
+ "50284": {
230
+ "content": " ",
231
+ "lstrip": false,
232
+ "normalized": true,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": false
236
+ },
237
+ "50285": {
238
+ "content": " ",
239
+ "lstrip": false,
240
+ "normalized": true,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": false
244
+ },
245
+ "50286": {
246
+ "content": " ",
247
+ "lstrip": false,
248
+ "normalized": true,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": false
252
+ },
253
+ "50287": {
254
+ "content": "\t\t\t\t\t\t\t\t\t",
255
+ "lstrip": false,
256
+ "normalized": true,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": false
260
+ },
261
+ "50288": {
262
+ "content": "\t\t\t\t\t\t\t\t",
263
+ "lstrip": false,
264
+ "normalized": true,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": false
268
+ },
269
+ "50289": {
270
+ "content": "\t\t\t\t\t\t\t",
271
+ "lstrip": false,
272
+ "normalized": true,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": false
276
+ },
277
+ "50290": {
278
+ "content": "\t\t\t\t\t\t",
279
+ "lstrip": false,
280
+ "normalized": true,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": false
284
+ },
285
+ "50291": {
286
+ "content": "\t\t\t\t\t",
287
+ "lstrip": false,
288
+ "normalized": true,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": false
292
+ },
293
+ "50292": {
294
+ "content": "\t\t\t\t",
295
+ "lstrip": false,
296
+ "normalized": true,
297
+ "rstrip": false,
298
+ "single_word": false,
299
+ "special": false
300
+ },
301
+ "50293": {
302
+ "content": "\t\t\t",
303
+ "lstrip": false,
304
+ "normalized": true,
305
+ "rstrip": false,
306
+ "single_word": false,
307
+ "special": false
308
+ },
309
+ "50294": {
310
+ "content": "\t\t",
311
+ "lstrip": false,
312
+ "normalized": true,
313
+ "rstrip": false,
314
+ "single_word": false,
315
+ "special": false
316
+ }
317
+ },
318
+ "bos_token": "<|endoftext|>",
319
+ "clean_up_tokenization_spaces": true,
320
+ "eos_token": "<|endoftext|>",
321
+ "errors": "replace",
322
+ "model_max_length": 3072,
323
+ "pad_token": "<|endoftext|>",
324
+ "padding_side": "right",
325
+ "tokenizer_class": "CodeGenTokenizer",
326
+ "unk_token": "<|endoftext|>"
327
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
vision_tower/config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_dropout": 0.0,
3
+ "hidden_act": "gelu_pytorch_tanh",
4
+ "hidden_size": 1152,
5
+ "image_size": 384,
6
+ "intermediate_size": 4304,
7
+ "layer_norm_eps": 1e-06,
8
+ "model_name_or_path": "google/siglip-so400m-patch14-384",
9
+ "model_name_or_path2": "",
10
+ "model_type": "siglip_vision_model",
11
+ "num_attention_heads": 16,
12
+ "num_channels": 3,
13
+ "num_hidden_layers": 27,
14
+ "patch_size": 14,
15
+ "transformers_version": "4.39.3"
16
+ }
vision_tower/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c03cdf872045ec4e0d757bc8cd45adb18f6b4d30384c9f793d11793371c836f
3
+ size 856599050
vocab.json ADDED
The diff for this file is too large to render. See raw diff