BonanDing committed on
Commit
00501ee
·
verified ·
1 Parent(s): 4ee4d25

Add UniMVU release checkpoint: unimvu_0.5B_avqa

Browse files
unimvu_0.5B_avqa/adapter_config.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "lmms-lab/llava-onevision-qwen2-0.5b-ov",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 128,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 64,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "model.layers.5.mlp.up_proj",
24
+ "model.layers.11.mlp.up_proj",
25
+ "model.layers.1.self_attn.q_proj",
26
+ "model.layers.5.self_attn.o_proj",
27
+ "model.layers.13.mlp.down_proj",
28
+ "model.layers.8.mlp.down_proj",
29
+ "model.layers.0.mlp.down_proj",
30
+ "model.layers.17.mlp.gate_proj",
31
+ "model.layers.21.self_attn.v_proj",
32
+ "model.layers.6.self_attn.q_proj",
33
+ "model.layers.22.mlp.gate_proj",
34
+ "model.layers.20.self_attn.v_proj",
35
+ "model.layers.18.mlp.up_proj",
36
+ "model.layers.15.self_attn.q_proj",
37
+ "model.layers.16.self_attn.k_proj",
38
+ "model.layers.7.self_attn.o_proj",
39
+ "model.layers.16.self_attn.q_proj",
40
+ "model.layers.0.self_attn.q_proj",
41
+ "model.layers.19.self_attn.o_proj",
42
+ "model.layers.21.self_attn.o_proj",
43
+ "model.layers.1.mlp.up_proj",
44
+ "model.layers.2.mlp.down_proj",
45
+ "model.layers.2.self_attn.k_proj",
46
+ "model.layers.1.mlp.down_proj",
47
+ "model.layers.12.mlp.up_proj",
48
+ "model.layers.9.mlp.down_proj",
49
+ "model.layers.7.self_attn.q_proj",
50
+ "model.layers.10.self_attn.o_proj",
51
+ "model.layers.19.mlp.gate_proj",
52
+ "model.layers.7.mlp.down_proj",
53
+ "model.layers.16.self_attn.v_proj",
54
+ "model.layers.10.self_attn.k_proj",
55
+ "model.layers.3.mlp.up_proj",
56
+ "model.layers.20.mlp.down_proj",
57
+ "model.layers.7.self_attn.v_proj",
58
+ "model.layers.23.mlp.down_proj",
59
+ "model.layers.20.mlp.gate_proj",
60
+ "model.layers.16.mlp.gate_proj",
61
+ "model.layers.3.self_attn.k_proj",
62
+ "model.layers.8.self_attn.k_proj",
63
+ "model.layers.12.mlp.down_proj",
64
+ "model.layers.15.mlp.up_proj",
65
+ "model.layers.4.self_attn.o_proj",
66
+ "model.layers.9.mlp.gate_proj",
67
+ "model.layers.22.mlp.up_proj",
68
+ "model.layers.15.mlp.down_proj",
69
+ "model.layers.13.mlp.up_proj",
70
+ "model.layers.1.self_attn.k_proj",
71
+ "model.layers.0.mlp.gate_proj",
72
+ "model.layers.2.self_attn.o_proj",
73
+ "model.layers.8.mlp.up_proj",
74
+ "model.layers.3.mlp.gate_proj",
75
+ "model.layers.14.self_attn.v_proj",
76
+ "model.layers.3.self_attn.v_proj",
77
+ "model.layers.2.mlp.up_proj",
78
+ "model.layers.12.self_attn.v_proj",
79
+ "model.layers.4.mlp.gate_proj",
80
+ "model.layers.12.self_attn.o_proj",
81
+ "model.layers.16.mlp.down_proj",
82
+ "model.layers.7.mlp.up_proj",
83
+ "model.layers.19.mlp.up_proj",
84
+ "model.layers.19.self_attn.k_proj",
85
+ "model.layers.22.self_attn.v_proj",
86
+ "model.layers.11.self_attn.v_proj",
87
+ "model.layers.10.self_attn.v_proj",
88
+ "model.layers.17.self_attn.o_proj",
89
+ "model.layers.2.self_attn.v_proj",
90
+ "model.layers.7.mlp.gate_proj",
91
+ "model.layers.23.self_attn.k_proj",
92
+ "model.layers.4.self_attn.v_proj",
93
+ "model.layers.15.self_attn.v_proj",
94
+ "model.layers.14.self_attn.k_proj",
95
+ "model.layers.18.self_attn.q_proj",
96
+ "model.layers.19.mlp.down_proj",
97
+ "model.layers.9.self_attn.k_proj",
98
+ "model.layers.16.self_attn.o_proj",
99
+ "model.layers.21.mlp.down_proj",
100
+ "model.layers.4.mlp.down_proj",
101
+ "model.layers.8.self_attn.q_proj",
102
+ "model.layers.17.self_attn.k_proj",
103
+ "model.layers.23.mlp.up_proj",
104
+ "model.layers.4.self_attn.q_proj",
105
+ "model.layers.5.self_attn.q_proj",
106
+ "model.layers.3.self_attn.q_proj",
107
+ "model.layers.13.mlp.gate_proj",
108
+ "model.layers.6.mlp.gate_proj",
109
+ "model.layers.5.self_attn.k_proj",
110
+ "model.layers.21.mlp.up_proj",
111
+ "model.layers.12.mlp.gate_proj",
112
+ "model.layers.7.self_attn.k_proj",
113
+ "model.layers.19.self_attn.q_proj",
114
+ "model.layers.6.self_attn.v_proj",
115
+ "model.layers.14.self_attn.q_proj",
116
+ "model.layers.16.mlp.up_proj",
117
+ "model.layers.0.mlp.up_proj",
118
+ "model.layers.11.mlp.down_proj",
119
+ "model.layers.9.self_attn.v_proj",
120
+ "model.layers.6.mlp.up_proj",
121
+ "model.layers.20.self_attn.o_proj",
122
+ "model.layers.0.self_attn.o_proj",
123
+ "model.layers.4.mlp.up_proj",
124
+ "model.layers.14.mlp.down_proj",
125
+ "model.layers.8.self_attn.v_proj",
126
+ "model.layers.13.self_attn.o_proj",
127
+ "model.layers.6.self_attn.o_proj",
128
+ "model.layers.0.self_attn.k_proj",
129
+ "model.layers.23.self_attn.v_proj",
130
+ "model.layers.13.self_attn.k_proj",
131
+ "model.layers.2.mlp.gate_proj",
132
+ "model.layers.12.self_attn.k_proj",
133
+ "model.layers.18.self_attn.k_proj",
134
+ "model.layers.8.self_attn.o_proj",
135
+ "model.layers.1.mlp.gate_proj",
136
+ "model.layers.13.self_attn.q_proj",
137
+ "model.layers.23.self_attn.o_proj",
138
+ "model.layers.5.self_attn.v_proj",
139
+ "model.layers.18.self_attn.v_proj",
140
+ "model.layers.18.mlp.gate_proj",
141
+ "model.layers.21.mlp.gate_proj",
142
+ "model.layers.8.mlp.gate_proj",
143
+ "model.layers.10.self_attn.q_proj",
144
+ "model.layers.0.self_attn.v_proj",
145
+ "model.layers.20.mlp.up_proj",
146
+ "model.layers.4.self_attn.k_proj",
147
+ "model.layers.6.mlp.down_proj",
148
+ "model.layers.18.mlp.down_proj",
149
+ "model.layers.11.self_attn.o_proj",
150
+ "model.layers.13.self_attn.v_proj",
151
+ "model.layers.21.self_attn.k_proj",
152
+ "model.layers.22.self_attn.k_proj",
153
+ "model.layers.22.self_attn.q_proj",
154
+ "model.layers.17.mlp.up_proj",
155
+ "model.layers.17.self_attn.q_proj",
156
+ "model.layers.5.mlp.gate_proj",
157
+ "model.layers.10.mlp.down_proj",
158
+ "model.layers.19.self_attn.v_proj",
159
+ "model.layers.17.mlp.down_proj",
160
+ "model.layers.3.mlp.down_proj",
161
+ "model.layers.22.self_attn.o_proj",
162
+ "model.layers.17.self_attn.v_proj",
163
+ "model.layers.14.mlp.up_proj",
164
+ "model.layers.11.mlp.gate_proj",
165
+ "model.layers.23.mlp.gate_proj",
166
+ "model.layers.11.self_attn.k_proj",
167
+ "model.layers.23.self_attn.q_proj",
168
+ "model.layers.9.mlp.up_proj",
169
+ "model.layers.9.self_attn.o_proj",
170
+ "model.layers.11.self_attn.q_proj",
171
+ "model.layers.1.self_attn.o_proj",
172
+ "model.layers.2.self_attn.q_proj",
173
+ "model.layers.10.mlp.gate_proj",
174
+ "model.layers.21.self_attn.q_proj",
175
+ "model.layers.22.mlp.down_proj",
176
+ "model.layers.1.self_attn.v_proj",
177
+ "model.layers.6.self_attn.k_proj",
178
+ "model.layers.14.mlp.gate_proj",
179
+ "model.layers.14.self_attn.o_proj",
180
+ "model.layers.15.self_attn.k_proj",
181
+ "model.layers.20.self_attn.q_proj",
182
+ "model.layers.9.self_attn.q_proj",
183
+ "model.layers.5.mlp.down_proj",
184
+ "model.layers.18.self_attn.o_proj",
185
+ "model.layers.20.self_attn.k_proj",
186
+ "model.layers.10.mlp.up_proj",
187
+ "model.layers.12.self_attn.q_proj",
188
+ "model.layers.15.mlp.gate_proj",
189
+ "model.layers.3.self_attn.o_proj",
190
+ "model.layers.15.self_attn.o_proj"
191
+ ],
192
+ "task_type": "CAUSAL_LM",
193
+ "use_dora": false,
194
+ "use_rslora": false
195
+ }
unimvu_0.5B_avqa/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63334885a08746d75803935f77a81134e6ee4c08c7d430565db53914d4308f76
3
+ size 70430368
unimvu_0.5B_avqa/config.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "lmms-lab/llava-onevision-qwen2-0.5b-ov",
3
+ "architectures": [
4
+ "LlavaQwenForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "feat_combine_method": "concat",
10
+ "hidden_act": "silu",
11
+ "hidden_size": 896,
12
+ "ignore_index": -100,
13
+ "image_aspect_ratio": "anyres_max_9",
14
+ "image_crop_resolution": null,
15
+ "image_grid_pinpoints": [
16
+ [
17
+ 384,
18
+ 384
19
+ ],
20
+ [
21
+ 384,
22
+ 768
23
+ ],
24
+ [
25
+ 384,
26
+ 1152
27
+ ],
28
+ [
29
+ 384,
30
+ 1536
31
+ ],
32
+ [
33
+ 384,
34
+ 1920
35
+ ],
36
+ [
37
+ 384,
38
+ 2304
39
+ ],
40
+ [
41
+ 768,
42
+ 384
43
+ ],
44
+ [
45
+ 768,
46
+ 768
47
+ ],
48
+ [
49
+ 768,
50
+ 1152
51
+ ],
52
+ [
53
+ 768,
54
+ 1536
55
+ ],
56
+ [
57
+ 768,
58
+ 1920
59
+ ],
60
+ [
61
+ 768,
62
+ 2304
63
+ ],
64
+ [
65
+ 1152,
66
+ 384
67
+ ],
68
+ [
69
+ 1152,
70
+ 768
71
+ ],
72
+ [
73
+ 1152,
74
+ 1152
75
+ ],
76
+ [
77
+ 1152,
78
+ 1536
79
+ ],
80
+ [
81
+ 1152,
82
+ 1920
83
+ ],
84
+ [
85
+ 1152,
86
+ 2304
87
+ ],
88
+ [
89
+ 1536,
90
+ 384
91
+ ],
92
+ [
93
+ 1536,
94
+ 768
95
+ ],
96
+ [
97
+ 1536,
98
+ 1152
99
+ ],
100
+ [
101
+ 1536,
102
+ 1536
103
+ ],
104
+ [
105
+ 1536,
106
+ 1920
107
+ ],
108
+ [
109
+ 1536,
110
+ 2304
111
+ ],
112
+ [
113
+ 1920,
114
+ 384
115
+ ],
116
+ [
117
+ 1920,
118
+ 768
119
+ ],
120
+ [
121
+ 1920,
122
+ 1152
123
+ ],
124
+ [
125
+ 1920,
126
+ 1536
127
+ ],
128
+ [
129
+ 1920,
130
+ 1920
131
+ ],
132
+ [
133
+ 1920,
134
+ 2304
135
+ ],
136
+ [
137
+ 2304,
138
+ 384
139
+ ],
140
+ [
141
+ 2304,
142
+ 768
143
+ ],
144
+ [
145
+ 2304,
146
+ 1152
147
+ ],
148
+ [
149
+ 2304,
150
+ 1536
151
+ ],
152
+ [
153
+ 2304,
154
+ 1920
155
+ ],
156
+ [
157
+ 2304,
158
+ 2304
159
+ ]
160
+ ],
161
+ "image_split_resolution": null,
162
+ "image_token_index": 151646,
163
+ "initializer_range": 0.02,
164
+ "input_dim": 1024,
165
+ "intermediate_size": 4864,
166
+ "max_position_embeddings": 32768,
167
+ "max_window_layers": 24,
168
+ "mm_hidden_size": 1152,
169
+ "mm_newline_position": "grid",
170
+ "mm_patch_merge_type": "spatial_unpad",
171
+ "mm_projector_lr": null,
172
+ "mm_projector_type": "mlp2x_gelu",
173
+ "mm_resampler_type": null,
174
+ "mm_spatial_pool_mode": "bilinear",
175
+ "mm_spatial_pool_out_channels": null,
176
+ "mm_spatial_pool_stride": 2,
177
+ "mm_tunable_parts": "mm_vision_tower,mm_mlp_adapter,mm_language_model",
178
+ "mm_use_im_patch_token": false,
179
+ "mm_use_im_start_end": false,
180
+ "mm_video_tower": null,
181
+ "mm_vision_select_feature": "patch",
182
+ "mm_vision_select_layer": -2,
183
+ "mm_vision_tower": "google/siglip-so400m-patch14-384",
184
+ "mm_vision_tower_lr": 2e-06,
185
+ "modality_aggregator_attention_dropout": 0.0,
186
+ "modality_aggregator_config": {
187
+ "attention_dropout": 0.0,
188
+ "hidden_size": 896,
189
+ "modality_token_num": 1,
190
+ "num_heads": 14,
191
+ "num_key_value_heads": 14,
192
+ "rope_theta": 250000
193
+ },
194
+ "modality_aggregator_hidden_size": 896,
195
+ "modality_aggregator_modality_token_num": 1,
196
+ "modality_aggregator_num_heads": 14,
197
+ "modality_aggregator_num_key_value_heads": 14,
198
+ "modality_aggregator_rope_theta": 250000,
199
+ "modality_input_dims": null,
200
+ "model_name_or_path": "lmms-lab/llava-onevision-qwen2-0.5b-ov",
201
+ "model_type": "unimvu_v3_qwen2",
202
+ "num_attention_heads": 14,
203
+ "num_cross_modality_hidden_layers": 1,
204
+ "num_hidden_layers": 24,
205
+ "num_key_value_heads": 2,
206
+ "pos_skipping_range": 4096,
207
+ "pretrain_mm_mlp_adapter": null,
208
+ "projector_hidden_act": "gelu",
209
+ "rms_norm_eps": 1e-06,
210
+ "rope_scaling": null,
211
+ "rope_theta": 1000000.0,
212
+ "sliding_window": 32768,
213
+ "support_modalities": [
214
+ "video",
215
+ "audio"
216
+ ],
217
+ "text_config": {
218
+ "_name_or_path": "",
219
+ "add_cross_attention": false,
220
+ "architectures": null,
221
+ "attention_bias": false,
222
+ "attention_dropout": 0.0,
223
+ "bad_words_ids": null,
224
+ "begin_suppress_tokens": null,
225
+ "bos_token_id": 1,
226
+ "chunk_size_feed_forward": 0,
227
+ "cross_attention_hidden_size": null,
228
+ "decoder_start_token_id": null,
229
+ "diversity_penalty": 0.0,
230
+ "do_sample": false,
231
+ "early_stopping": false,
232
+ "encoder_no_repeat_ngram_size": 0,
233
+ "eos_token_id": 2,
234
+ "exponential_decay_length_penalty": null,
235
+ "finetuning_task": null,
236
+ "forced_bos_token_id": null,
237
+ "forced_eos_token_id": null,
238
+ "hidden_act": "silu",
239
+ "hidden_size": 4096,
240
+ "id2label": {
241
+ "0": "LABEL_0",
242
+ "1": "LABEL_1"
243
+ },
244
+ "initializer_range": 0.02,
245
+ "intermediate_size": 11008,
246
+ "is_decoder": false,
247
+ "is_encoder_decoder": false,
248
+ "label2id": {
249
+ "LABEL_0": 0,
250
+ "LABEL_1": 1
251
+ },
252
+ "length_penalty": 1.0,
253
+ "max_length": 20,
254
+ "max_position_embeddings": 2048,
255
+ "min_length": 0,
256
+ "model_type": "llama",
257
+ "no_repeat_ngram_size": 0,
258
+ "num_attention_heads": 32,
259
+ "num_beam_groups": 1,
260
+ "num_beams": 1,
261
+ "num_hidden_layers": 32,
262
+ "num_key_value_heads": 32,
263
+ "num_return_sequences": 1,
264
+ "output_attentions": false,
265
+ "output_hidden_states": false,
266
+ "output_scores": false,
267
+ "pad_token_id": null,
268
+ "prefix": null,
269
+ "pretraining_tp": 1,
270
+ "problem_type": null,
271
+ "pruned_heads": {},
272
+ "remove_invalid_values": false,
273
+ "repetition_penalty": 1.0,
274
+ "return_dict": true,
275
+ "return_dict_in_generate": false,
276
+ "rms_norm_eps": 1e-06,
277
+ "rope_scaling": null,
278
+ "rope_theta": 10000.0,
279
+ "sep_token_id": null,
280
+ "suppress_tokens": null,
281
+ "task_specific_params": null,
282
+ "temperature": 1.0,
283
+ "tf_legacy_loss": false,
284
+ "tie_encoder_decoder": false,
285
+ "tie_word_embeddings": false,
286
+ "tokenizer_class": null,
287
+ "top_k": 50,
288
+ "top_p": 1.0,
289
+ "torch_dtype": null,
290
+ "torchscript": false,
291
+ "typical_p": 1.0,
292
+ "use_bfloat16": false,
293
+ "use_cache": true,
294
+ "vocab_size": 32000
295
+ },
296
+ "tie_word_embeddings": true,
297
+ "tokenizer_model_max_length": 32768,
298
+ "tokenizer_padding_side": "right",
299
+ "torch_dtype": "bfloat16",
300
+ "transformers_version": "4.37.2",
301
+ "tune_addition_token_embeddings": false,
302
+ "tune_mm_mlp_adapter": false,
303
+ "unfreeze_mm_vision_tower": false,
304
+ "use_cache": true,
305
+ "use_mm_proj": true,
306
+ "use_pos_skipping": false,
307
+ "use_sliding_window": false,
308
+ "version": "conv_llava_ov_qwen",
309
+ "video_tower": null,
310
+ "vision_config": {
311
+ "_name_or_path": "",
312
+ "add_cross_attention": false,
313
+ "architectures": null,
314
+ "attention_dropout": 0.0,
315
+ "bad_words_ids": null,
316
+ "begin_suppress_tokens": null,
317
+ "bos_token_id": null,
318
+ "chunk_size_feed_forward": 0,
319
+ "cross_attention_hidden_size": null,
320
+ "decoder_start_token_id": null,
321
+ "diversity_penalty": 0.0,
322
+ "do_sample": false,
323
+ "early_stopping": false,
324
+ "encoder_no_repeat_ngram_size": 0,
325
+ "eos_token_id": null,
326
+ "exponential_decay_length_penalty": null,
327
+ "finetuning_task": null,
328
+ "forced_bos_token_id": null,
329
+ "forced_eos_token_id": null,
330
+ "hidden_act": "quick_gelu",
331
+ "hidden_size": 1024,
332
+ "id2label": {
333
+ "0": "LABEL_0",
334
+ "1": "LABEL_1"
335
+ },
336
+ "image_size": 336,
337
+ "initializer_factor": 1.0,
338
+ "initializer_range": 0.02,
339
+ "intermediate_size": 4096,
340
+ "is_decoder": false,
341
+ "is_encoder_decoder": false,
342
+ "label2id": {
343
+ "LABEL_0": 0,
344
+ "LABEL_1": 1
345
+ },
346
+ "layer_norm_eps": 1e-05,
347
+ "length_penalty": 1.0,
348
+ "max_length": 20,
349
+ "min_length": 0,
350
+ "model_type": "clip_vision_model",
351
+ "no_repeat_ngram_size": 0,
352
+ "num_attention_heads": 16,
353
+ "num_beam_groups": 1,
354
+ "num_beams": 1,
355
+ "num_channels": 3,
356
+ "num_hidden_layers": 24,
357
+ "num_return_sequences": 1,
358
+ "output_attentions": false,
359
+ "output_hidden_states": false,
360
+ "output_scores": false,
361
+ "pad_token_id": null,
362
+ "patch_size": 14,
363
+ "prefix": null,
364
+ "problem_type": null,
365
+ "projection_dim": 768,
366
+ "pruned_heads": {},
367
+ "remove_invalid_values": false,
368
+ "repetition_penalty": 1.0,
369
+ "return_dict": true,
370
+ "return_dict_in_generate": false,
371
+ "sep_token_id": null,
372
+ "suppress_tokens": null,
373
+ "task_specific_params": null,
374
+ "temperature": 1.0,
375
+ "tf_legacy_loss": false,
376
+ "tie_encoder_decoder": false,
377
+ "tie_word_embeddings": true,
378
+ "tokenizer_class": null,
379
+ "top_k": 50,
380
+ "top_p": 1.0,
381
+ "torch_dtype": null,
382
+ "torchscript": false,
383
+ "typical_p": 1.0,
384
+ "use_bfloat16": false,
385
+ "vocab_size": 32000
386
+ },
387
+ "vision_feature_layer": -2,
388
+ "vision_feature_select_strategy": "default",
389
+ "vision_tower": "google/siglip-so400m-patch14-384",
390
+ "vision_tower_pretrained": null,
391
+ "vocab_size": 151647
392
+ }
unimvu_0.5B_avqa/non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:104d72475c216b6147cd9d38871e8d15ce3546b3b8d050536c767aafecd8dd9b
3
+ size 25955038