BonanDing committed (verified)
Commit 0b60cd8 · 1 Parent(s): 03cd922

Add UniMVU release checkpoint: unimvu_uni_0.5B

unimvu_uni_0.5B/adapter_config.json ADDED
@@ -0,0 +1,195 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "lmms-lab/llava-onevision-qwen2-0.5b-ov",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "model.layers.8.mlp.up_proj",
+    "model.layers.22.self_attn.v_proj",
+    "model.layers.13.mlp.gate_proj",
+    "model.layers.18.mlp.up_proj",
+    "model.layers.3.self_attn.v_proj",
+    "model.layers.16.self_attn.k_proj",
+    "model.layers.12.self_attn.v_proj",
+    "model.layers.18.self_attn.o_proj",
+    "model.layers.20.self_attn.v_proj",
+    "model.layers.6.self_attn.v_proj",
+    "model.layers.15.self_attn.q_proj",
+    "model.layers.11.self_attn.v_proj",
+    "model.layers.3.mlp.up_proj",
+    "model.layers.14.mlp.gate_proj",
+    "model.layers.15.self_attn.k_proj",
+    "model.layers.21.mlp.down_proj",
+    "model.layers.17.self_attn.o_proj",
+    "model.layers.5.self_attn.v_proj",
+    "model.layers.13.self_attn.o_proj",
+    "model.layers.11.mlp.up_proj",
+    "model.layers.4.self_attn.v_proj",
+    "model.layers.18.self_attn.q_proj",
+    "model.layers.16.mlp.gate_proj",
+    "model.layers.22.mlp.up_proj",
+    "model.layers.14.mlp.up_proj",
+    "model.layers.19.mlp.up_proj",
+    "model.layers.12.self_attn.o_proj",
+    "model.layers.14.self_attn.q_proj",
+    "model.layers.23.self_attn.q_proj",
+    "model.layers.13.self_attn.q_proj",
+    "model.layers.3.mlp.down_proj",
+    "model.layers.3.self_attn.o_proj",
+    "model.layers.14.mlp.down_proj",
+    "model.layers.17.mlp.gate_proj",
+    "model.layers.13.mlp.up_proj",
+    "model.layers.2.self_attn.k_proj",
+    "model.layers.7.self_attn.q_proj",
+    "model.layers.0.self_attn.q_proj",
+    "model.layers.9.self_attn.q_proj",
+    "model.layers.13.self_attn.k_proj",
+    "model.layers.21.self_attn.o_proj",
+    "model.layers.6.self_attn.q_proj",
+    "model.layers.14.self_attn.o_proj",
+    "model.layers.21.self_attn.v_proj",
+    "model.layers.19.mlp.down_proj",
+    "model.layers.4.self_attn.k_proj",
+    "model.layers.4.mlp.gate_proj",
+    "model.layers.2.self_attn.q_proj",
+    "model.layers.6.self_attn.k_proj",
+    "model.layers.4.mlp.up_proj",
+    "model.layers.19.self_attn.o_proj",
+    "model.layers.5.mlp.down_proj",
+    "model.layers.20.mlp.up_proj",
+    "model.layers.1.mlp.down_proj",
+    "model.layers.12.self_attn.k_proj",
+    "model.layers.16.self_attn.q_proj",
+    "model.layers.12.mlp.down_proj",
+    "model.layers.5.self_attn.o_proj",
+    "model.layers.1.self_attn.k_proj",
+    "model.layers.13.mlp.down_proj",
+    "model.layers.22.mlp.down_proj",
+    "model.layers.7.mlp.gate_proj",
+    "model.layers.11.self_attn.q_proj",
+    "model.layers.22.self_attn.q_proj",
+    "model.layers.11.mlp.gate_proj",
+    "model.layers.11.mlp.down_proj",
+    "model.layers.22.mlp.gate_proj",
+    "model.layers.16.mlp.down_proj",
+    "model.layers.1.mlp.gate_proj",
+    "model.layers.9.mlp.gate_proj",
+    "model.layers.23.self_attn.k_proj",
+    "model.layers.23.mlp.up_proj",
+    "model.layers.4.mlp.down_proj",
+    "model.layers.9.mlp.down_proj",
+    "model.layers.22.self_attn.o_proj",
+    "model.layers.20.self_attn.k_proj",
+    "model.layers.7.self_attn.k_proj",
+    "model.layers.5.self_attn.q_proj",
+    "model.layers.2.self_attn.o_proj",
+    "model.layers.12.mlp.up_proj",
+    "model.layers.5.mlp.up_proj",
+    "model.layers.0.mlp.down_proj",
+    "model.layers.23.self_attn.v_proj",
+    "model.layers.7.mlp.up_proj",
+    "model.layers.7.self_attn.o_proj",
+    "model.layers.18.self_attn.v_proj",
+    "model.layers.8.self_attn.k_proj",
+    "model.layers.10.self_attn.v_proj",
+    "model.layers.2.mlp.up_proj",
+    "model.layers.11.self_attn.k_proj",
+    "model.layers.0.self_attn.o_proj",
+    "model.layers.23.mlp.down_proj",
+    "model.layers.5.mlp.gate_proj",
+    "model.layers.6.mlp.up_proj",
+    "model.layers.2.self_attn.v_proj",
+    "model.layers.8.mlp.gate_proj",
+    "model.layers.16.self_attn.v_proj",
+    "model.layers.19.self_attn.k_proj",
+    "model.layers.14.self_attn.k_proj",
+    "model.layers.1.self_attn.q_proj",
+    "model.layers.9.self_attn.k_proj",
+    "model.layers.3.self_attn.k_proj",
+    "model.layers.9.self_attn.v_proj",
+    "model.layers.10.self_attn.q_proj",
+    "model.layers.1.mlp.up_proj",
+    "model.layers.18.self_attn.k_proj",
+    "model.layers.17.mlp.up_proj",
+    "model.layers.17.mlp.down_proj",
+    "model.layers.4.self_attn.q_proj",
+    "model.layers.1.self_attn.o_proj",
+    "model.layers.23.mlp.gate_proj",
+    "model.layers.6.mlp.gate_proj",
+    "model.layers.8.self_attn.o_proj",
+    "model.layers.21.self_attn.q_proj",
+    "model.layers.20.self_attn.o_proj",
+    "model.layers.15.self_attn.o_proj",
+    "model.layers.8.self_attn.q_proj",
+    "model.layers.2.mlp.gate_proj",
+    "model.layers.7.mlp.down_proj",
+    "model.layers.18.mlp.down_proj",
+    "model.layers.2.mlp.down_proj",
+    "model.layers.12.mlp.gate_proj",
+    "model.layers.0.mlp.up_proj",
+    "model.layers.14.self_attn.v_proj",
+    "model.layers.15.mlp.down_proj",
+    "model.layers.6.mlp.down_proj",
+    "model.layers.0.self_attn.k_proj",
+    "model.layers.18.mlp.gate_proj",
+    "model.layers.10.mlp.down_proj",
+    "model.layers.15.self_attn.v_proj",
+    "model.layers.21.mlp.up_proj",
+    "model.layers.21.mlp.gate_proj",
+    "model.layers.15.mlp.gate_proj",
+    "model.layers.17.self_attn.v_proj",
+    "model.layers.10.self_attn.o_proj",
+    "model.layers.1.self_attn.v_proj",
+    "model.layers.12.self_attn.q_proj",
+    "model.layers.23.self_attn.o_proj",
+    "model.layers.19.self_attn.v_proj",
+    "model.layers.7.self_attn.v_proj",
+    "model.layers.9.self_attn.o_proj",
+    "model.layers.16.self_attn.o_proj",
+    "model.layers.21.self_attn.k_proj",
+    "model.layers.0.mlp.gate_proj",
+    "model.layers.19.mlp.gate_proj",
+    "model.layers.4.self_attn.o_proj",
+    "model.layers.10.mlp.up_proj",
+    "model.layers.0.self_attn.v_proj",
+    "model.layers.8.self_attn.v_proj",
+    "model.layers.20.self_attn.q_proj",
+    "model.layers.16.mlp.up_proj",
+    "model.layers.10.self_attn.k_proj",
+    "model.layers.15.mlp.up_proj",
+    "model.layers.17.self_attn.q_proj",
+    "model.layers.8.mlp.down_proj",
+    "model.layers.3.mlp.gate_proj",
+    "model.layers.6.self_attn.o_proj",
+    "model.layers.9.mlp.up_proj",
+    "model.layers.17.self_attn.k_proj",
+    "model.layers.22.self_attn.k_proj",
+    "model.layers.20.mlp.down_proj",
+    "model.layers.10.mlp.gate_proj",
+    "model.layers.5.self_attn.k_proj",
+    "model.layers.19.self_attn.q_proj",
+    "model.layers.11.self_attn.o_proj",
+    "model.layers.20.mlp.gate_proj",
+    "model.layers.3.self_attn.q_proj",
+    "model.layers.13.self_attn.v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
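
This adapter config is a standard PEFT LoRA setup over the 0.5B LLaVA-OneVision base: rank r = 64 and lora_alpha = 128, so each adapted projection receives a delta of (lora_alpha/r)·BA = 2·BA, and the 168 target_modules entries enumerate every q/k/v/o and gate/up/down projection across all 24 layers (24 × 7), just in shuffled order. A minimal loading sketch, assuming the UniMVU codebase has already constructed the base LlavaQwenForCausalLM (the custom model_type means a plain AutoModel call will not work); `base_model` below is that hypothetical in-memory object:

```python
# Minimal sketch, not the release's official loading path.
from peft import PeftModel

# base_model: a LlavaQwenForCausalLM built by the UniMVU/LLaVA code (assumption).
lora_model = PeftModel.from_pretrained(
    base_model,
    "unimvu_uni_0.5B",       # directory containing adapter_config.json
    is_trainable=False,      # matches "inference_mode": true
)
model = lora_model.merge_and_unload()  # optionally fold the LoRA deltas into the base weights
```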
unimvu_uni_0.5B/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e7a1a20f9d35a27a238b207b376a644fdea5320b1d3ea2afdb4a228068f999
+size 70430368
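
This .safetensors entry is a Git LFS pointer, not the weights themselves: `oid` is the SHA-256 of the real 70,430,368-byte blob that LFS fetches on checkout. A quick integrity check after download, as a sketch:

```python
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    """Stream the file so large checkpoints don't need to fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

expected = "a2e7a1a20f9d35a27a238b207b376a644fdea5320b1d3ea2afdb4a228068f999"
assert sha256_of("unimvu_uni_0.5B/adapter_model.safetensors") == expected
```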
unimvu_uni_0.5B/config.json ADDED
@@ -0,0 +1,400 @@
+{
+  "_name_or_path": "lmms-lab/llava-onevision-qwen2-0.5b-ov",
+  "architectures": [
+    "LlavaQwenForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "default_input_dim": 1024,
+  "eos_token_id": 151645,
+  "feat_combine_method": "add",
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "ignore_index": -100,
+  "image_aspect_ratio": "anyres_max_9",
+  "image_crop_resolution": null,
+  "image_grid_pinpoints": [
+    [
+      384,
+      384
+    ],
+    [
+      384,
+      768
+    ],
+    [
+      384,
+      1152
+    ],
+    [
+      384,
+      1536
+    ],
+    [
+      384,
+      1920
+    ],
+    [
+      384,
+      2304
+    ],
+    [
+      768,
+      384
+    ],
+    [
+      768,
+      768
+    ],
+    [
+      768,
+      1152
+    ],
+    [
+      768,
+      1536
+    ],
+    [
+      768,
+      1920
+    ],
+    [
+      768,
+      2304
+    ],
+    [
+      1152,
+      384
+    ],
+    [
+      1152,
+      768
+    ],
+    [
+      1152,
+      1152
+    ],
+    [
+      1152,
+      1536
+    ],
+    [
+      1152,
+      1920
+    ],
+    [
+      1152,
+      2304
+    ],
+    [
+      1536,
+      384
+    ],
+    [
+      1536,
+      768
+    ],
+    [
+      1536,
+      1152
+    ],
+    [
+      1536,
+      1536
+    ],
+    [
+      1536,
+      1920
+    ],
+    [
+      1536,
+      2304
+    ],
+    [
+      1920,
+      384
+    ],
+    [
+      1920,
+      768
+    ],
+    [
+      1920,
+      1152
+    ],
+    [
+      1920,
+      1536
+    ],
+    [
+      1920,
+      1920
+    ],
+    [
+      1920,
+      2304
+    ],
+    [
+      2304,
+      384
+    ],
+    [
+      2304,
+      768
+    ],
+    [
+      2304,
+      1152
+    ],
+    [
+      2304,
+      1536
+    ],
+    [
+      2304,
+      1920
+    ],
+    [
+      2304,
+      2304
+    ]
+  ],
+  "image_split_resolution": null,
+  "image_token_index": 151646,
+  "initializer_range": 0.02,
+  "input_dim": 1024,
+  "intermediate_size": 4864,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 24,
+  "mm_hidden_size": 1152,
+  "mm_newline_position": "grid",
+  "mm_patch_merge_type": "spatial_unpad",
+  "mm_projector_lr": null,
+  "mm_projector_type": "mlp2x_gelu",
+  "mm_resampler_type": null,
+  "mm_spatial_pool_mode": "bilinear",
+  "mm_spatial_pool_out_channels": null,
+  "mm_spatial_pool_stride": 2,
+  "mm_tunable_parts": "mm_vision_tower,mm_mlp_adapter,mm_language_model",
+  "mm_use_im_patch_token": false,
+  "mm_use_im_start_end": false,
+  "mm_video_tower": null,
+  "mm_vision_select_feature": "patch",
+  "mm_vision_select_layer": -2,
+  "mm_vision_tower": "google/siglip-so400m-patch14-384",
+  "mm_vision_tower_lr": 2e-06,
+  "modality_aggregator_attention_dropout": 0.0,
+  "modality_aggregator_config": {
+    "attention_dropout": 0.0,
+    "hidden_size": 896,
+    "modality_token_num": 1,
+    "num_heads": 14,
+    "num_key_value_heads": 14,
+    "rope_theta": 250000
+  },
+  "modality_aggregator_hidden_size": 896,
+  "modality_aggregator_modality_token_num": 1,
+  "modality_aggregator_num_heads": 14,
+  "modality_aggregator_num_key_value_heads": 14,
+  "modality_aggregator_rope_theta": 250000,
+  "modality_input_dims": {
+    "3d_feature": 1024,
+    "audio": 1024,
+    "dense_video": 1024,
+    "video": 1024
+  },
+  "model_name_or_path": "lmms-lab/llava-onevision-qwen2-0.5b-ov",
+  "model_type": "unimvuv3_uni",
+  "num_attention_heads": 14,
+  "num_cross_modality_hidden_layers": 1,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "pos_skipping_range": 4096,
+  "pretrain_mm_mlp_adapter": null,
+  "projector_hidden_act": "gelu",
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "support_modalities": [
+    "video",
+    "audio",
+    "3d_feature",
+    "dense_video"
+  ],
+  "text_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 1,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 11008,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 2048,
+    "min_length": 0,
+    "model_type": "llama",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 32,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 32,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "prefix": null,
+    "pretraining_tp": 1,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 10000.0,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 32000
+  },
+  "tie_word_embeddings": true,
+  "tokenizer_model_max_length": 32768,
+  "tokenizer_padding_side": "right",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.37.2",
+  "tune_addition_token_embeddings": false,
+  "tune_mm_mlp_adapter": false,
+  "unfreeze_mm_vision_tower": false,
+  "use_cache": true,
+  "use_mm_proj": true,
+  "use_pos_skipping": false,
+  "use_sliding_window": false,
+  "version": "conv_llava_ov_qwen",
+  "video_tower": null,
+  "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 336,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "clip_vision_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 14,
+    "prefix": null,
+    "problem_type": null,
+    "projection_dim": 768,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "vocab_size": 32000
+  },
+  "vision_feature_layer": -2,
+  "vision_feature_select_strategy": "default",
+  "vision_tower": "google/siglip-so400m-patch14-384",
+  "vision_tower_pretrained": null,
+  "vocab_size": 151647
+}
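
config.json combines the Qwen2-style language-model fields (hidden_size 896, 24 layers, GQA with 2 KV heads) with UniMVU's multimodal extensions: a SigLIP vision tower, a 36-entry anyres tiling grid (384 px tiles up to a 6×6 layout), and a modality aggregator that maps video, audio, 3d_feature, and dense_video features (all 1024-dim) into the LM. Since model_type "unimvuv3_uni" is custom, AutoConfig would need the UniMVU code registered; a sketch that inspects the raw JSON instead:

```python
import json

with open("unimvu_uni_0.5B/config.json") as f:
    cfg = json.load(f)

print(cfg["support_modalities"])         # ['video', 'audio', '3d_feature', 'dense_video']
print(cfg["modality_input_dims"])        # each modality feeds 1024-dim features
print(len(cfg["image_grid_pinpoints"]))  # 36 anyres resolutions
print(cfg["modality_aggregator_config"]) # 14-head aggregator, 1 token per modality
```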
unimvu_uni_0.5B/non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a05727352f884de2300369399c06a8cebcc001e6e72f7a556dbbc61257aac262
+size 33296386
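
non_lora_trainables.bin (also an LFS pointer) holds the weights trained outside the LoRA adapter; in LLaVA-style releases this is typically the mm projector, the modality aggregator, and any tuned embeddings. A loading sketch; the "base_model.model." key prefix is the usual LLaVA convention and an assumption here, not something this diff confirms:

```python
import torch

non_lora = torch.load("unimvu_uni_0.5B/non_lora_trainables.bin", map_location="cpu")
# Strip the PEFT wrapper prefix if present (assumed LLaVA-style naming).
non_lora = {k.removeprefix("base_model.model."): v for k, v in non_lora.items()}
# model: the merged base + LoRA model from the adapter sketch above.
missing, unexpected = model.load_state_dict(non_lora, strict=False)
```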