thisisiron committed on
Commit
4590b7c
·
verified ·
1 Parent(s): 138bec8

Upload Ovis2ForConditionalGeneration

Browse files
Files changed (3) hide show
  1. config.json +40 -5
  2. generation_config.json +4 -1
  3. model.safetensors +3 -0
config.json CHANGED
@@ -2,21 +2,28 @@
2
  "architectures": [
3
  "Ovis2ForConditionalGeneration"
4
  ],
 
5
  "hidden_size": 1536,
6
  "image_token_id": 151665,
7
  "initializer_range": 0.02,
8
  "model_type": "ovis2",
9
  "text_config": {
 
10
  "architectures": [
11
  "Qwen2ForCausalLM"
12
  ],
13
  "attention_dropout": 0.0,
14
  "bos_token_id": 151643,
 
 
 
15
  "eos_token_id": 151645,
 
16
  "hidden_act": "silu",
17
  "hidden_size": 1536,
18
  "initializer_range": 0.02,
19
  "intermediate_size": 8960,
 
20
  "layer_types": [
21
  "full_attention",
22
  "full_attention",
@@ -53,19 +60,30 @@
53
  "num_attention_heads": 12,
54
  "num_hidden_layers": 28,
55
  "num_key_value_heads": 2,
 
 
 
56
  "rms_norm_eps": 1e-06,
57
- "rope_scaling": null,
58
- "rope_theta": 1000000.0,
 
 
 
59
  "sliding_window": null,
 
 
 
60
  "tie_word_embeddings": true,
61
- "torch_dtype": "bfloat16",
 
62
  "use_cache": true,
63
  "use_sliding_window": false,
64
  "vocab_size": 151936
65
  },
66
- "torch_dtype": "float32",
67
- "transformers_version": "4.56.0.dev0",
68
  "vision_config": {
 
69
  "attention_dropout": 0.0,
70
  "backbone_config": {
71
  "_attn_implementation_autoset": true,
@@ -151,25 +169,42 @@
151
  "use_bias": false
152
  },
153
  "backbone_kwargs": {},
 
 
 
154
  "depths": null,
155
  "drop_cls_token": false,
 
 
 
156
  "hidden_act": "silu",
157
  "hidden_size": 1024,
158
  "hidden_stride": 2,
159
  "image_size": 448,
160
  "initializer_range": 0.02,
161
  "intermediate_size": 2816,
 
162
  "mlp_bias": false,
163
  "model_type": "",
164
  "num_attention_heads": 8,
165
  "num_channels": 3,
166
  "num_hidden_layers": 24,
167
  "num_visual_indicator_tokens": 5,
 
168
  "patch_size": 14,
 
 
169
  "qkv_bias": false,
170
  "rms_norm_eps": 1e-05,
 
 
171
  "tau": 1.0,
 
 
 
172
  "tokenize_function": "softmax",
 
 
173
  "use_indicators": false,
174
  "vocab_size": 65536
175
  },
 
2
  "architectures": [
3
  "Ovis2ForConditionalGeneration"
4
  ],
5
+ "dtype": "float32",
6
  "hidden_size": 1536,
7
  "image_token_id": 151665,
8
  "initializer_range": 0.02,
9
  "model_type": "ovis2",
10
  "text_config": {
11
+ "add_cross_attention": false,
12
  "architectures": [
13
  "Qwen2ForCausalLM"
14
  ],
15
  "attention_dropout": 0.0,
16
  "bos_token_id": 151643,
17
+ "cross_attention_hidden_size": null,
18
+ "decoder_start_token_id": null,
19
+ "dtype": "float32",
20
  "eos_token_id": 151645,
21
+ "finetuning_task": null,
22
  "hidden_act": "silu",
23
  "hidden_size": 1536,
24
  "initializer_range": 0.02,
25
  "intermediate_size": 8960,
26
+ "is_decoder": false,
27
  "layer_types": [
28
  "full_attention",
29
  "full_attention",
 
60
  "num_attention_heads": 12,
61
  "num_hidden_layers": 28,
62
  "num_key_value_heads": 2,
63
+ "pad_token_id": null,
64
+ "prefix": null,
65
+ "pruned_heads": {},
66
  "rms_norm_eps": 1e-06,
67
+ "rope_parameters": {
68
+ "rope_theta": 1000000.0,
69
+ "rope_type": "default"
70
+ },
71
+ "sep_token_id": null,
72
  "sliding_window": null,
73
+ "task_specific_params": null,
74
+ "tf_legacy_loss": false,
75
+ "tie_encoder_decoder": false,
76
  "tie_word_embeddings": true,
77
+ "tokenizer_class": null,
78
+ "torchscript": false,
79
  "use_cache": true,
80
  "use_sliding_window": false,
81
  "vocab_size": 151936
82
  },
83
+ "tie_word_embeddings": true,
84
+ "transformers_version": "5.5.0.dev0",
85
  "vision_config": {
86
+ "add_cross_attention": false,
87
  "attention_dropout": 0.0,
88
  "backbone_config": {
89
  "_attn_implementation_autoset": true,
 
169
  "use_bias": false
170
  },
171
  "backbone_kwargs": {},
172
+ "bos_token_id": null,
173
+ "cross_attention_hidden_size": null,
174
+ "decoder_start_token_id": null,
175
  "depths": null,
176
  "drop_cls_token": false,
177
+ "dtype": "float32",
178
+ "eos_token_id": null,
179
+ "finetuning_task": null,
180
  "hidden_act": "silu",
181
  "hidden_size": 1024,
182
  "hidden_stride": 2,
183
  "image_size": 448,
184
  "initializer_range": 0.02,
185
  "intermediate_size": 2816,
186
+ "is_decoder": false,
187
  "mlp_bias": false,
188
  "model_type": "",
189
  "num_attention_heads": 8,
190
  "num_channels": 3,
191
  "num_hidden_layers": 24,
192
  "num_visual_indicator_tokens": 5,
193
+ "pad_token_id": null,
194
  "patch_size": 14,
195
+ "prefix": null,
196
+ "pruned_heads": {},
197
  "qkv_bias": false,
198
  "rms_norm_eps": 1e-05,
199
+ "sep_token_id": null,
200
+ "task_specific_params": null,
201
  "tau": 1.0,
202
+ "tf_legacy_loss": false,
203
+ "tie_encoder_decoder": false,
204
+ "tie_word_embeddings": true,
205
  "tokenize_function": "softmax",
206
+ "tokenizer_class": null,
207
+ "torchscript": false,
208
  "use_indicators": false,
209
  "vocab_size": 65536
210
  },
generation_config.json CHANGED
@@ -2,5 +2,8 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 151643,
4
  "eos_token_id": 151645,
5
- "transformers_version": "4.56.0.dev0"
 
 
 
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 151643,
4
  "eos_token_id": 151645,
5
+ "output_attentions": false,
6
+ "output_hidden_states": false,
7
+ "transformers_version": "5.5.0.dev0",
8
+ "use_cache": true
9
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc5e9c6092d80fbe27004ba0540cbf484f091fc1853bbef7b05b6914eea8f1cb
3
+ size 8891707608