rkazants committed on
Commit
17d19da
·
verified ·
1 Parent(s): 5fab6b4

Upload 10 files

Browse files
config.json CHANGED
@@ -1,55 +1,173 @@
1
  {
2
- "_name_or_path": "rkazants/tiny-random-MiniCPM-o-2_6",
3
  "architectures": [
4
  "MiniCPMO"
5
  ],
6
  "attention_dropout": 0.0,
7
  "audio_chunk_length": 1.0,
8
  "audio_config": {
9
- "classifier_proj_size": 64,
10
- "d_model": 4,
11
- "decoder_attention_heads": 2,
12
- "decoder_ffn_dim": 32,
13
- "decoder_layers": 2,
14
- "encoder_attention_heads": 2,
15
- "encoder_ffn_dim": 32,
 
 
 
 
 
 
 
 
 
16
  "encoder_layers": 2,
17
- "intermediate_size": 64,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  "model_type": "whisper",
19
- "num_hidden_layers": 2,
20
- "num_mel_bins": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  "audio_pool_step": 2,
23
  "auto_map": {
24
- "AutoConfig": "configuration_minicpm.MiniCPMOConfig",
25
- "AutoModel": "modeling_minicpmo.MiniCPMO",
26
- "AutoModelForCausalLM": "modeling_minicpmo.MiniCPMO"
27
  },
28
  "batch_vision_input": true,
29
- "drop_vision_last_layer": true,
 
 
 
30
  "hidden_act": "silu",
31
- "hidden_size": 128,
32
- "image_size": 28,
33
  "init_audio": true,
34
  "init_tts": true,
35
  "init_vision": true,
36
  "initializer_range": 0.02,
37
- "intermediate_size": 128,
 
38
  "max_position_embeddings": 32768,
39
  "max_window_layers": 28,
40
  "model_type": "minicpmo",
41
- "num_attention_heads": 2,
 
42
  "num_hidden_layers": 2,
43
- "num_key_value_heads": 32,
44
- "patch_size": 2,
45
- "query_num": 2,
46
  "rms_norm_eps": 1e-06,
47
- "rope_theta": 10000.0,
48
  "slice_config": {
49
- "max_slice_nums": 3,
50
- "model_type": "minicpmv",
51
- "patch_size": 2,
52
- "scale_resolution": 256
53
  },
54
  "slice_mode": true,
55
  "sliding_window": null,
@@ -58,26 +176,23 @@
58
  "torch_dtype": "float32",
59
  "transformers_version": "4.44.2",
60
  "tts_config": {
61
- "hidden_size": 64,
62
- "intermediate_size": 64,
63
- "llm_dim": 64,
64
- "model_type": "conditional_chattts",
65
- "num_attention_heads": 2,
66
- "num_hidden_layers": 2,
67
- "num_mel_bins": 10
68
  },
69
  "use_cache": true,
70
  "use_image_id": true,
71
  "use_sliding_window": false,
72
- "vision_batch_size": 2,
 
73
  "vision_config": {
74
- "hidden_size": 64,
75
- "image_size": 28,
76
- "intermediate_size": 64,
77
  "model_type": "siglip_vision_model",
78
  "num_attention_heads": 2,
79
- "num_hidden_layers": 2,
80
- "patch_size": 2
81
  },
82
- "vocab_size": 151936
83
  }
 
1
  {
2
+ "_name_or_path": "openbmb/MiniCPM-o-2_6",
3
  "architectures": [
4
  "MiniCPMO"
5
  ],
6
  "attention_dropout": 0.0,
7
  "audio_chunk_length": 1.0,
8
  "audio_config": {
9
+ "_name_or_path": "openai/whisper-medium",
10
+ "architectures": [
11
+ "MiniCPMWhisperEncoder"
12
+ ],
13
+ "begin_suppress_tokens": [
14
+ 220,
15
+ 50257
16
+ ],
17
+ "bos_token_id": 50257,
18
+ "d_model": 1024,
19
+ "decoder_attention_heads": 16,
20
+ "decoder_ffn_dim": 4096,
21
+ "decoder_layers": 24,
22
+ "decoder_start_token_id": 50258,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 4096,
25
  "encoder_layers": 2,
26
+ "eos_token_id": 50257,
27
+ "forced_decoder_ids": [
28
+ [
29
+ 1,
30
+ 50259
31
+ ],
32
+ [
33
+ 2,
34
+ 50359
35
+ ],
36
+ [
37
+ 3,
38
+ 50363
39
+ ]
40
+ ],
41
+ "max_length": 448,
42
  "model_type": "whisper",
43
+ "num_hidden_layers": 24,
44
+ "pad_token_id": 50257,
45
+ "suppress_tokens": [
46
+ 1,
47
+ 2,
48
+ 7,
49
+ 8,
50
+ 9,
51
+ 10,
52
+ 14,
53
+ 25,
54
+ 26,
55
+ 27,
56
+ 28,
57
+ 29,
58
+ 31,
59
+ 58,
60
+ 59,
61
+ 60,
62
+ 61,
63
+ 62,
64
+ 63,
65
+ 90,
66
+ 91,
67
+ 92,
68
+ 93,
69
+ 359,
70
+ 503,
71
+ 522,
72
+ 542,
73
+ 873,
74
+ 893,
75
+ 902,
76
+ 918,
77
+ 922,
78
+ 931,
79
+ 1350,
80
+ 1853,
81
+ 1982,
82
+ 2460,
83
+ 2627,
84
+ 3246,
85
+ 3253,
86
+ 3268,
87
+ 3536,
88
+ 3846,
89
+ 3961,
90
+ 4183,
91
+ 4667,
92
+ 6585,
93
+ 6647,
94
+ 7273,
95
+ 9061,
96
+ 9383,
97
+ 10428,
98
+ 10929,
99
+ 11938,
100
+ 12033,
101
+ 12331,
102
+ 12562,
103
+ 13793,
104
+ 14157,
105
+ 14635,
106
+ 15265,
107
+ 15618,
108
+ 16553,
109
+ 16604,
110
+ 18362,
111
+ 18956,
112
+ 20075,
113
+ 21675,
114
+ 22520,
115
+ 26130,
116
+ 26161,
117
+ 26435,
118
+ 28279,
119
+ 29464,
120
+ 31650,
121
+ 32302,
122
+ 32470,
123
+ 36865,
124
+ 42863,
125
+ 47425,
126
+ 49870,
127
+ 50254,
128
+ 50258,
129
+ 50358,
130
+ 50359,
131
+ 50360,
132
+ 50361,
133
+ 50362
134
+ ],
135
+ "torch_dtype": "float32"
136
  },
137
  "audio_pool_step": 2,
138
  "auto_map": {
139
+ "AutoConfig": "openbmb/MiniCPM-o-2_6--configuration_minicpm.MiniCPMOConfig",
140
+ "AutoModel": "openbmb/MiniCPM-o-2_6--modeling_minicpmo.MiniCPMO",
141
+ "AutoModelForCausalLM": "openbmb/MiniCPM-o-2_6--modeling_minicpmo.MiniCPMO"
142
  },
143
  "batch_vision_input": true,
144
+ "bos_token_id": 151643,
145
+ "chunk_input": true,
146
+ "drop_vision_last_layer": false,
147
+ "eos_token_id": 151645,
148
  "hidden_act": "silu",
149
+ "hidden_size": 168,
150
+ "image_size": 448,
151
  "init_audio": true,
152
  "init_tts": true,
153
  "init_vision": true,
154
  "initializer_range": 0.02,
155
+ "intermediate_size": 32,
156
+ "listen_speak_type": "asr",
157
  "max_position_embeddings": 32768,
158
  "max_window_layers": 28,
159
  "model_type": "minicpmo",
160
+ "num_attention_heads": 28,
161
+ "num_heads": 2,
162
  "num_hidden_layers": 2,
163
+ "num_key_value_heads": 4,
164
+ "patch_size": 14,
165
+ "query_num": 64,
166
  "rms_norm_eps": 1e-06,
167
+ "rope_theta": 1000000.0,
168
  "slice_config": {
169
+ "max_slice_nums": 9,
170
+ "model_type": "minicpmv"
 
 
171
  },
172
  "slice_mode": true,
173
  "sliding_window": null,
 
176
  "torch_dtype": "float32",
177
  "transformers_version": "4.44.2",
178
  "tts_config": {
179
+ "hidden_size": 24,
180
+ "llm_dim": 32,
181
+ "model_type": "conditional_chattts"
 
 
 
 
182
  },
183
  "use_cache": true,
184
  "use_image_id": true,
185
  "use_sliding_window": false,
186
+ "version": 2.6,
187
+ "vision_batch_size": 16,
188
  "vision_config": {
189
+ "hidden_size": 32,
190
+ "image_size": 224,
191
+ "intermediate_size": 32,
192
  "model_type": "siglip_vision_model",
193
  "num_attention_heads": 2,
194
+ "num_hidden_layers": 1,
195
+ "patch_size": 14
196
  },
197
+ "vocab_size": 151700
198
  }
generation_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
2
  "_from_model_config": true,
 
 
3
  "transformers_version": "4.44.2"
4
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
  "transformers_version": "4.44.2"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a72d8aaf4796fa246d22d26940d4601214eb76232423abb960dea88b8fbc607
3
- size 236425096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78bc28359ce3fdf08cfc2dc701d4903c34ebb5943e12decb3d5ce6ad9901f80
3
+ size 410265248
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -506,8 +506,8 @@
506
  ],
507
  "auto_map": {
508
  "AutoTokenizer": [
509
- "tokenization_qwen2.Qwen2Tokenizer",
510
- "tokenization_minicpmo_fast.MiniCPMOTokenizerFast"
511
  ]
512
  },
513
  "bos_token": "<|im_start|>",
 
506
  ],
507
  "auto_map": {
508
  "AutoTokenizer": [
509
+ "openbmb/MiniCPM-o-2_6--tokenization_minicpmo_fast.MiniCPMOTokenizerFast",
510
+ null
511
  ]
512
  },
513
  "bos_token": "<|im_start|>",