arashkermani committed on
Commit
1967fb5
·
verified ·
1 Parent(s): 8988ef6

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.json +12 -19
  2. pytorch_model.bin +2 -2
config.json CHANGED
@@ -15,14 +15,14 @@
15
  50257
16
  ],
17
  "bos_token_id": 50257,
18
- "d_model": 64,
19
- "decoder_attention_heads": 2,
20
- "decoder_ffn_dim": 128,
21
- "decoder_layers": 1,
22
  "decoder_start_token_id": 50258,
23
- "encoder_attention_heads": 2,
24
- "encoder_ffn_dim": 128,
25
- "encoder_layers": 1,
26
  "eos_token_id": 50257,
27
  "forced_decoder_ids": [
28
  [
@@ -40,7 +40,7 @@
40
  ],
41
  "max_length": 448,
42
  "model_type": "whisper",
43
- "num_hidden_layers": 1,
44
  "pad_token_id": 50257,
45
  "suppress_tokens": [
46
  1,
@@ -148,8 +148,8 @@
148
  "hidden_act": "silu",
149
  "hidden_size": 256,
150
  "image_size": 448,
151
- "init_audio": true,
152
- "init_tts": true,
153
  "init_vision": true,
154
  "initializer_range": 0.02,
155
  "intermediate_size": 512,
@@ -176,15 +176,8 @@
176
  "torch_dtype": "bfloat16",
177
  "transformers_version": "4.46.2",
178
  "tts_config": {
179
- "hidden_size": 8,
180
- "intermediate_size": 4,
181
- "llm_dim": 4,
182
- "model_type": "conditional_chattts",
183
- "num_attention_heads": 1,
184
- "num_audio_tokens": 10,
185
- "num_hidden_layers": 1,
186
- "num_mel_bins": 10,
187
- "num_text_tokens": 20
188
  },
189
  "use_cache": true,
190
  "use_image_id": true,
 
15
  50257
16
  ],
17
  "bos_token_id": 50257,
18
+ "d_model": 1024,
19
+ "decoder_attention_heads": 16,
20
+ "decoder_ffn_dim": 4096,
21
+ "decoder_layers": 24,
22
  "decoder_start_token_id": 50258,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 4096,
25
+ "encoder_layers": 24,
26
  "eos_token_id": 50257,
27
  "forced_decoder_ids": [
28
  [
 
40
  ],
41
  "max_length": 448,
42
  "model_type": "whisper",
43
+ "num_hidden_layers": 24,
44
  "pad_token_id": 50257,
45
  "suppress_tokens": [
46
  1,
 
148
  "hidden_act": "silu",
149
  "hidden_size": 256,
150
  "image_size": 448,
151
+ "init_audio": false,
152
+ "init_tts": false,
153
  "init_vision": true,
154
  "initializer_range": 0.02,
155
  "intermediate_size": 512,
 
176
  "torch_dtype": "bfloat16",
177
  "transformers_version": "4.46.2",
178
  "tts_config": {
179
+ "llm_dim": 3584,
180
+ "model_type": "conditional_chattts"
 
 
 
 
 
 
 
181
  },
182
  "use_cache": true,
183
  "use_image_id": true,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7955f2e2d47a7db417469976a67281e11a004cc9fc86b21de5ae626a1f2d11a8
3
- size 34895535
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91ebd65c3f39ed36a031a9eb5722a54facafece3b60bcc554b6e02d624cadc4
3
+ size 3760638