Upload VibeVoiceForConditionalGeneration
Browse files
- config.json +5 -5
- generation_config.json +2 -7
- model-00001-of-00010.safetensors +3 -0
- model-00002-of-00010.safetensors +3 -0
- model-00003-of-00010.safetensors +3 -0
- model-00004-of-00010.safetensors +3 -0
- model-00005-of-00010.safetensors +3 -0
- model-00006-of-00010.safetensors +3 -0
- model-00007-of-00010.safetensors +3 -0
- model-00008-of-00010.safetensors +3 -0
- model-00009-of-00010.safetensors +3 -0
- model-00010-of-00010.safetensors +3 -0
- model.safetensors.index.json +0 -0
config.json
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"acostic_vae_dim": 64,
|
| 3 |
"acoustic_tokenizer_config": {
|
| 4 |
-
"bias": true,
|
| 5 |
"channels": 1,
|
| 6 |
"depths": [
|
| 7 |
3,
|
|
@@ -27,7 +26,7 @@
|
|
| 27 |
"kernel_size": 7,
|
| 28 |
"layer_scale_init_value": 1e-06,
|
| 29 |
"model_type": "vibevoice_acoustic_tokenizer",
|
| 30 |
-
"
|
| 31 |
"rms_norm_eps": 1e-05,
|
| 32 |
"vae_std": 0.625,
|
| 33 |
"weight_init_value": 0.01
|
|
@@ -48,12 +47,12 @@
|
|
| 48 |
"intermediate_size": 10752,
|
| 49 |
"mlp_bias": false,
|
| 50 |
"model_type": "vibevoice",
|
|
|
|
| 51 |
"num_head_layers": 4,
|
| 52 |
"pad_token_id": 151643,
|
| 53 |
"prediction_type": "v_prediction",
|
| 54 |
"rms_norm_eps": 1e-05,
|
| 55 |
"semantic_tokenizer_config": {
|
| 56 |
-
"bias": true,
|
| 57 |
"channels": 1,
|
| 58 |
"depths": [
|
| 59 |
3,
|
|
@@ -75,10 +74,11 @@
|
|
| 75 |
"ffn_expansion": 4,
|
| 76 |
"hidden_act": "gelu",
|
| 77 |
"hidden_size": 128,
|
|
|
|
| 78 |
"kernel_size": 7,
|
| 79 |
"layer_scale_init_value": 1e-06,
|
| 80 |
"model_type": "vibevoice_semantic_tokenizer",
|
| 81 |
-
"
|
| 82 |
"rms_norm_eps": 1e-05,
|
| 83 |
"weight_init_value": 0.01
|
| 84 |
},
|
|
@@ -141,6 +141,6 @@
|
|
| 141 |
"vocab_size": 152064
|
| 142 |
},
|
| 143 |
"tie_word_embeddings": false,
|
| 144 |
-
"transformers_version": "5.
|
| 145 |
"vocab_size": 152064
|
| 146 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"acostic_vae_dim": 64,
|
| 3 |
"acoustic_tokenizer_config": {
|
|
|
|
| 4 |
"channels": 1,
|
| 5 |
"depths": [
|
| 6 |
3,
|
|
|
|
| 26 |
"kernel_size": 7,
|
| 27 |
"layer_scale_init_value": 1e-06,
|
| 28 |
"model_type": "vibevoice_acoustic_tokenizer",
|
| 29 |
+
"num_filters": 32,
|
| 30 |
"rms_norm_eps": 1e-05,
|
| 31 |
"vae_std": 0.625,
|
| 32 |
"weight_init_value": 0.01
|
|
|
|
| 47 |
"intermediate_size": 10752,
|
| 48 |
"mlp_bias": false,
|
| 49 |
"model_type": "vibevoice",
|
| 50 |
+
"num_diffusion_steps": 10,
|
| 51 |
"num_head_layers": 4,
|
| 52 |
"pad_token_id": 151643,
|
| 53 |
"prediction_type": "v_prediction",
|
| 54 |
"rms_norm_eps": 1e-05,
|
| 55 |
"semantic_tokenizer_config": {
|
|
|
|
| 56 |
"channels": 1,
|
| 57 |
"depths": [
|
| 58 |
3,
|
|
|
|
| 74 |
"ffn_expansion": 4,
|
| 75 |
"hidden_act": "gelu",
|
| 76 |
"hidden_size": 128,
|
| 77 |
+
"initializer_range": 0.01,
|
| 78 |
"kernel_size": 7,
|
| 79 |
"layer_scale_init_value": 1e-06,
|
| 80 |
"model_type": "vibevoice_semantic_tokenizer",
|
| 81 |
+
"num_filters": 32,
|
| 82 |
"rms_norm_eps": 1e-05,
|
| 83 |
"weight_init_value": 0.01
|
| 84 |
},
|
|
|
|
| 141 |
"vocab_size": 152064
|
| 142 |
},
|
| 143 |
"tie_word_embeddings": false,
|
| 144 |
+
"transformers_version": "5.2.0.dev0",
|
| 145 |
"vocab_size": 152064
|
| 146 |
}
|
generation_config.json
CHANGED
|
@@ -1,23 +1,18 @@
|
|
| 1 |
{
|
| 2 |
"_from_model_config": false,
|
| 3 |
-
"audio_bos_token_id": 151652,
|
| 4 |
-
"audio_diffusion_id": 151654,
|
| 5 |
-
"audio_eos_token_id": 151653,
|
| 6 |
-
"cfg_scale": 1.3,
|
| 7 |
"do_sample": false,
|
| 8 |
"eos_token_id": 151643,
|
|
|
|
| 9 |
"max_length": 20250,
|
| 10 |
"max_new_tokens": 20250,
|
| 11 |
-
"n_diffusion_steps": 10,
|
| 12 |
"noise_scheduler_class": "DPMSolverMultistepScheduler",
|
| 13 |
"noise_scheduler_config": {
|
| 14 |
"beta_schedule": "squaredcos_cap_v2",
|
| 15 |
-
"num_train_timesteps": 1000,
|
| 16 |
"prediction_type": "v_prediction"
|
| 17 |
},
|
| 18 |
"output_attentions": false,
|
| 19 |
"output_hidden_states": false,
|
| 20 |
"pad_token_id": 151643,
|
| 21 |
-
"transformers_version": "5.
|
| 22 |
"use_cache": true
|
| 23 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"_from_model_config": false,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"do_sample": false,
|
| 4 |
"eos_token_id": 151643,
|
| 5 |
+
"guidance_scale": 1.3,
|
| 6 |
"max_length": 20250,
|
| 7 |
"max_new_tokens": 20250,
|
|
|
|
| 8 |
"noise_scheduler_class": "DPMSolverMultistepScheduler",
|
| 9 |
"noise_scheduler_config": {
|
| 10 |
"beta_schedule": "squaredcos_cap_v2",
|
|
|
|
| 11 |
"prediction_type": "v_prediction"
|
| 12 |
},
|
| 13 |
"output_attentions": false,
|
| 14 |
"output_hidden_states": false,
|
| 15 |
"pad_token_id": 151643,
|
| 16 |
+
"transformers_version": "5.2.0.dev0",
|
| 17 |
"use_cache": true
|
| 18 |
}
|
model-00001-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:666a9b830bda088607298be3307f18abc09a3812bd0f3da09e9aba841d481420
|
| 3 |
+
size 1886423996
|
model-00002-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c56b1ca707e31e435ded8b03baa4938d88275bf0ba7033935a16d8173a99ff85
|
| 3 |
+
size 1864468520
|
model-00003-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48bfb4af453d45e488050e90d3f39da0189f1c10a77d75223c2c2ced8b035baa
|
| 3 |
+
size 1864468520
|
model-00004-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4893be477be68e53b8a9616422b99065f3d1431cce9efe0a1653495e9cf4df6
|
| 3 |
+
size 1864468544
|
model-00005-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:471690e9846e791def400fefa3d2103c9839dc8a3e987b175f6539c7412422d6
|
| 3 |
+
size 1864468568
|
model-00006-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7918d400ba895b15a1126fde242028e5d05b37bab0c0427944de81df80f901f
|
| 3 |
+
size 1864468568
|
model-00007-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4f00ebea5a9f76eea891b3457621955433149ae603d921afa1498e46683ba37
|
| 3 |
+
size 1864468568
|
model-00008-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e1f62c077b250be14d45a376911b4c11a56a898088fc2ef3797fd18a3d14c29
|
| 3 |
+
size 1992444448
|
model-00009-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dd198a4c6fd9726844ae116fbb634d7e3a063e2ca9aa552a8257ef8076fb617
|
| 3 |
+
size 1939853578
|
model-00010-of-00010.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fad6ec53c767d8a703965d7a54e31ba765263995a62437b74a90a99209f4f348
|
| 3 |
+
size 1681341920
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|