bezzam HF Staff committed on
Commit
05ba0b6
·
verified ·
1 Parent(s): 3bf2d34

Upload VibeVoiceForConditionalGeneration

Browse files
config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "acostic_vae_dim": 64,
3
  "acoustic_tokenizer_config": {
4
- "bias": true,
5
  "channels": 1,
6
  "depths": [
7
  3,
@@ -27,7 +26,7 @@
27
  "kernel_size": 7,
28
  "layer_scale_init_value": 1e-06,
29
  "model_type": "vibevoice_acoustic_tokenizer",
30
- "n_filters": 32,
31
  "rms_norm_eps": 1e-05,
32
  "vae_std": 0.625,
33
  "weight_init_value": 0.01
@@ -48,12 +47,12 @@
48
  "intermediate_size": 10752,
49
  "mlp_bias": false,
50
  "model_type": "vibevoice",
 
51
  "num_head_layers": 4,
52
  "pad_token_id": 151643,
53
  "prediction_type": "v_prediction",
54
  "rms_norm_eps": 1e-05,
55
  "semantic_tokenizer_config": {
56
- "bias": true,
57
  "channels": 1,
58
  "depths": [
59
  3,
@@ -75,10 +74,11 @@
75
  "ffn_expansion": 4,
76
  "hidden_act": "gelu",
77
  "hidden_size": 128,
 
78
  "kernel_size": 7,
79
  "layer_scale_init_value": 1e-06,
80
  "model_type": "vibevoice_semantic_tokenizer",
81
- "n_filters": 32,
82
  "rms_norm_eps": 1e-05,
83
  "weight_init_value": 0.01
84
  },
@@ -141,6 +141,6 @@
141
  "vocab_size": 152064
142
  },
143
  "tie_word_embeddings": false,
144
- "transformers_version": "5.0.0.dev0",
145
  "vocab_size": 152064
146
  }
 
1
  {
2
  "acostic_vae_dim": 64,
3
  "acoustic_tokenizer_config": {
 
4
  "channels": 1,
5
  "depths": [
6
  3,
 
26
  "kernel_size": 7,
27
  "layer_scale_init_value": 1e-06,
28
  "model_type": "vibevoice_acoustic_tokenizer",
29
+ "num_filters": 32,
30
  "rms_norm_eps": 1e-05,
31
  "vae_std": 0.625,
32
  "weight_init_value": 0.01
 
47
  "intermediate_size": 10752,
48
  "mlp_bias": false,
49
  "model_type": "vibevoice",
50
+ "num_diffusion_steps": 10,
51
  "num_head_layers": 4,
52
  "pad_token_id": 151643,
53
  "prediction_type": "v_prediction",
54
  "rms_norm_eps": 1e-05,
55
  "semantic_tokenizer_config": {
 
56
  "channels": 1,
57
  "depths": [
58
  3,
 
74
  "ffn_expansion": 4,
75
  "hidden_act": "gelu",
76
  "hidden_size": 128,
77
+ "initializer_range": 0.01,
78
  "kernel_size": 7,
79
  "layer_scale_init_value": 1e-06,
80
  "model_type": "vibevoice_semantic_tokenizer",
81
+ "num_filters": 32,
82
  "rms_norm_eps": 1e-05,
83
  "weight_init_value": 0.01
84
  },
 
141
  "vocab_size": 152064
142
  },
143
  "tie_word_embeddings": false,
144
+ "transformers_version": "5.2.0.dev0",
145
  "vocab_size": 152064
146
  }
generation_config.json CHANGED
@@ -1,23 +1,18 @@
1
  {
2
  "_from_model_config": false,
3
- "audio_bos_token_id": 151652,
4
- "audio_diffusion_id": 151654,
5
- "audio_eos_token_id": 151653,
6
- "cfg_scale": 1.3,
7
  "do_sample": false,
8
  "eos_token_id": 151643,
 
9
  "max_length": 20250,
10
  "max_new_tokens": 20250,
11
- "n_diffusion_steps": 10,
12
  "noise_scheduler_class": "DPMSolverMultistepScheduler",
13
  "noise_scheduler_config": {
14
  "beta_schedule": "squaredcos_cap_v2",
15
- "num_train_timesteps": 1000,
16
  "prediction_type": "v_prediction"
17
  },
18
  "output_attentions": false,
19
  "output_hidden_states": false,
20
  "pad_token_id": 151643,
21
- "transformers_version": "5.0.0.dev0",
22
  "use_cache": true
23
  }
 
1
  {
2
  "_from_model_config": false,
 
 
 
 
3
  "do_sample": false,
4
  "eos_token_id": 151643,
5
+ "guidance_scale": 1.3,
6
  "max_length": 20250,
7
  "max_new_tokens": 20250,
 
8
  "noise_scheduler_class": "DPMSolverMultistepScheduler",
9
  "noise_scheduler_config": {
10
  "beta_schedule": "squaredcos_cap_v2",
 
11
  "prediction_type": "v_prediction"
12
  },
13
  "output_attentions": false,
14
  "output_hidden_states": false,
15
  "pad_token_id": 151643,
16
+ "transformers_version": "5.2.0.dev0",
17
  "use_cache": true
18
  }
model-00001-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:666a9b830bda088607298be3307f18abc09a3812bd0f3da09e9aba841d481420
3
+ size 1886423996
model-00002-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c56b1ca707e31e435ded8b03baa4938d88275bf0ba7033935a16d8173a99ff85
3
+ size 1864468520
model-00003-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48bfb4af453d45e488050e90d3f39da0189f1c10a77d75223c2c2ced8b035baa
3
+ size 1864468520
model-00004-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4893be477be68e53b8a9616422b99065f3d1431cce9efe0a1653495e9cf4df6
3
+ size 1864468544
model-00005-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:471690e9846e791def400fefa3d2103c9839dc8a3e987b175f6539c7412422d6
3
+ size 1864468568
model-00006-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7918d400ba895b15a1126fde242028e5d05b37bab0c0427944de81df80f901f
3
+ size 1864468568
model-00007-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f00ebea5a9f76eea891b3457621955433149ae603d921afa1498e46683ba37
3
+ size 1864468568
model-00008-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e1f62c077b250be14d45a376911b4c11a56a898088fc2ef3797fd18a3d14c29
3
+ size 1992444448
model-00009-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dd198a4c6fd9726844ae116fbb634d7e3a063e2ca9aa552a8257ef8076fb617
3
+ size 1939853578
model-00010-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad6ec53c767d8a703965d7a54e31ba765263995a62437b74a90a99209f4f348
3
+ size 1681341920
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff