aoiandroid committed on
Commit
6bb898a
·
verified ·
1 Parent(s): 15c1cc1

Upload vibevoice_pipeline_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. vibevoice_pipeline_config.json +52 -0
vibevoice_pipeline_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "VibeVoice-1.5B-CoreML",
3
+ "version": "1.0.0",
4
+ "description": "VibeVoice TTS model converted to CoreML format",
5
+ "components": {
6
+ "acoustic_encoder": {
7
+ "path": "vibevoice_acoustic_encoder.mlpackage",
8
+ "available": true
9
+ },
10
+ "semantic_encoder": {
11
+ "path": "vibevoice_semantic_encoder.mlpackage",
12
+ "available": true
13
+ },
14
+ "acoustic_connector": {
15
+ "path": "vibevoice_acoustic_connector.mlpackage",
16
+ "available": true
17
+ },
18
+ "semantic_connector": {
19
+ "path": "vibevoice_semantic_connector.mlpackage",
20
+ "available": true
21
+ },
22
+ "diffusion_head": {
23
+ "path": "vibevoice_diffusion_head.mlpackage",
24
+ "available": true
25
+ }
26
+ },
27
+ "inference": {
28
+ "audio": {
29
+ "sample_rate": 24000,
30
+ "channels": 1,
31
+ "downsample_factor": 3200,
32
+ "frame_rate_hz": 7.5,
33
+ "fixed_encoder_samples": 24000,
34
+ "fixed_encoder_note": "The acoustic and semantic encoders accept exactly 24000 samples (1 s at 24 kHz); trim or pad the input to that length."
35
+ },
36
+ "diffusion": {
37
+ "num_steps": 20,
38
+ "beta_schedule": "cosine",
39
+ "prediction_type": "v_prediction",
40
+ "guidance_scale": 1.0
41
+ },
42
+ "generation": {
43
+ "max_audio_length_seconds": 60,
44
+ "max_seq_length": 4096
45
+ }
46
+ },
47
+ "platform": {
48
+ "minimum_ios_version": "17.0",
49
+ "minimum_macos_version": "14.0",
50
+ "compute_units": "ALL"
51
+ }
52
+ }