{
  "model_type": "vui",
  "library_name": "vui",
  "pipeline_tag": "text-to-speech",
  "license": "mit",
  "language": ["en"],
  "architectures": ["VuiForConditionalGeneration"],
  "model_files": {
    "base": "vui-100m-base.pt",
    "abraham": "vui-abraham-100m.pt",
    "cohost": "vui-cohost-100m.pt",
    "cohost_alt": "ckpts-vui-cohost-100m.pt",
    "tokenizer": "fluac-22hz-22khz.pt"
  },
  "model_variants": {
    "vui-100m-base": {
      "description": "Base checkpoint trained on 40k hours of audio conversations",
      "file": "vui-100m-base.pt",
      "size_mb": 198
    },
    "vui-abraham-100m": {
      "description": "Single speaker model with context awareness",
      "file": "vui-abraham-100m.pt",
      "size_mb": 198
    },
    "vui-cohost-100m": {
      "description": "Two speakers that can interact with each other",
      "file": "vui-cohost-100m.pt",
      "size_mb": 198
    }
  },
  "tokenizer_config": {
    "audio_tokenizer": "fluac",
    "sample_rate": "22khz",
    "file": "fluac-22hz-22khz.pt",
    "size_mb": 307
  },
  "training_data": {
    "hours": 40000,
    "type": "audio_conversations"
  },
  "capabilities": [
    "text-to-speech",
    "conversational-speech",
    "voice-cloning",
    "on-device-inference"
  ],
  "torch_dtype": "float32",
  "framework": "pytorch"
}