Text-to-Speech
Core ML
Supertonic
speech
audio
tts
ane
apple-silicon
flow-matching
diffusion
multilingual
Instructions to use FluidInference/supertonic-3-coreml with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Supertonic
How to use FluidInference/supertonic-3-coreml with Supertonic:
from supertonic import TTS

tts = TTS(auto_download=True)
style = tts.get_voice_style(voice_name="M1")
text = "The train delay was announced at 4:45 PM on Wed, Apr 3, 2024 due to track maintenance."
wav, duration = tts.synthesize(text, voice_style=style)
tts.save_audio(wav, "output.wav")
- Notebooks
- Google Colab
- Kaggle
{
  "name": "supertonic-3-coreml",
  "version": "1.7.3",
  "upstream": "https://huggingface.co/Supertone/supertonic-3",
  "conversion_repo": "https://github.com/FluidInference/mobius/tree/main/models/tts/supertonic-3/coreml",
  "integration": "https://github.com/FluidInference/FluidAudio/tree/main/Sources/FluidAudio/TTS/Supertonic3",
  "precision": "float16",
  "core_ml_target": "iOS18",
  "sample_rate": 44100,
  "languages": [
    "en",
    "ko",
    "ja",
    "ar",
    "bg",
    "cs",
    "da",
    "de",
    "el",
    "es",
    "et",
    "fi",
    "fr",
    "hi",
    "hr",
    "hu",
    "id",
    "it",
    "lt",
    "lv",
    "nl",
    "pl",
    "pt",
    "ro",
    "ru",
    "sk",
    "sl",
    "sv",
    "tr",
    "uk",
    "vi"
  ],
  "modules": [
    {
      "name": "TextEncoder",
      "precision": "fp16",
      "package": "TextEncoder.mlpackage",
      "compiled": "TextEncoder.mlmodelc",
      "shape_policy": "fixed T=128",
      "package_size_bytes": 18166801,
      "compiled_size_bytes": 18179790,
      "package_files": [
        {
          "path": "TextEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel",
          "sha256": "9674a311dd5145e27554c1d021f25531f90ed772a3758ab349b605967faec667",
          "size_bytes": 144168
        },
        {
          "path": "TextEncoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin",
          "sha256": "7c7afa5d02426a8a363c9cef0e27ce64a5c57a67d1a59f60aac0ac58ffb9877f",
          "size_bytes": 18022016
        },
        {
          "path": "TextEncoder.mlpackage/Manifest.json",
          "sha256": "3d1496b0ff38b32feb32f36468e5920909176c6dc4dbe3bf70db97b03beb9e85",
          "size_bytes": 617
        }
      ],
      "compiled_files": [
        {
          "path": "TextEncoder.mlmodelc/.DS_Store",
          "sha256": "cf4645f055711856ff0569b29315fb462026365385312c81e7db25c789e76e2b",
          "size_bytes": 6148
        },
        {
          "path": "TextEncoder.mlmodelc/analytics/coremldata.bin",
          "sha256": "b39c208b46f180dcbf4cfcd3b4437eeb93371fb77d6d636b99ca3ddff5327253",
          "size_bytes": 243
        },
        {
          "path": "TextEncoder.mlmodelc/coremldata.bin",
          "sha256": "4349aa84bb4a4731e3be09eb235e1816a3d0c88500b151479e08fe15662a446c",
          "size_bytes": 431
        },
        {
          "path": "TextEncoder.mlmodelc/model.mil",
          "sha256": "6ef23a27a343f540bee78f5b2e85bbd83443c960c151d0e3bd57ffe5c6082ed1",
          "size_bytes": 150952
        },
        {
          "path": "TextEncoder.mlmodelc/weights/weight.bin",
          "sha256": "7c7afa5d02426a8a363c9cef0e27ce64a5c57a67d1a59f60aac0ac58ffb9877f",
          "size_bytes": 18022016
        }
      ]
    },
    {
      "name": "DurationPredictor",
      "precision": "fp16",
      "package": "DurationPredictor.mlpackage",
      "compiled": "DurationPredictor.mlmodelc",
      "shape_policy": "fixed T=128",
      "package_size_bytes": 1892063,
      "compiled_size_bytes": 1902563,
      "package_files": [
        {
          "path": "DurationPredictor.mlpackage/Data/com.apple.CoreML/model.mlmodel",
          "sha256": "ccb56ff515b7d41cf79d21d6be2f2d23a31a295810950dd86e39620f9629cad4",
          "size_bytes": 93494
        },
        {
          "path": "DurationPredictor.mlpackage/Data/com.apple.CoreML/weights/weight.bin",
          "sha256": "7f709c544087966d6b2538ac50b7a4014d6e1049e21d713cf3bc71c4c5b9307c",
          "size_bytes": 1797952
        },
        {
          "path": "DurationPredictor.mlpackage/Manifest.json",
          "sha256": "36d830eb7bfbe233cdfca3ac155eb0a8894a2a52fa96ca81b38d00d5c1267f8a",
          "size_bytes": 617
        }
      ],
      "compiled_files": [
        {
          "path": "DurationPredictor.mlmodelc/.DS_Store",
          "sha256": "75c79ac936a1add17210b7cbec4962505cc557bc2f940e276b1236d35ff12dfa",
          "size_bytes": 6148
        },
        {
          "path": "DurationPredictor.mlmodelc/analytics/coremldata.bin",
          "sha256": "b836b72b388c2bc009a13bcc495cd08278a1742b1a866cdaf7faab7d80786bcb",
          "size_bytes": 243
        },
        {
          "path": "DurationPredictor.mlmodelc/coremldata.bin",
          "sha256": "3ee71b864377bd54479f6b07d141ed37c9fc49718aba6dba21c56cc5cb975be0",
          "size_bytes": 425
        },
        {
          "path": "DurationPredictor.mlmodelc/model.mil",
          "sha256": "e87f25d43a28b967628e6c0089535390687203e54520e7e3223c771b26fbc0bd",
          "size_bytes": 97795
        },
        {
          "path": "DurationPredictor.mlmodelc/weights/weight.bin",
          "sha256": "7f709c544087966d6b2538ac50b7a4014d6e1049e21d713cf3bc71c4c5b9307c",
          "size_bytes": 1797952
        }
      ]
    },
    {
      "name": "VectorEstimator",
      "precision": "fp16",
      "package": "VectorEstimator.mlpackage",
      "compiled": "VectorEstimator.mlmodelc",
      "shape_policy": "RangeDim L,T 17..512",
      "package_size_bytes": 127977306,
      "compiled_size_bytes": 128000054,
      "package_files": [
        {
          "path": "VectorEstimator.mlpackage/Data/com.apple.CoreML/model.mlmodel",
          "sha256": "c93445cd09b7e6190fd26873cafb4a209fbb24d836b665428f7114ed6b0b41df",
          "size_bytes": 328305
        },
        {
          "path": "VectorEstimator.mlpackage/Data/com.apple.CoreML/weights/weight.bin",
          "sha256": "101cebe7759b5f5e8f188ed3c2bf5f624bbf048aa657fe5bed9c3110c5ce0c26",
          "size_bytes": 127648384
        },
        {
          "path": "VectorEstimator.mlpackage/Manifest.json",
          "sha256": "825048428f8073ddcc4f22dfeb1d6a649f5799ef8e37fdb7d47ecaac8ec48590",
          "size_bytes": 617
        }
      ],
      "compiled_files": [
        {
          "path": "VectorEstimator.mlmodelc/.DS_Store",
          "sha256": "d61692ef37d205b073915a28b0fcca8eb5360563286dc61d44acbd2af6b4a184",
          "size_bytes": 6148
        },
        {
          "path": "VectorEstimator.mlmodelc/analytics/coremldata.bin",
          "sha256": "1f43626d16b133a8b302f4f7a0e6d0f789422b22df135b7af11df25c385bebe0",
          "size_bytes": 243
        },
        {
          "path": "VectorEstimator.mlmodelc/coremldata.bin",
          "sha256": "e4ae2bd30a9130ac2d09996ad4f63ea2c5703f735d11d9cf7e494d14397fd495",
          "size_bytes": 633
        },
        {
          "path": "VectorEstimator.mlmodelc/model.mil",
          "sha256": "56c549b708a496f94e2b7c3ff7671414fb4d8a676df0b026ebab71c334a76ccf",
          "size_bytes": 344646
        },
        {
          "path": "VectorEstimator.mlmodelc/weights/weight.bin",
          "sha256": "101cebe7759b5f5e8f188ed3c2bf5f624bbf048aa657fe5bed9c3110c5ce0c26",
          "size_bytes": 127648384
        }
      ]
    },
    {
      "name": "VectorEstimator_int8",
      "precision": "int8 weight-only / fp16 activations (per-channel symmetric)",
      "package": "VectorEstimator_int8.mlpackage",
      "compiled": "VectorEstimator_int8.mlmodelc",
      "shape_policy": "RangeDim L,T 17..512",
      "package_size_bytes": 64530961,
      "compiled_size_bytes": 64551709,
      "package_files": [
        {
          "path": "VectorEstimator_int8.mlpackage/Data/com.apple.CoreML/model.mlmodel",
          "sha256": "15d1760d354e6849c72ae2fd81805bf7121aab92567f12b0f230bf70268cc853",
          "size_bytes": 345736
        },
        {
          "path": "VectorEstimator_int8.mlpackage/Data/com.apple.CoreML/weights/weight.bin",
          "sha256": "0e637c3a1725b5e1b3df09cc162d1822e8fe80fa92ba01c180dde2ca29c12523",
          "size_bytes": 64184608
        },
        {
          "path": "VectorEstimator_int8.mlpackage/Manifest.json",
          "sha256": "32b9a6e8860f4fe2a5f873042621b59aac94af0d1a62e184514b40a16d258897",
          "size_bytes": 617
        }
      ],
      "compiled_files": [
        {
          "path": "VectorEstimator_int8.mlmodelc/analytics/coremldata.bin",
          "sha256": "982da75910aca2f5f0e2f813bf7db9ec201fbf4c71668d0f0a99eb2f9da983d0",
          "size_bytes": 243
        },
        {
          "path": "VectorEstimator_int8.mlmodelc/coremldata.bin",
          "sha256": "e93e9a59f6d4035aa07cf21ff06e160eb1bbcdad9b48d2ac413d11f3c2c26949",
          "size_bytes": 633
        },
        {
          "path": "VectorEstimator_int8.mlmodelc/model.mil",
          "sha256": "4286e9f3ea253bdbca50e246d5568e9f89e130e05b7c8243c54d70f3611e5440",
          "size_bytes": 366225
        },
        {
          "path": "VectorEstimator_int8.mlmodelc/weights/weight.bin",
          "sha256": "0e637c3a1725b5e1b3df09cc162d1822e8fe80fa92ba01c180dde2ca29c12523",
          "size_bytes": 64184608
        }
      ]
    },
    {
      "name": "Vocoder",
      "precision": "fp16",
      "package": "Vocoder.mlpackage",
      "compiled": "Vocoder.mlmodelc",
      "shape_policy": "RangeDim L_ttl 4..512",
      "package_size_bytes": 50743824,
      "compiled_size_bytes": 50754917,
      "package_files": [
        {
          "path": "Vocoder.mlpackage/Data/com.apple.CoreML/model.mlmodel",
          "sha256": "e0aefc35639793ac4dfcb74cddfca17744e7bb302f7af34a330d83d4b59e2831",
          "size_bytes": 70695
        },
        {
          "path": "Vocoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin",
          "sha256": "b45e4c17a45cc4de30dcedae9f662548aa35f8f9763632c342b8bbef3089fcee",
          "size_bytes": 50672512
        },
        {
          "path": "Vocoder.mlpackage/Manifest.json",
          "sha256": "5f675492dc6d4d345386d7c974c246ce3228c457acaed27f7aff00754a7bce45",
          "size_bytes": 617
        }
      ],
      "compiled_files": [
        {
          "path": "Vocoder.mlmodelc/.DS_Store",
          "sha256": "aad4800457fdb1567500acd0f9eb203b27c71d29189891095b9d371f8c9c58b5",
          "size_bytes": 6148
        },
        {
          "path": "Vocoder.mlmodelc/analytics/coremldata.bin",
          "sha256": "4fe1b825137629a96dc58a1339bc4ece32041b755f99d638f21a153f2e7faed6",
          "size_bytes": 243
        },
        {
          "path": "Vocoder.mlmodelc/coremldata.bin",
          "sha256": "dbf54fc98879ee1c9a213452e16d09367f3c34ad489b175bcae7ef5f74f8d121",
          "size_bytes": 388
        },
        {
          "path": "Vocoder.mlmodelc/model.mil",
          "sha256": "5f57d6b16a4ee7237e046a423c265aa2c487857eb0003bba1c89fa66e577353f",
          "size_bytes": 75626
        },
        {
          "path": "Vocoder.mlmodelc/weights/weight.bin",
          "sha256": "b45e4c17a45cc4de30dcedae9f662548aa35f8f9763632c342b8bbef3089fcee",
          "size_bytes": 50672512
        }
      ]
    }
  ],
  "assets": [
    {
      "path": "tts.json",
      "sha256": "42078d3aef1cd43ab43021f3c54f47d2d75ceb4e75f627f118890128b06a0d09",
      "size_bytes": 8253
    },
    {
      "path": "unicode_indexer.json",
      "sha256": "9bf7346e43883a81f8645c81224f786d43c5b57f3641f6e7671a7d6c493cb24f",
      "size_bytes": 277676
    },
    {
      "path": "voice_styles/M1.json",
      "sha256": "e35604687f5d23694b8e91593a93eec0e4eca6c0b02bb8ed69139ab2ea6b0a5b",
      "size_bytes": 291748
    }
  ]
}