{% extends "base.html" %} {% block title %}TTSFM {{ _('docs.title') }}{% endblock %} {% block extra_css %} {% endblock %} {% block content %}
{{ _('docs.subtitle') }}
{{ _('docs.overview_desc') }}
{{ request.url_root }}api/
{{ _('docs.authentication_desc') }}
Authorization: Bearer YOUR_API_KEY
{{ _('docs.text_validation_desc') }}
max_length: {{ _('docs.max_length_option') }}
validate_length: {{ _('docs.validate_length_option') }}
preserve_words: {{ _('docs.preserve_words_option') }}
{{ _('docs.get_voices_desc') }}
{
"voices": [
{
"id": "alloy",
"name": "Alloy",
"description": "Alloy voice"
},
{
"id": "echo",
"name": "Echo",
"description": "Echo voice"
}
],
"count": 6
}
Get available audio formats for speech generation.
Multiple formats can be requested, but internally several of them (Opus, AAC, FLAC, PCM) are returned as WAV, as shown below:
{
"formats": [
{
"id": "mp3",
"name": "MP3",
"mime_type": "audio/mp3",
"description": "MP3 audio format"
},
{
"id": "opus",
"name": "Opus",
"mime_type": "audio/wav",
"description": "Returns WAV format"
},
{
"id": "aac",
"name": "AAC",
"mime_type": "audio/wav",
"description": "Returns WAV format"
},
{
"id": "flac",
"name": "FLAC",
"mime_type": "audio/wav",
"description": "Returns WAV format"
},
{
"id": "wav",
"name": "WAV",
"mime_type": "audio/wav",
"description": "WAV audio format"
},
{
"id": "pcm",
"name": "PCM",
"mime_type": "audio/wav",
"description": "Returns WAV format"
}
],
"count": 6
}
{{ _('docs.validate_text_desc') }}
{
"text": "Your text to validate",
"max_length": 4096
}
{
"text_length": 5000,
"max_length": 4096,
"is_valid": false,
"needs_splitting": true,
"suggested_chunks": 2,
"chunk_preview": [
"First chunk preview...",
"Second chunk preview..."
]
}
{{ _('docs.generate_speech_desc') }}
{
"text": "Hello, world!",
"voice": "alloy",
"format": "mp3",
"instructions": "Speak cheerfully",
"max_length": 4096,
"validate_length": true
}
text ({{ _('docs.required') }}): {{ _('docs.text_param') }}
voice ({{ _('docs.optional') }}): {{ _('docs.voice_param') }}
format ({{ _('docs.optional') }}): {{ _('docs.format_param') }}
instructions ({{ _('docs.optional') }}): {{ _('docs.instructions_param') }}
max_length ({{ _('docs.optional') }}): {{ _('docs.max_length_param') }}
validate_length ({{ _('docs.optional') }}): {{ _('docs.validate_length_param') }}
{{ _('docs.response_audio') }}
{{ _('docs.long_text_desc') }}
from ttsfm import TTSClient, Voice, AudioFormat
# Create client
client = TTSClient()
# Generate speech from long text (automatically splits into separate files)
responses = client.generate_speech_long_text(
text="Very long text that exceeds 4096 characters...",
voice=Voice.ALLOY,
response_format=AudioFormat.MP3,
max_length=2000,
preserve_words=True
)
# Save each chunk as separate files
for i, response in enumerate(responses, 1):
response.save_to_file(f"part_{i:03d}.mp3")
{{ _('docs.combined_audio_desc') }}
{
"text": "Very long text that exceeds the limit...",
"voice": "alloy",
"format": "mp3",
"instructions": "Optional voice instructions",
"max_length": 4096,
"preserve_words": true
}
{{ _('docs.response_combined_audio') }}
X-Chunks-Combined: {{ _('docs.chunks_combined_header') }}
X-Original-Text-Length: {{ _('docs.original_text_length_header') }}
X-Audio-Size: {{ _('docs.audio_size_header') }}
{{ _('docs.openai_compatible_desc') }}
{
"model": "gpt-4o-mini-tts",
"input": "Text of any length...",
"voice": "alloy",
"response_format": "mp3",
"instructions": "Optional voice instructions",
"speed": 1.0,
"auto_combine": true,
"max_length": 4096
}
true: {{ _('docs.auto_combine_param') }}
false: {{ _('docs.auto_combine_false') }}
X-Auto-Combine: {{ _('docs.auto_combine_header') }}
X-Chunks-Combined: {{ _('docs.chunks_combined_response') }}
X-Original-Text-Length: {{ _('docs.original_text_response') }}
X-Audio-Format: {{ _('docs.audio_format_header') }}
X-Audio-Size: {{ _('docs.audio_size_response') }}
# {{ _('docs.short_text_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o-mini-tts",
"input": "Hello world!",
"voice": "alloy"
}'
# {{ _('docs.long_text_auto_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o-mini-tts",
"input": "Very long text...",
"voice": "alloy",
"auto_combine": true
}'
# {{ _('docs.long_text_no_auto_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o-mini-tts",
"input": "Very long text...",
"voice": "alloy",
"auto_combine": false
}'
# {{ _('docs.python_example_comment') }}
import requests
response = requests.post(
"{{ request.url_root }}api/generate-combined",
json={
"text": "Your very long text content here...",
"voice": "nova",
"format": "mp3",
"max_length": 2000
}
)
if response.status_code == 200:
with open("combined_audio.mp3", "wb") as f:
f.write(response.content)
chunks = response.headers.get('X-Chunks-Combined')
print(f"Combined {chunks} chunks into single file")
Real-time audio streaming for enhanced user experience. Get audio chunks as they're generated instead of waiting for the complete file.
// JavaScript WebSocket client
const client = new WebSocketTTSClient({
socketUrl: '{{ request.url_root[:-1] }}',
debug: true
});
// Connection events
client.onConnect = () => console.log('Connected');
client.onDisconnect = () => console.log('Disconnected');
// Generate speech with real-time streaming
const result = await client.generateSpeech('Hello, WebSocket world!', {
voice: 'alloy',
format: 'mp3',
chunkSize: 1024, // Characters per chunk
// Progress callback
onProgress: (progress) => {
console.log(`Progress: ${progress.progress}%`);
console.log(`Chunks: ${progress.chunksCompleted}/${progress.totalChunks}`);
},
// Receive audio chunks in real-time
onChunk: (chunk) => {
console.log(`Received chunk ${chunk.chunkIndex + 1}`);
// Process or play audio chunk immediately
processAudioChunk(chunk.audioData);
},
// Completion callback
onComplete: (result) => {
console.log('Streaming complete!');
// result.audioData contains the complete audio
}
});
| Event | Description | Payload |
|---|---|---|
| generate_stream | Start TTS generation | {text, voice, format, chunk_size} |
| cancel_stream | Cancel active stream | {request_id} |
| Event | Description | Payload |
|---|---|---|
| stream_started | Stream initiated | {request_id, timestamp} |
| audio_chunk | Audio chunk ready | {request_id, chunk_index, audio_data, duration} |
| stream_progress | Progress update | {progress, chunks_completed, total_chunks} |
| stream_complete | Generation complete | {request_id, total_chunks, status} |
| stream_error | Error occurred | {request_id, error, timestamp} |
// Create a streaming audio player
const audioChunks = [];
let isPlaying = false;
const streamingPlayer = await client.generateSpeech(longText, {
voice: 'nova',
format: 'mp3',
onChunk: (chunk) => {
// Store chunk
audioChunks.push(chunk.audioData);
// Start playing after first chunk
if (!isPlaying && audioChunks.length >= 3) {
startStreamingPlayback(audioChunks);
isPlaying = true;
}
},
onComplete: (result) => {
// Ensure all chunks are played
finishPlayback(result.audioData);
}
});
Experience WebSocket streaming in action at the WebSocket Demo or enable streaming mode in the Playground.