File size: 2,045 Bytes
074e8ce
 
 
d4d880a
074e8ce
 
 
 
 
 
 
 
 
 
d4d880a
074e8ce
 
 
 
 
d4d880a
074e8ce
 
 
 
 
 
d4d880a
 
 
 
 
 
 
 
 
 
074e8ce
d4d880a
 
 
 
 
 
 
 
 
 
074e8ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""ElevenLabs Text-to-Speech integration."""

import os
from typing import Optional

from elevenlabs import ElevenLabs, VoiceSettings


def get_client() -> ElevenLabs:
    """Build an ElevenLabs client authenticated from the environment.

    Returns:
        An ElevenLabs client constructed with the value of the
        ELEVENLABS_API_KEY environment variable.

    Raises:
        ValueError: If ELEVENLABS_API_KEY is unset or empty.
    """
    key = os.environ.get("ELEVENLABS_API_KEY")
    if key:
        return ElevenLabs(api_key=key)
    # An unset variable and an empty string are both treated as "no key".
    raise ValueError("ELEVENLABS_API_KEY environment variable not set")


def generate_speech(
    text: str,
    voice_id: str,
    voice_settings: Optional[dict] = None,
    *,
    model_id: str = "eleven_multilingual_v2",
    output_format: str = "mp3_44100_128",
) -> bytes:
    """Generate speech audio from text.

    Args:
        text: The text to convert to speech
        voice_id: ElevenLabs voice ID
        voice_settings: Optional dict with stability, similarity_boost, style,
            speed. Missing keys fall back to 0.5, 0.75, 0.0 and 1.0
            respectively. With None or an empty dict the request is sent
            without a voice_settings argument.
        model_id: Model identifier passed to the API (keyword-only)
        output_format: Audio format identifier passed to the API (keyword-only)

    Returns:
        Audio bytes in the requested output format (MP3 by default)

    Raises:
        ValueError: If ELEVENLABS_API_KEY is not set (raised by get_client)
    """
    client = get_client()

    request = {
        "voice_id": voice_id,
        "text": text,
        "model_id": model_id,
        "output_format": output_format,
    }

    # Decide on the caller's dict, not on the truthiness of the constructed
    # settings object: settings are attached exactly when some were supplied.
    if voice_settings:
        request["voice_settings"] = VoiceSettings(
            stability=voice_settings.get("stability", 0.5),
            similarity_boost=voice_settings.get("similarity_boost", 0.75),
            style=voice_settings.get("style", 0.0),
            speed=voice_settings.get("speed", 1.0),
        )

    # convert() hands the audio back as an iterable of byte chunks; join them
    # into a single payload.
    return b"".join(client.text_to_speech.convert(**request))


def generate_speech_file(
    text: str,
    voice_id: str,
    output_path: str,
    voice_settings: Optional[dict] = None,
) -> str:
    """Generate speech and save to file.

    Args:
        text: The text to convert to speech
        voice_id: ElevenLabs voice ID
        output_path: Path to save the audio file
        voice_settings: Optional dict of voice settings, forwarded unchanged
            to generate_speech

    Returns:
        Path to the saved audio file
    """
    # Synthesize before opening the file, so a failed request does not leave
    # an empty or truncated file at output_path.
    audio_bytes = generate_speech(text, voice_id, voice_settings)

    with open(output_path, "wb") as f:
        f.write(audio_bytes)

    return output_path