File size: 4,927 Bytes
4cdaca5
 
369b798
4cdaca5
369b798
9b6a0be
 
 
 
 
 
 
 
 
 
 
 
 
 
4cdaca5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369b798
4cdaca5
 
 
 
369b798
4cdaca5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cdaf71
4cdaca5
 
 
369b798
4cdaca5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cdaf71
4cdaca5
 
 
369b798
4cdaca5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369b798
4cdaca5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""Text-to-Speech Service using ElevenLabs"""
import os
from typing import Optional, List, Dict
from elevenlabs import ElevenLabs, VoiceSettings

# Voice options mapping: human-readable display name -> ElevenLabs voice_id.
# The values are the opaque IDs passed as the `voice` argument when generating
# speech; the keys (including the "(Female)"/"(Male)" suffix) are labels only.
# NOTE(review): "Lera (Female)" and "Bella (Female)" below share the same
# voice_id, so selecting either one produces the same voice. Confirm whether
# "Lera" is meant to have its own ID or is an intentional alias.
VOICE_OPTIONS = {
    "Rachel (Female)": "21m00Tcm4TlvDq8ikWAM",  # Default - same ID as TTSService's default voice_id
    "Lera (Female)": "EXAVITQu4vr4xnSDxMaL",  # Lera - female voice (same ID as Bella)
    "Bella (Female)": "EXAVITQu4vr4xnSDxMaL",  # Alternative female (same ID as Lera)
    "Antoni (Male)": "ErXwobaYiN019PkySvjV",  # Antoni - male voice
    "Arnold (Male)": "VR6AewLTigWG4xSOukaG",  # Arnold - male voice
    "Adam (Male)": "pNInz6obpgDQGcFmaJgB",  # Adam - male voice
    "Domi (Female)": "AZnzlk1XvdvUeBnXmlld",  # Domi - female voice
    "Elli (Female)": "MF3mGyEYCl7XYWbV9V6O",  # Elli - female voice
    "Josh (Male)": "TxGEqnHWrfWFTfGW9XjX",  # Josh - male voice
    "Sam (Male)": "yoZ06aMxZJJ28mfd3POQ",  # Sam - male voice
}


class TTSService:
    """Text-to-Speech service using ElevenLabs API.

    Every public method is best-effort: failures are reported with ``print``
    and signalled through the return value (``None``, ``False``, an empty
    list or an empty stream) instead of propagating an exception.
    """

    # Voice used when the caller does not pick one (Rachel).
    DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"

    # Model requested for both one-shot and streaming generation.
    MODEL_ID = "eleven_turbo_v2_5"

    # (voice_id, name) pairs returned when the voice listing request fails.
    _FALLBACK_VOICES = (
        ("21m00Tcm4TlvDq8ikWAM", "Rachel (Default)"),
        ("ErXwobaYiN019PkySvjV", "Antoni"),
        ("MF3mGyEYCl7XYWbV9V6O", "Elli"),
    )

    def __init__(self, api_key: str, voice_id: str = DEFAULT_VOICE_ID):
        """
        Initialize TTS service

        Args:
            api_key: ElevenLabs API key. When empty, no client is created and
                the service stays unavailable.
            voice_id: Voice ID to use (default: Rachel)
        """
        self.api_key = api_key
        self.voice_id = voice_id
        self.client = None
        # True only once a client object has been constructed successfully.
        self.available = False

        if api_key:
            try:
                self.client = ElevenLabs(api_key=api_key)
                self.available = True
            except Exception as e:
                print(f"Error initializing ElevenLabs client: {e}")

    @staticmethod
    def _is_blank(text) -> bool:
        """Return True when *text* is empty/None or a whitespace-only string."""
        if not text:
            return True
        return isinstance(text, str) and not text.strip()

    def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> Optional[bytes]:
        """
        Convert text to speech

        Args:
            text: Text to convert
            voice_id: Optional voice ID override (falls back to the instance
                voice when omitted or empty)

        Returns:
            Audio bytes, or None if the client is missing, the text is blank,
            or generation fails
        """
        if not self.client:
            print("ElevenLabs client not initialized. Please check API key.")
            return None

        # Nothing to synthesize: skip the API call entirely.
        if self._is_blank(text):
            print("Error generating speech: text is empty")
            return None

        try:
            audio = self.client.generate(
                text=text,
                voice=voice_id or self.voice_id,
                model=self.MODEL_ID,
            )

            # Accept a result that is already a single bytes payload.
            if isinstance(audio, (bytes, bytearray)):
                return bytes(audio)

            # Otherwise treat it as an iterable of chunks; join in one pass
            # instead of repeated `+=`, which copies the buffer every time.
            return b"".join(audio)

        except Exception as e:
            print(f"Error generating speech: {e}")
            return None

    def text_to_speech_stream(self, text: str, voice_id: Optional[str] = None):
        """
        Convert text to speech with streaming

        Args:
            text: Text to convert
            voice_id: Optional voice ID override (falls back to the instance
                voice when omitted or empty)

        Yields:
            Audio chunks. The stream is empty when the client is missing or
            the text is blank, and ends early if generation fails.
        """
        if not self.client:
            print("ElevenLabs client not initialized. Please check API key.")
            return

        # Nothing to synthesize: skip the API call entirely.
        if self._is_blank(text):
            print("Error streaming speech: text is empty")
            return

        try:
            audio_stream = self.client.generate(
                text=text,
                voice=voice_id or self.voice_id,
                model=self.MODEL_ID,
                stream=True,
            )
            yield from audio_stream

        except Exception as e:
            print(f"Error streaming speech: {e}")
            return

    def save_audio(self, audio_bytes: bytes, filename: str) -> bool:
        """
        Save audio bytes to file

        Args:
            audio_bytes: Audio data (any bytes-like object)
            filename: Output filename

        Returns:
            True if the data was written, False otherwise. When the payload is
            not bytes-like (e.g. None from a failed generation) the target
            file is neither created nor truncated.
        """
        # Validate BEFORE opening: mode 'wb' truncates the file immediately,
        # so a bad payload would otherwise destroy an existing file.
        try:
            payload = memoryview(audio_bytes)
        except TypeError:
            print(
                "Error saving audio: expected bytes-like data, "
                f"got {type(audio_bytes).__name__}"
            )
            return False

        try:
            with open(filename, 'wb') as f:
                f.write(payload)
            return True
        except Exception as e:
            print(f"Error saving audio: {e}")
            return False

    def get_available_voices(self) -> List[Dict[str, str]]:
        """
        Get list of available voices

        Returns:
            List of ``{"voice_id": ..., "name": ...}`` dicts. Empty when no
            client is configured; a small built-in fallback list when the
            listing request fails.
        """
        if not self.client:
            return []

        try:
            voices = self.client.voices.get_all()
            return [{"voice_id": v.voice_id, "name": v.name} for v in voices.voices]
        except Exception as e:
            print(f"Error getting voices: {e}")
            # Fresh dicts on every call so callers may mutate the result.
            return [
                {"voice_id": vid, "name": name}
                for vid, name in self._FALLBACK_VOICES
            ]