github-actions[bot] committed on
Commit
62f0a86
·
1 Parent(s): 74be2eb

Sync from GitHub 3b0f7c9651fbf32085152edd156d5113f1f288cc

Browse files
Files changed (1) hide show
  1. src/artifacts/tts_adapter.py +79 -5
src/artifacts/tts_adapter.py CHANGED
@@ -71,6 +71,7 @@ class ElevenLabsTTS(TTSAdapter):
71
  self.client = ElevenLabs(api_key=self.api_key)
72
  self.default_voice = os.getenv("TTS_ELEVENLABS_VOICE_1", "Rachel")
73
  self.default_model = os.getenv("TTS_ELEVENLABS_MODEL", "eleven_multilingual_v2")
 
74
  self._voice_aliases = self._load_voice_aliases()
75
 
76
  def _load_voice_aliases(self) -> dict[str, str]:
@@ -99,6 +100,83 @@ class ElevenLabsTTS(TTSAdapter):
99
  candidate = self.default_voice
100
  return self._voice_aliases.get(candidate.lower(), candidate)
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def _write_audio_output(self, audio: Any, output_path: str) -> None:
103
  """
104
  ElevenLabs SDK returns either bytes, file-like, or iterable chunks depending
@@ -154,11 +232,7 @@ class ElevenLabsTTS(TTSAdapter):
154
  for voice_candidate in voice_candidates:
155
  for model_candidate in model_candidates:
156
  try:
157
- audio = self.client.generate(
158
- text=text,
159
- voice=voice_candidate,
160
- model=model_candidate,
161
- )
162
  self._write_audio_output(audio, output_path)
163
  return output_path
164
  except Exception as exc:
 
71
  self.client = ElevenLabs(api_key=self.api_key)
72
  self.default_voice = os.getenv("TTS_ELEVENLABS_VOICE_1", "Rachel")
73
  self.default_model = os.getenv("TTS_ELEVENLABS_MODEL", "eleven_multilingual_v2")
74
+ self.output_format = os.getenv("TTS_ELEVENLABS_OUTPUT_FORMAT", "mp3_44100_128")
75
  self._voice_aliases = self._load_voice_aliases()
76
 
77
  def _load_voice_aliases(self) -> dict[str, str]:
 
100
  candidate = self.default_voice
101
  return self._voice_aliases.get(candidate.lower(), candidate)
102
 
103
+ def _try_call_variants(self, method: Any, variants: list[dict[str, Any]]) -> Any:
104
+ """
105
+ Some ElevenLabs SDK versions use different parameter names.
106
+ Try a small set of known-compatible signatures.
107
+ """
108
+ last_type_error: TypeError | None = None
109
+ for kwargs in variants:
110
+ try:
111
+ return method(**kwargs)
112
+ except TypeError as exc:
113
+ last_type_error = exc
114
+ continue
115
+ if last_type_error:
116
+ raise last_type_error
117
+ raise RuntimeError("Unable to call ElevenLabs SDK method with known signatures.")
118
+
119
+ def _request_audio(self, text: str, voice_candidate: str, model_candidate: str) -> Any:
120
+ """
121
+ Support both legacy and modern ElevenLabs Python SDK APIs:
122
+ - legacy: client.generate(...)
123
+ - modern: client.text_to_speech.convert(...)/convert_as_stream(...)
124
+ """
125
+ if hasattr(self.client, "generate"):
126
+ return self._try_call_variants(
127
+ self.client.generate,
128
+ [
129
+ {"text": text, "voice": voice_candidate, "model": model_candidate},
130
+ {"text": text, "voice": voice_candidate, "model_id": model_candidate},
131
+ {"text": text, "voice_id": voice_candidate, "model_id": model_candidate},
132
+ ],
133
+ )
134
+
135
+ tts_api = getattr(self.client, "text_to_speech", None)
136
+ if tts_api is not None:
137
+ if hasattr(tts_api, "convert_as_stream"):
138
+ return self._try_call_variants(
139
+ tts_api.convert_as_stream,
140
+ [
141
+ {
142
+ "text": text,
143
+ "voice_id": voice_candidate,
144
+ "model_id": model_candidate,
145
+ "output_format": self.output_format,
146
+ },
147
+ {
148
+ "text": text,
149
+ "voice_id": voice_candidate,
150
+ "model_id": model_candidate,
151
+ },
152
+ {"text": text, "voice": voice_candidate, "model": model_candidate},
153
+ ],
154
+ )
155
+
156
+ if hasattr(tts_api, "convert"):
157
+ return self._try_call_variants(
158
+ tts_api.convert,
159
+ [
160
+ {
161
+ "text": text,
162
+ "voice_id": voice_candidate,
163
+ "model_id": model_candidate,
164
+ "output_format": self.output_format,
165
+ },
166
+ {
167
+ "text": text,
168
+ "voice_id": voice_candidate,
169
+ "model_id": model_candidate,
170
+ },
171
+ {"text": text, "voice": voice_candidate, "model": model_candidate},
172
+ ],
173
+ )
174
+
175
+ raise AttributeError(
176
+ "No compatible ElevenLabs synthesis method found on client "
177
+ "(expected generate() or text_to_speech.convert())."
178
+ )
179
+
180
  def _write_audio_output(self, audio: Any, output_path: str) -> None:
181
  """
182
  ElevenLabs SDK returns either bytes, file-like, or iterable chunks depending
 
232
  for voice_candidate in voice_candidates:
233
  for model_candidate in model_candidates:
234
  try:
235
+ audio = self._request_audio(text, voice_candidate, model_candidate)
 
 
 
 
236
  self._write_audio_output(audio, output_path)
237
  return output_path
238
  except Exception as exc: