github-actions[bot] committed on
Commit
74be2eb
·
1 Parent(s): 1815b1f

Sync from GitHub abaefddd1c944e527964d7379da3cfc39209b3a5

Browse files
src/artifacts/podcast_generator.py CHANGED
@@ -76,6 +76,7 @@ class PodcastGenerator:
76
  tts_provider = tts_provider or os.getenv("TTS_PROVIDER", "edge")
77
  self.tts = get_tts_adapter(tts_provider)
78
  self.tts_provider = tts_provider
 
79
 
80
  # Default settings from .env
81
  self.default_duration = os.getenv("DEFAULT_PODCAST_DURATION", "5min")
@@ -141,13 +142,18 @@ class PodcastGenerator:
141
 
142
  # 3. Synthesize audio segments
143
  print(f"🎵 Synthesizing audio with {self.tts_provider}...")
 
144
  audio_segments = self._synthesize_segments(script, user_id, notebook_id, hosts)
145
  if not audio_segments:
 
 
 
 
 
 
 
146
  return {
147
- "error": (
148
- "Transcript generated but audio synthesis failed for all segments. "
149
- "Check TTS provider credentials, quota, and configured voices."
150
- ),
151
  "transcript": script,
152
  "audio_path": None,
153
  "metadata": {
@@ -159,6 +165,7 @@ class PodcastGenerator:
159
  "llm_model": self.model,
160
  "num_segments": len(script),
161
  "topic_focus": topic_focus,
 
162
  "generated_at": datetime.utcnow().isoformat(),
163
  },
164
  }
@@ -449,6 +456,7 @@ IMPORTANT:
449
  voices = voice_maps.get(self.tts_provider, voice_maps["edge"])
450
 
451
  audio_files: List[str] = []
 
452
  total = len(script)
453
 
454
  for i, segment in enumerate(script, 1):
@@ -463,7 +471,12 @@ IMPORTANT:
463
  audio_files.append(output_path)
464
  print(f" ✓ Segment {i}/{total}: {speaker}")
465
  except Exception as e:
466
- print(f" ⚠️ Failed segment {i}: {e}")
 
 
 
 
 
467
  continue
468
 
469
  return audio_files
 
76
  tts_provider = tts_provider or os.getenv("TTS_PROVIDER", "edge")
77
  self.tts = get_tts_adapter(tts_provider)
78
  self.tts_provider = tts_provider
79
+ self._last_tts_errors: List[str] = []
80
 
81
  # Default settings from .env
82
  self.default_duration = os.getenv("DEFAULT_PODCAST_DURATION", "5min")
 
142
 
143
  # 3. Synthesize audio segments
144
  print(f"🎵 Synthesizing audio with {self.tts_provider}...")
145
+ self._last_tts_errors = []
146
  audio_segments = self._synthesize_segments(script, user_id, notebook_id, hosts)
147
  if not audio_segments:
148
+ tts_error_preview = "; ".join(self._last_tts_errors[:3]).strip()
149
+ failure_message = (
150
+ "Transcript generated but audio synthesis failed for all segments. "
151
+ "Check TTS provider credentials, quota, and configured voices."
152
+ )
153
+ if tts_error_preview:
154
+ failure_message = f"{failure_message} Provider errors: {tts_error_preview}"
155
  return {
156
+ "error": failure_message,
 
 
 
157
  "transcript": script,
158
  "audio_path": None,
159
  "metadata": {
 
165
  "llm_model": self.model,
166
  "num_segments": len(script),
167
  "topic_focus": topic_focus,
168
+ "tts_errors": self._last_tts_errors[:20],
169
  "generated_at": datetime.utcnow().isoformat(),
170
  },
171
  }
 
456
  voices = voice_maps.get(self.tts_provider, voice_maps["edge"])
457
 
458
  audio_files: List[str] = []
459
+ self._last_tts_errors = []
460
  total = len(script)
461
 
462
  for i, segment in enumerate(script, 1):
 
471
  audio_files.append(output_path)
472
  print(f" ✓ Segment {i}/{total}: {speaker}")
473
  except Exception as e:
474
+ error_detail = (
475
+ f"segment={i}/{total}, speaker={speaker}, voice={voice}, "
476
+ f"error={type(e).__name__}: {' '.join(str(e).split())}"
477
+ )
478
+ self._last_tts_errors.append(error_detail)
479
+ print(f" ⚠️ Failed {error_detail}")
480
  continue
481
 
482
  return audio_files
src/artifacts/tts_adapter.py CHANGED
@@ -3,7 +3,7 @@ Text-to-Speech adapter supporting multiple providers.
3
  """
4
  import os
5
  from pathlib import Path
6
- from typing import Literal, Optional
7
  from abc import ABC, abstractmethod
8
  from dotenv import load_dotenv
9
 
@@ -66,26 +66,109 @@ class ElevenLabsTTS(TTSAdapter):
66
  from elevenlabs.client import ElevenLabs
67
 
68
  self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
 
 
69
  self.client = ElevenLabs(api_key=self.api_key)
70
  self.default_voice = os.getenv("TTS_ELEVENLABS_VOICE_1", "Rachel")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  def synthesize(self, text: str, output_path: str, voice: Optional[str] = None) -> str:
73
  """
74
  Popular voices: Rachel, Domi, Bella, Antoni, Elli, Josh, Arnold, Adam, Sam
75
  """
76
- voice = voice or self.default_voice
77
-
78
- audio = self.client.generate(
79
- text=text,
80
- voice=voice,
81
- model="eleven_monolingual_v1"
82
- )
83
-
84
- with open(output_path, "wb") as f:
85
- for chunk in audio:
86
- f.write(chunk)
87
-
88
- return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
 
91
  class EdgeTTS(TTSAdapter):
@@ -156,4 +239,4 @@ if __name__ == "__main__":
156
 
157
  print(f"✓ Audio generated: {output_file}")
158
  print(f" Provider: {args.provider or os.getenv('TTS_PROVIDER', 'edge')}")
159
- print(f" Voice: {args.voice or 'default'}")
 
3
  """
4
  import os
5
  from pathlib import Path
6
+ from typing import Any, Literal, Optional
7
  from abc import ABC, abstractmethod
8
  from dotenv import load_dotenv
9
 
 
66
  from elevenlabs.client import ElevenLabs
67
 
68
  self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
69
+ if not self.api_key:
70
+ raise ValueError("ELEVENLABS_API_KEY environment variable not set")
71
  self.client = ElevenLabs(api_key=self.api_key)
72
  self.default_voice = os.getenv("TTS_ELEVENLABS_VOICE_1", "Rachel")
73
+ self.default_model = os.getenv("TTS_ELEVENLABS_MODEL", "eleven_multilingual_v2")
74
+ self._voice_aliases = self._load_voice_aliases()
75
+
76
+ def _load_voice_aliases(self) -> dict[str, str]:
77
+ """Best-effort map of configured voice names to voice IDs."""
78
+ try:
79
+ response = self.client.voices.get_all()
80
+ voices = getattr(response, "voices", response)
81
+ except Exception:
82
+ return {}
83
+
84
+ aliases: dict[str, str] = {}
85
+ for voice in voices or []:
86
+ if isinstance(voice, dict):
87
+ name = voice.get("name")
88
+ voice_id = voice.get("voice_id")
89
+ else:
90
+ name = getattr(voice, "name", None)
91
+ voice_id = getattr(voice, "voice_id", None)
92
+ if name and voice_id:
93
+ aliases[str(name).strip().lower()] = str(voice_id).strip()
94
+ return aliases
95
+
96
+ def _resolve_voice(self, voice: str) -> str:
97
+ candidate = str(voice or "").strip()
98
+ if not candidate:
99
+ candidate = self.default_voice
100
+ return self._voice_aliases.get(candidate.lower(), candidate)
101
+
102
+ def _write_audio_output(self, audio: Any, output_path: str) -> None:
103
+ """
104
+ ElevenLabs SDK returns either bytes, file-like, or iterable chunks depending
105
+ on version/options. Handle all supported shapes safely.
106
+ """
107
+ Path(output_path).parent.mkdir(parents=True, exist_ok=True)
108
+ with open(output_path, "wb") as f:
109
+ if isinstance(audio, (bytes, bytearray)):
110
+ f.write(bytes(audio))
111
+ return
112
+
113
+ if hasattr(audio, "read"):
114
+ data = audio.read()
115
+ if isinstance(data, str):
116
+ data = data.encode("utf-8")
117
+ if not isinstance(data, (bytes, bytearray)):
118
+ raise TypeError("ElevenLabs returned unsupported file-like payload.")
119
+ f.write(bytes(data))
120
+ return
121
+
122
+ wrote_any = False
123
+ for chunk in audio:
124
+ if chunk is None:
125
+ continue
126
+ wrote_any = True
127
+ if isinstance(chunk, int):
128
+ f.write(bytes([chunk]))
129
+ elif isinstance(chunk, str):
130
+ f.write(chunk.encode("utf-8"))
131
+ elif isinstance(chunk, (bytes, bytearray)):
132
+ f.write(bytes(chunk))
133
+ else:
134
+ raise TypeError(f"Unsupported ElevenLabs audio chunk type: {type(chunk)!r}")
135
+
136
+ if not wrote_any:
137
+ raise RuntimeError("ElevenLabs returned an empty audio stream.")
138
 
139
  def synthesize(self, text: str, output_path: str, voice: Optional[str] = None) -> str:
140
  """
141
  Popular voices: Rachel, Domi, Bella, Antoni, Elli, Josh, Arnold, Adam, Sam
142
  """
143
+ requested_voice = voice or self.default_voice
144
+ resolved_voice = self._resolve_voice(requested_voice)
145
+ voice_candidates = [resolved_voice]
146
+ if requested_voice != resolved_voice:
147
+ voice_candidates.append(requested_voice)
148
+
149
+ model_candidates = [self.default_model]
150
+ if self.default_model != "eleven_multilingual_v2":
151
+ model_candidates.append("eleven_multilingual_v2")
152
+
153
+ errors: list[str] = []
154
+ for voice_candidate in voice_candidates:
155
+ for model_candidate in model_candidates:
156
+ try:
157
+ audio = self.client.generate(
158
+ text=text,
159
+ voice=voice_candidate,
160
+ model=model_candidate,
161
+ )
162
+ self._write_audio_output(audio, output_path)
163
+ return output_path
164
+ except Exception as exc:
165
+ errors.append(
166
+ f"voice={voice_candidate}, model={model_candidate}: "
167
+ f"{type(exc).__name__}: {exc}"
168
+ )
169
+
170
+ preview = " | ".join(errors[:3]) if errors else "unknown ElevenLabs error"
171
+ raise RuntimeError(f"ElevenLabs synthesis failed. {preview}")
172
 
173
 
174
  class EdgeTTS(TTSAdapter):
 
239
 
240
  print(f"✓ Audio generated: {output_file}")
241
  print(f" Provider: {args.provider or os.getenv('TTS_PROVIDER', 'edge')}")
242
+ print(f" Voice: {args.voice or 'default'}")