idacosta committed on
Commit
c9e812c
·
1 Parent(s): 2e4ee45

Fix podcast playback and enforce TTS length limit

Browse files
Files changed (1) hide show
  1. services/podcast_service.py +39 -1
services/podcast_service.py CHANGED
@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
17
 
18
  MODEL = "claude-haiku-4-5-20251001"
19
  MAX_TOKENS = 2048
 
20
 
21
  # Use facebook/mms-tts-eng for both — it's free and reliable
22
  # We differentiate voices by slightly modifying the text (different speaking rates aren't
@@ -98,6 +99,33 @@ def _chunk_text(text: str, max_chars: int = TTS_CHUNK_CHARS) -> list[str]:
98
  return chunks
99
 
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def _tts_request(text: str, token: str) -> bytes | None:
102
  """Deprecated: Use OpenAI TTS via _synthesize_audio instead."""
103
  return None
@@ -116,7 +144,7 @@ def _synthesize_audio(lines: list[tuple[str, str]], output_path: Path) -> bool:
116
 
117
  logger.info("TTS: Generating audio for %d lines (%d chars)", len(lines), len(full_text))
118
 
119
- max_chars = 4000
120
  if len(full_text) > max_chars:
121
  trimmed = _chunk_text(full_text, max_chars)[0]
122
  logger.warning(
@@ -176,6 +204,16 @@ def generate_podcast(notebook: Notebook, summary_content: str) -> Artifact:
176
  ("Alex", "Thanks for listening — we'll be back soon!"),
177
  ]
178
 
 
 
 
 
 
 
 
 
 
 
179
  markdown_script = _script_to_markdown(lines)
180
 
181
  audio_filename = f"podcast_{uuid.uuid4().hex[:12]}.mp3"
 
17
 
18
  MODEL = "claude-haiku-4-5-20251001"
19
  MAX_TOKENS = 2048
20
+ MAX_TTS_INPUT_CHARS = 4096
21
 
22
  # Use facebook/mms-tts-eng for both — it's free and reliable
23
  # We differentiate voices by slightly modifying the text (different speaking rates aren't
 
99
  return chunks
100
 
101
 
102
def _truncate_lines_for_tts_limit(lines: list[tuple[str, str]], max_chars: int = MAX_TTS_INPUT_CHARS) -> list[tuple[str, str]]:
    """Trim a podcast script so its serialized form fits the OpenAI TTS input limit.

    Each entry is measured as ``"<speaker>: <text>"``, and every entry after the
    first is charged one extra separator character. Entries are kept in order
    while they fit. The first entry that would overflow the budget is cut down
    to the space that is left (trailing whitespace stripped, and dropped
    entirely if nothing remains); all later entries are discarded.

    Args:
        lines: Ordered ``(speaker, text)`` pairs making up the script.
        max_chars: Maximum total serialized length allowed.

    Returns:
        A new list holding the leading pairs that fit within ``max_chars``,
        with the final pair possibly shortened.
    """
    result: list[tuple[str, str]] = []
    used = 0

    for speaker, text in lines:
        label = f"{speaker}: "
        separator = 1 if result else 0
        cost = separator + len(f"{label}{text}")

        if used + cost > max_chars:
            # Budget exhausted: salvage whatever part of this line still fits,
            # then stop — nothing after it can be included.
            room = max_chars - used - separator - len(label)
            if room > 0:
                clipped = text[:room].rstrip()
                if clipped:
                    result.append((speaker, clipped))
            break

        result.append((speaker, text))
        used += cost

    return result
127
+
128
+
129
  def _tts_request(text: str, token: str) -> bytes | None:
130
  """Deprecated: Use OpenAI TTS via _synthesize_audio instead."""
131
  return None
 
144
 
145
  logger.info("TTS: Generating audio for %d lines (%d chars)", len(lines), len(full_text))
146
 
147
+ max_chars = MAX_TTS_INPUT_CHARS
148
  if len(full_text) > max_chars:
149
  trimmed = _chunk_text(full_text, max_chars)[0]
150
  logger.warning(
 
204
  ("Alex", "Thanks for listening — we'll be back soon!"),
205
  ]
206
 
207
+ original_line_count = len(lines)
208
+ lines = _truncate_lines_for_tts_limit(lines, MAX_TTS_INPUT_CHARS)
209
+ if len(lines) < original_line_count:
210
+ logger.warning(
211
+ "Podcast script truncated to fit %d-char TTS limit (%d -> %d lines)",
212
+ MAX_TTS_INPUT_CHARS,
213
+ original_line_count,
214
+ len(lines),
215
+ )
216
+
217
  markdown_script = _script_to_markdown(lines)
218
 
219
  audio_filename = f"podcast_{uuid.uuid4().hex[:12]}.mp3"