Spaces:

Group-1-5010
/

NotebookLM

Sleeping

App Files Community

idacosta committed on Feb 28

Commit

c9e812c

1 Parent(s): 2e4ee45

Fix podcast playback and enforce TTS length limit

Browse files

Files changed (1) hide show

services/podcast_service.py +39 -1

services/podcast_service.py CHANGED Viewed

@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
 MODEL = "claude-haiku-4-5-20251001"
 MAX_TOKENS = 2048
 # Use facebook/mms-tts-eng for both — it's free and reliable
 # We differentiate voices by slightly modifying the text (different speaking rates aren't
@@ -98,6 +99,33 @@ def _chunk_text(text: str, max_chars: int = TTS_CHUNK_CHARS) -> list[str]:
     return chunks
 def _tts_request(text: str, token: str) -> bytes | None:
     """Deprecated: Use OpenAI TTS via _synthesize_audio instead.
 
     Both arguments are ignored and the result is always None.
     """
     return None
@@ -116,7 +144,7 @@ def _synthesize_audio(lines: list[tuple[str, str]], output_path: Path) -> bool:
         logger.info("TTS: Generating audio for %d lines (%d chars)", len(lines), len(full_text))
-        max_chars = 4000
         if len(full_text) > max_chars:
             trimmed = _chunk_text(full_text, max_chars)[0]
             logger.warning(
@@ -176,6 +204,16 @@ def generate_podcast(notebook: Notebook, summary_content: str) -> Artifact:
             ("Alex", "Thanks for listening — we'll be back soon!"),
         ]
     markdown_script = _script_to_markdown(lines)
     audio_filename = f"podcast_{uuid.uuid4().hex[:12]}.mp3"

 MODEL = "claude-haiku-4-5-20251001"
 MAX_TOKENS = 2048
+MAX_TTS_INPUT_CHARS = 4096
 # Use facebook/mms-tts-eng for both — it's free and reliable
 # We differentiate voices by slightly modifying the text (different speaking rates aren't
     return chunks
+def _truncate_lines_for_tts_limit(lines: list[tuple[str, str]], max_chars: int = MAX_TTS_INPUT_CHARS) -> list[tuple[str, str]]:
+    """Ensure speaker lines serialize to <= max_chars for OpenAI TTS input.
+
+    Lines are kept whole, in order, while the running total still fits. Each
+    line is measured as ``"<speaker>: <text>"`` and every line after the first
+    is charged one extra character for a separator. The first line that does
+    not fit is cut down to the remaining budget (and kept only if some text
+    survives); every line after it is dropped.
+
+    Args:
+        lines: ``(speaker, text)`` pairs.
+        max_chars: Character budget for the serialized lines.
+
+    Returns:
+        A new list of ``(speaker, text)`` pairs; the last pair may carry
+        shortened text.
+    """
+    kept: list[tuple[str, str]] = []
+    current_len = 0
+    for speaker, text in lines:
+        line = f"{speaker}: {text}"
+        line_len = len(line)
+        # One separator character between lines, none before the first.
+        # NOTE(review): assumes lines are later joined with a single character — confirm against the caller.
+        sep_len = 1 if kept else 0
+        if current_len + sep_len + line_len <= max_chars:
+            # The whole line fits: keep it unchanged and move on.
+            kept.append((speaker, text))
+            current_len += sep_len + line_len
+            continue
+        # First line that overflows: see whether a shortened version still fits.
+        remaining = max_chars - current_len - sep_len
+        # Only worth keeping if there is room for the "speaker: " prefix plus at least one character.
+        if remaining > len(f"{speaker}: "):
+            prefix = f"{speaker}: "
+            allowed_text_chars = remaining - len(prefix)
+            # Plain character slice (may cut mid-word); trailing whitespace is stripped.
+            trimmed_text = text[:allowed_text_chars].rstrip()
+            if trimmed_text:
+                kept.append((speaker, trimmed_text))
+        # Stop at the first overflowing line so later lines are never kept out of order.
+        break
+    return kept
 def _tts_request(text: str, token: str) -> bytes | None:
     """Deprecated: Use OpenAI TTS via _synthesize_audio instead.
 
     Both arguments are ignored and the result is always None.
     """
     return None
         logger.info("TTS: Generating audio for %d lines (%d chars)", len(lines), len(full_text))
+        max_chars = MAX_TTS_INPUT_CHARS
         if len(full_text) > max_chars:
             trimmed = _chunk_text(full_text, max_chars)[0]
             logger.warning(
             ("Alex", "Thanks for listening — we'll be back soon!"),
         ]
+    original_line_count = len(lines)
+    lines = _truncate_lines_for_tts_limit(lines, MAX_TTS_INPUT_CHARS)
+    if len(lines) < original_line_count:
+        logger.warning(
+            "Podcast script truncated to fit %d-char TTS limit (%d -> %d lines)",
+            MAX_TTS_INPUT_CHARS,
+            original_line_count,
+            len(lines),
+        )
     markdown_script = _script_to_markdown(lines)
     audio_filename = f"podcast_{uuid.uuid4().hex[:12]}.mp3"