Spaces:

Group-1-5010
/

NotebookLM

Running

App Files Community

idacosta committed 20 days ago

Commit

c72ee4a

1 Parent(s): c9e812c

Constrain podcast script length via model prompt and rewrite pass

Browse files

Files changed (1) hide show

services/podcast_service.py +35 -0

services/podcast_service.py CHANGED Viewed

@@ -18,6 +18,7 @@ logger = logging.getLogger(__name__)
 MODEL = "claude-haiku-4-5-20251001"
 MAX_TOKENS = 2048
 MAX_TTS_INPUT_CHARS = 4096
 # Use facebook/mms-tts-eng for both — it's free and reliable
 # We differentiate voices by slightly modifying the text (different speaking rates aren't
@@ -47,6 +48,8 @@ FORMAT RULES (strictly follow these):
 - No stage directions, no asterisks, no markdown, no headers
 - Each line is one continuous paragraph — no line breaks within a turn
 - End with a closing exchange where both hosts wrap up
 Example format:
 Alex: Hey everyone, welcome back! Today we're diving into something really fascinating. Sam, want to kick us off?
@@ -55,6 +58,10 @@ Sam: Absolutely! So the big idea here is...
 Now write the podcast script:"""
 def _parse_script_lines(raw: str) -> list[tuple[str, str]]:
     lines = []
     for line in raw.strip().splitlines():
@@ -193,6 +200,34 @@ def generate_podcast(notebook: Notebook, summary_content: str) -> Artifact:
         raw_script = response.content[0].text or ""
         lines = _parse_script_lines(raw_script)
         if not lines:
             raise ValueError("No speaker lines parsed from response.")

 MODEL = "claude-haiku-4-5-20251001"
 MAX_TOKENS = 2048
 MAX_TTS_INPUT_CHARS = 4096
+TARGET_SCRIPT_CHARS = 3900
 # Use facebook/mms-tts-eng for both — it's free and reliable
 # We differentiate voices by slightly modifying the text (different speaking rates aren't
 - No stage directions, no asterisks, no markdown, no headers
 - Each line is one continuous paragraph — no line breaks within a turn
 - End with a closing exchange where both hosts wrap up
+- Keep the ENTIRE script under {TARGET_SCRIPT_CHARS} characters total (including speaker labels)
+- If needed, prioritize clarity and completeness over extra detail so the script ends naturally
 Example format:
 Alex: Hey everyone, welcome back! Today we're diving into something really fascinating. Sam, want to kick us off?
 Now write the podcast script:"""
+def _script_char_len(lines: list[tuple[str, str]]) -> int:
+    return len("\n".join([f"{speaker}: {text}" for speaker, text in lines]))
 def _parse_script_lines(raw: str) -> list[tuple[str, str]]:
     lines = []
     for line in raw.strip().splitlines():
         raw_script = response.content[0].text or ""
         lines = _parse_script_lines(raw_script)
+        if lines and _script_char_len(lines) > TARGET_SCRIPT_CHARS:
+            logger.warning(
+                "Podcast script exceeded target size (%d chars), requesting concise rewrite",
+                _script_char_len(lines),
+            )
+            rewrite_prompt = (
+                f"Rewrite this podcast script so it is <= {TARGET_SCRIPT_CHARS} characters total, "
+                "keeps the same key points, and ends with a natural closing exchange.\n\n"
+                "RULES:\n"
+                "- Keep the same Alex/Sam format\n"
+                "- Every line must start with Alex: or Sam:\n"
+                "- No markdown or stage directions\n\n"
+                f"SCRIPT TO REWRITE:\n{raw_script}"
+            )
+            rewrite_response = client.messages.create(
+                model=MODEL,
+                max_tokens=MAX_TOKENS,
+                system=(
+                    "You are a podcast editor. Compress scripts while preserving meaning and flow. "
+                    "Output only Alex/Sam dialogue lines."
+                ),
+                messages=[{"role": "user", "content": rewrite_prompt}],
+            )
+            rewritten = rewrite_response.content[0].text or ""
+            rewritten_lines = _parse_script_lines(rewritten)
+            if rewritten_lines:
+                lines = rewritten_lines
         if not lines:
             raise ValueError("No speaker lines parsed from response.")