Spaces:

phxdev
/

podcaster

Runtime error

App Files Files Community

marks committed on Jan 28, 2025

Commit

b5f9861

1 Parent(s): 697ec60

Upgraded prompt

Browse files

Files changed (4) hide show

prompt_templates.py +29 -0
setup.py +5 -1
text_processor.py +84 -0
tts.py +4 -0

prompt_templates.py ADDED Viewed

	@@ -0,0 +1,29 @@

# System prompt text: ten numbered style rules for single-voice prose,
# followed by one rejected ("Bad") and one accepted ("Good") sample.
PODCAST_SYSTEM_PROMPT = "\n".join((
    "You are a professional podcast scriptwriter. Follow these rules strictly:",
    "1. Write in natural, conversational prose only",
    "2. Never use markdown formatting",
    "3. Never write dialog or conversation format",
    "4. Never use speaker labels, colons, or turn-taking",
    "5. Never include stage directions or [bracketed text]",
    "6. Never use asterisks, underscores, or other formatting symbols",
    "7. Write as a continuous narrative",
    "8. Avoid technical jargon unless explicitly explaining it",
    "9. Use complete sentences and proper transitions",
    "10. Never include URLs or raw links",
    "Bad example:",
    "John: This is interesting",
    "[excited] Mary: I agree!",
    "Good example:",
    "This topic is particularly interesting, and there's strong agreement among experts about its significance.",
    "",  # empty final item makes the joined text end with a newline
))
def create_podcast_prompt(topic, duration_minutes=10):
    """Build the request text asking for a single-narrator script on *topic*.

    Args:
        topic: Subject of the episode; interpolated verbatim.
        duration_minutes: Target length written into the request (default 10).

    Returns:
        A three-line prompt string with no trailing newline.
    """
    request_lines = (
        f"Using the style guidelines provided, create a {duration_minutes}-minute podcast script about {topic}.",
        "Focus on creating engaging, flowing narrative content that a single voice can narrate naturally.",
        "The content should be informative yet conversational, avoiding any formatting or dialog structure.",
    )
    return "\n".join(request_lines)
def create_episode_segments(topic, segments=3):
    """Build the request text asking for several linked segments on *topic*.

    Args:
        topic: Subject of the episode; interpolated verbatim.
        segments: Number of segments to request (default 3).

    Returns:
        A three-line prompt string with no trailing newline.
    """
    request_lines = (
        f"Create {segments} distinct segments about {topic}.",
        "Each segment should flow naturally into the next, using clear transitional phrases.",
        "Remember to maintain a single narrative voice throughout.",
    )
    return "\n".join(request_lines)

setup.py CHANGED Viewed

@@ -6,6 +6,10 @@ setup(
     packages=find_packages(),
     install_requires=[
         'rich',
-        # add other dependencies here
     ]
 )

     packages=find_packages(),
     install_requires=[
         'rich',
+        'requests',
+        'python-dotenv',
+        'openai',  # If using OpenAI
+        'anthropic',  # If using Claude
+        'regex',  # For more advanced regex operations
     ]
 )

text_processor.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import re
def remove_dialog_formatting(text):
    """Strip speaker labels and stage directions from dialog-style text.

    Handles bracketed labels ("[John]:"), bracketed or parenthetical
    directions ("[excited]", "(laughs)"), and plain labels at the start of a
    line ("John:", "JOHN:", "SPEAKER1:"). Directions are removed before plain
    labels so that a label sitting behind a direction, as in
    "[excited] Mary: I agree!", is still found.

    Args:
        text: The raw text to clean.

    Returns:
        The text with labels and directions removed. Line breaks outside the
        removed spans are kept.
    """
    # Bracketed speaker labels go first; otherwise the generic bracket removal
    # below would leave a dangling ":" at the start of the line.
    text = re.sub(r'^[ \t]*\[[^\]\n]+\]:[ \t]*', '', text, flags=re.MULTILINE)
    # Parenthetical and bracketed stage directions.
    text = re.sub(r'\([^)]*\)', '', text)
    text = re.sub(r'\[[^\]]*\]', '', text)
    # Plain labels: ALLCAPS/digits or a single Capitalised word, followed by a
    # colon. [ \t] is used instead of \s so the match never eats a line break.
    text = re.sub(
        r'^[ \t]*(?:[A-Z0-9]+|[A-Z][a-z]+):[ \t]*',
        '',
        text,
        flags=re.MULTILINE,
    )
    return text
def remove_breakthrough_formatting(text):
    """Remove any LLM formatting that made it through the prompts.

    Removes bracketed and parenthetical asides, quoted speaker labels,
    HTML-like tags, ``---…---`` separators, markdown heading markers,
    hashtags, and short speaker labels at the start of a line.

    A speaker label is one to three words, the first capitalised and the rest
    starting with a capital or digit, followed by a colon and then whitespace
    or end of line. Ordinary sentences that contain a colon
    ("The answer is simple: ...") and clock times ("10:30") are left alone.

    Args:
        text: The text to clean.

    Returns:
        The text with the patterns above removed. Line breaks are kept.
    """
    patterns = [
        (r'\[.*?\]', ''),              # Bracketed asides
        (r'\(.*?\)', ''),              # Parenthetical asides
        (r'"\w+:"', ''),               # Quoted speaker labels
        (r'<.*?>', ''),                # HTML-like tags
        (r'---.*?---', ''),            # Section separators
        (r'^[ \t]*#{1,6}[ \t]+', ''),  # Heading markers; heading text is kept
        (r'#\w+', ''),                 # Hashtags
        # Speaker labels run last so labels exposed by the removals above
        # ("[excited] Mary: ...") are caught. (?!\S) requires whitespace or
        # end of line after the colon, which protects times such as "10:30".
        (r"^[ \t]*[A-Z][\w'-]*(?:[ \t]+[A-Z0-9][\w'-]*){0,2}:(?!\S)[ \t]*", ''),
    ]
    for pattern, replacement in patterns:
        text = re.sub(pattern, replacement, text, flags=re.MULTILINE)
    return text
def _lowercase_sentence_start(sentence):
    """Lowercase the first letter of *sentence* unless it opens with "I" or an acronym."""
    first_word = sentence.split(None, 1)[0].rstrip('.,;:!?')
    starts_with_pronoun = first_word == "I" or first_word.startswith(("I'", "I\u2019"))
    looks_like_acronym = len(first_word) > 1 and first_word[1].isupper()
    if starts_with_pronoun or looks_like_acronym:
        return sentence
    return sentence[0].lower() + sentence[1:]


def convert_to_monologue(text):
    """Convert multi-party dialog into a flowing narrative.

    Each non-empty line has a leading speaker label removed. Every line after
    the first that starts with an uppercase letter is prefixed with a
    transition phrase, cycling through a fixed list. Lines that are empty once
    the label is removed (for example a bare "JOHN:") are dropped.

    Only the first letter of a prefixed line is lowercased, and not when the
    line opens with "I" or an acronym, so names and abbreviations later in the
    sentence keep their capitalisation.

    Args:
        text: Newline-separated dialog or prose.

    Returns:
        A single string with the processed lines joined by spaces.
    """
    transitions = [
        "Then", "After that", "Next", "Following that",
        "Subsequently", "Moving on", "Additionally"
    ]
    narrative = []
    current_transition = 0
    for line in text.split('\n'):
        # Remove speaker labels if any.
        cleaned_line = re.sub(r'^[A-Z0-9\[\]]+:\s*', '', line.strip())
        cleaned_line = re.sub(r'^[A-Z][a-z]+:\s*', '', cleaned_line).strip()
        if not cleaned_line:
            # Blank line or label-only line: nothing to narrate, and indexing
            # [0] on it below would raise IndexError.
            continue
        if narrative and cleaned_line[0].isupper():
            phrase = transitions[current_transition]
            narrative.append(f"{phrase}, {_lowercase_sentence_start(cleaned_line)}")
            current_transition = (current_transition + 1) % len(transitions)
        else:
            narrative.append(cleaned_line)
    return ' '.join(narrative)
def clean_formatting(text):
    """Remove markdown and other formatting symbols.

    Fenced code blocks and inline code spans are deleted together with their
    content. Bold, italic, underscore emphasis and strikethrough markers are
    removed while the wrapped text is kept.

    Emphasis markers only count when they hug the text they wrap, and
    underscore markers must also sit on a word boundary. This leaves
    identifiers such as ``my_var_name`` and spaced asterisks such as
    ``2 * 3 * 4`` untouched.

    Args:
        text: The text to clean.

    Returns:
        The text without markdown emphasis markers or code spans.
    """
    # Code goes first so symbols inside it cannot pair with emphasis markers
    # in the surrounding prose.
    text = re.sub(r'```[\s\S]*?```', '', text)
    text = re.sub(r'`[^`]*`', '', text)
    # (?=\S) and (?<=\S) require the markers to touch the wrapped text.
    text = re.sub(r'\*\*(?=\S)(.+?)(?<=\S)\*\*', r'\1', text)                # **bold**
    text = re.sub(r'(?<!\w)__(?=\S)(.+?)(?<=\S)__(?!\w)', r'\1', text)      # __bold__
    text = re.sub(r'\*(?=\S)(.+?)(?<=\S)\*', r'\1', text)                    # *italic*
    text = re.sub(r'(?<!\w)_(?=\S)(.+?)(?<=\S)_(?!\w)', r'\1', text)        # _emphasis_
    text = re.sub(r'~~(.+?)~~', r'\1', text)                                 # ~~strike~~
    return text
def process_for_podcast(text):
    """Main function to process text for podcast narration.

    Runs the cleanup stages in order: dialog markers, markdown formatting,
    leftover LLM formatting, then conversion to a single narrative. The
    result is normalised to one line.

    Args:
        text: Raw script text.

    Returns:
        The cleaned text as a single line with single spaces and no leading
        or trailing whitespace.
    """
    text = remove_dialog_formatting(text)
    text = clean_formatting(text)
    text = remove_breakthrough_formatting(text)
    text = convert_to_monologue(text)
    # \s+ already covers newlines, so one pass collapses every whitespace run.
    text = re.sub(r'\s+', ' ', text)
    # Removing an aside can strand a space before punctuation
    # ("Well (laughs), okay" -> "Well , okay"); pull the punctuation back.
    text = re.sub(r' ([,.;:!?])', r'\1', text)
    return text.strip()

tts.py CHANGED Viewed

@@ -1,7 +1,11 @@
 import re
 import requests
 def clean_text_for_speech(text):
     # Replace URLs with readable text
     text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                   ' link ', text)

 import re
 import requests
+from .text_processor import process_for_podcast
 def clean_text_for_speech(text):
+    # First apply podcast-specific processing
+    text = process_for_podcast(text)
     # Replace URLs with readable text
     text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                   ' link ', text)