marks
committed on
Commit
·
b5f9861
1
Parent(s):
697ec60
Upgraded prompt
Browse files- prompt_templates.py +29 -0
- setup.py +5 -1
- text_processor.py +84 -0
- tts.py +4 -0
prompt_templates.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# System-level instructions for the script-writing model: ten numbered style
# rules followed by one "bad" (dialog-formatted) and one "good" (plain
# narrative) example. The text is sent verbatim, so it is kept as a single
# literal with no indentation inside it.
PODCAST_SYSTEM_PROMPT = """You are a professional podcast scriptwriter. Follow these rules strictly:
1. Write in natural, conversational prose only
2. Never use markdown formatting
3. Never write dialog or conversation format
4. Never use speaker labels, colons, or turn-taking
5. Never include stage directions or [bracketed text]
6. Never use asterisks, underscores, or other formatting symbols
7. Write as a continuous narrative
8. Avoid technical jargon unless explicitly explaining it
9. Use complete sentences and proper transitions
10. Never include URLs or raw links

Bad example:
John: This is interesting
[excited] Mary: I agree!

Good example:
This topic is particularly interesting, and there's strong agreement among experts about its significance.
"""
|
| 20 |
+
|
| 21 |
+
def create_podcast_prompt(topic, duration_minutes=10):
    """Build the user prompt asking for a single-narrator podcast script.

    Args:
        topic: Subject of the episode; interpolated into the prompt as-is.
        duration_minutes: Target running time in minutes (default 10).

    Returns:
        The prompt text. It refers to "the style guidelines provided", so it
        is presumably meant to be sent together with PODCAST_SYSTEM_PROMPT.
    """
    script_request = f"""Using the style guidelines provided, create a {duration_minutes}-minute podcast script about {topic}.
Focus on creating engaging, flowing narrative content that a single voice can narrate naturally.
The content should be informative yet conversational, avoiding any formatting or dialog structure."""
    return script_request
|
| 25 |
+
|
| 26 |
+
def create_episode_segments(topic, segments=3):
    """Build the prompt asking for an episode split into linked segments.

    Args:
        topic: Subject of the episode; interpolated into the prompt as-is.
        segments: Number of segments to request (default 3).

    Returns:
        The prompt text, which also asks for transitional phrases between
        segments and a single narrative voice.
    """
    segment_request = f"""Create {segments} distinct segments about {topic}.
Each segment should flow naturally into the next, using clear transitional phrases.
Remember to maintain a single narrative voice throughout."""
    return segment_request
|
setup.py
CHANGED
|
@@ -6,6 +6,10 @@ setup(
|
|
| 6 |
packages=find_packages(),
|
| 7 |
install_requires=[
|
| 8 |
'rich',
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
]
|
| 11 |
)
|
|
|
|
| 6 |
packages=find_packages(),
|
| 7 |
install_requires=[
|
| 8 |
'rich',
|
| 9 |
+
'requests',
|
| 10 |
+
'python-dotenv',
|
| 11 |
+
'openai', # If using OpenAI
|
| 12 |
+
'anthropic', # If using Claude
|
| 13 |
+
'regex', # For more advanced regex operations
|
| 14 |
]
|
| 15 |
)
|
text_processor.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
def remove_dialog_formatting(text):
    """Remove common dialog markers and stage directions from ``text``.

    Handles, line by line:
      * bracketed speaker labels such as ``[John]:``
      * parenthetical and bracketed stage directions such as ``(laughs)``
        or ``[excited]``
      * plain speaker labels such as ``John:``, ``JOHN:`` or ``HOST2:``,
        including the horizontal whitespace that follows the colon

    Args:
        text: Raw script text, possibly spanning several lines.

    Returns:
        The text with those markers removed. Line breaks are preserved;
        whitespace left behind in the middle of a line by a removed stage
        direction is not collapsed here.
    """
    # Bracketed labels must go first: once the generic bracket stripping
    # below has run, "[John]: hi" would be left as ": hi".
    text = re.sub(r'^[ \t]*\[[^\]\n]*\]:[ \t]*', '', text, flags=re.MULTILINE)

    # Stage directions, wherever they occur in a line.
    text = re.sub(r'\([^)]*\)', '', text)
    text = re.sub(r'\[[^\]]*\]', '', text)

    # Plain labels are stripped last and may be preceded by whitespace, so
    # a label that followed a stage direction ("[excited] Mary: ...") is
    # still recognised at the start of its line. [ \t]* rather than \s*
    # keeps the match from swallowing a newline.
    text = re.sub(
        r'^[ \t]*(?:[A-Z0-9]+|[A-Z][a-z]+):[ \t]*',
        '',
        text,
        flags=re.MULTILINE,
    )

    return text
|
| 14 |
+
|
| 15 |
+
def remove_breakthrough_formatting(text):
    """Remove LLM formatting that made it through the prompts.

    Strips bracketed and parenthetical asides, quoted speaker labels,
    HTML-like tags, ``--- ... ---`` separators, heading markers, inline
    hashtags and leading speaker labels.

    A speaker label is recognised only as one to three capitalised words at
    the start of a line followed by a colon (``Mary:``, ``Dr. Smith:``,
    ``Host 1:``). Ordinary prose containing a colon, clock times and URL
    schemes such as ``https:`` are left alone.

    Args:
        text: Script text, possibly spanning several lines.

    Returns:
        The text with the markers removed. Line breaks are preserved.
    """
    patterns = [
        (r'\[.*?\]', ''),        # bracketed asides / stage directions
        (r'\(.*?\)', ''),        # parenthetical asides
        (r'"\w+:"', ''),         # quoted speaker labels
        (r'<.*?>', ''),          # HTML-like tags
        (r'---.*?---', ''),      # section separators
        # Heading markers: drop only the leading '#' run and keep the
        # heading text so it can still be narrated.
        (r'^[ \t]*#+[ \t]*', ''),
        # Inline hashtags ("#podcast"); the lookbehind keeps "C#" intact.
        (r'(?<!\w)#\w+', ''),
        # Speaker labels, stripped last so that a label preceded by a
        # removed aside ("[excited] Mary:") is at the start of its line.
        # [ \t]* instead of \s* so the following newline is not eaten.
        (r"^[ \t]*[A-Z][\w.'-]*(?:[ \t]+[A-Z0-9][\w.'-]*){0,2}:[ \t]*", ''),
    ]

    for pattern, replacement in patterns:
        text = re.sub(pattern, replacement, text, flags=re.MULTILINE)
    return text
|
| 30 |
+
|
| 31 |
+
def convert_to_monologue(text):
    """Convert multi-party dialog into a single flowing narrative.

    Each non-empty line has a leading speaker label removed. Every line
    after the first that starts with an uppercase letter is treated as a new
    thought and prefixed with a transitional phrase, cycling through a fixed
    list. The pieces are joined with single spaces.

    Args:
        text: Script text with one utterance per line.

    Returns:
        The narrative as a single line. Returns ``""`` when the input has no
        content.
    """
    transitions = [
        "Then", "After that", "Next", "Following that",
        "Subsequently", "Moving on", "Additionally",
    ]

    narrative = []
    current_transition = 0

    for line in text.split('\n'):
        # Remove speaker labels if any.
        cleaned_line = re.sub(r'^[A-Z0-9\[\]]+:\s*', '', line.strip())
        cleaned_line = re.sub(r'^[A-Z][a-z]+:\s*', '', cleaned_line).strip()

        # A blank line, or one that held nothing but a label, contributes
        # nothing; skipping it also avoids indexing into an empty string.
        if not cleaned_line:
            continue

        # Add a transition if it seems like a new thought.
        if narrative and cleaned_line[0].isupper():
            phrase = transitions[current_transition]
            narrative.append(f"{phrase}, {_lower_sentence_start(cleaned_line)}")
            current_transition = (current_transition + 1) % len(transitions)
        else:
            narrative.append(cleaned_line)

    return ' '.join(narrative)


def _lower_sentence_start(sentence):
    """Lowercase the first letter of ``sentence`` so it can follow a comma.

    Only the first character is changed; the rest of the sentence keeps its
    capitalisation. The pronoun "I" (and contractions such as "I'm") and
    acronym-like words whose second character is uppercase are left as they
    are. Ordinary proper nouns cannot be detected and are still lowercased.
    ``sentence`` must be non-empty and must not start with whitespace.
    """
    first_word = sentence.split(None, 1)[0]
    is_pronoun_i = (
        first_word.rstrip(".,;:!?") == "I"
        or first_word.startswith(("I'", "I\u2019"))
    )
    is_acronym = len(first_word) > 1 and first_word[1].isupper()
    if is_pronoun_i or is_acronym:
        return sentence
    return sentence[0].lower() + sentence[1:]
|
| 57 |
+
|
| 58 |
+
def clean_formatting(text):
    """Remove markdown and other formatting symbols from ``text``.

    Fenced code blocks and inline code spans are deleted together with
    their contents. Bold, italic, underscore emphasis and strikethrough
    markers are removed while the emphasised text is kept.

    Args:
        text: Text that may contain markdown.

    Returns:
        The text without those markers.
    """
    # Code goes first so that '*' or '_' inside code cannot pair up with
    # emphasis markers in the surrounding prose.
    text = re.sub(r'```[\s\S]*?```', '', text)
    text = re.sub(r'`[^`]*`', '', text)

    # Double markers are handled before single ones; otherwise the single
    # patterns would consume half of a '**' or '__' pair.
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)              # Bold
    text = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', text)     # Bold (underscores)
    text = re.sub(r'\*(.+?)\*', r'\1', text)                  # Italic
    # Underscores count as emphasis only at word boundaries, so identifiers
    # such as snake_case_name are left intact.
    text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)       # Underscore emphasis
    text = re.sub(r'~~(.+?)~~', r'\1', text)                  # Strikethrough

    return text
|
| 71 |
+
|
| 72 |
+
def process_for_podcast(text):
    """Run the full clean-up pipeline that prepares text for narration.

    The stages run in a fixed order: dialog markers, markdown symbols,
    remaining breakthrough formatting, then conversion to a monologue.
    Finally all whitespace is normalised.

    Args:
        text: Raw script text.

    Returns:
        The cleaned script on a single line, with no leading or trailing
        whitespace and no runs of consecutive whitespace.
    """
    pipeline = (
        remove_dialog_formatting,
        clean_formatting,
        remove_breakthrough_formatting,
        convert_to_monologue,
    )
    for stage in pipeline:
        text = stage(text)

    # \s+ also matches newlines, so a single substitution both flattens any
    # line breaks and collapses repeated spaces.
    return re.sub(r'\s+', ' ', text).strip()
|
tts.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
| 1 |
import re
|
| 2 |
import requests
|
|
|
|
| 3 |
|
| 4 |
def clean_text_for_speech(text):
|
|
|
|
|
|
|
|
|
|
| 5 |
# Replace URLs with readable text
|
| 6 |
text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
|
| 7 |
' link ', text)
|
|
|
|
| 1 |
import re
|
| 2 |
import requests
|
| 3 |
+
from .text_processor import process_for_podcast
|
| 4 |
|
| 5 |
def clean_text_for_speech(text):
|
| 6 |
+
# First apply podcast-specific processing
|
| 7 |
+
text = process_for_podcast(text)
|
| 8 |
+
|
| 9 |
# Replace URLs with readable text
|
| 10 |
text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
|
| 11 |
' link ', text)
|