marks committed on
Commit
a352c42
·
1 Parent(s): c15b647

Enhancements

Browse files
Files changed (3) hide show
  1. api_clients.py +65 -3
  2. podcast_generator.py +0 -9
  3. tts.py +0 -23
api_clients.py CHANGED
@@ -1,5 +1,5 @@
1
  from functools import lru_cache
2
- from typing import List, Tuple, Optional
3
  import aiohttp
4
  import elevenlabs
5
  import time
@@ -9,6 +9,37 @@ from models import OpenRouterRequest, OpenRouterResponse, Message, OpenRouterMod
9
 
10
  logger = setup_logger("api_clients")
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  class OpenRouterClient:
13
  """Handles OpenRouter API interactions with comprehensive logging and error tracking"""
14
 
@@ -84,12 +115,43 @@ class OpenRouterClient:
84
  logger.error("Prompt too short or missing")
85
  raise ValueError("Please provide a more detailed prompt")
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  try:
88
  request = OpenRouterRequest(
89
  model=model_id,
90
  messages=[
91
- Message(role="system", content="You are a podcast script writer."),
92
- Message(role="user", content=f"Create a podcast script from this content: {content}")
93
  ]
94
  )
95
 
 
1
  from functools import lru_cache
2
+ from typing import List, Tuple, Optional, Dict
3
  import aiohttp
4
  import elevenlabs
5
  import time
 
9
 
10
  logger = setup_logger("api_clients")
11
 
12
def preprocess_text(text: str) -> str:
    """
    Clean and format text by removing unwanted characters and formatting.

    Markdown emphasis markers are unwrapped, markdown links are reduced to
    their visible label, bracketed action blocks and podcast stage directions
    are dropped, and all whitespace runs are collapsed to single spaces.

    Args:
        text: Raw input text. ``None`` or an empty string yields ``""``.

    Returns:
        Cleaned single-line text suitable for podcast generation.
    """
    import re

    # Nothing to clean; also avoids a TypeError from re.sub on None.
    if not text:
        return ""

    # Unwrap markdown links/images BEFORE the generic [..] removal below,
    # otherwise the label is deleted and a bare "(url)" is left in the text.
    text = re.sub(r'!\[[^\]]*\]\([^)]*\)', '', text)  # Images: drop entirely
    text = re.sub(r'\[([^\]]+)\]\([^)]*\)', r'\1', text)  # Links: keep label

    # Remove markdown-style formatting (bold must run before italic so that
    # "**x**" is not half-consumed by the single-asterisk pattern).
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)  # Bold
    text = re.sub(r'\*(.+?)\*', r'\1', text)  # Italic
    text = re.sub(r'__(.+?)__', r'\1', text)  # Underline
    text = re.sub(r'~~(.+?)~~', r'\1', text)  # Strikethrough

    # Remove action blocks and special formatting
    text = re.sub(r'\[.*?\]', '', text)  # Remove [actions]
    text = re.sub(r'\{.*?\}', '', text)  # Remove {actions}
    text = re.sub(r'<.*?>', '', text)  # Remove <actions>

    # Remove podcast-specific formatting
    text = re.sub(r'\((?:pause|break|music|sfx|sound effect|jingle).*?\)', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\((host|speaker|guest)\s*\d*\s*:?\)', '', text, flags=re.IGNORECASE)
    text = re.sub(r'#\s*\d+\s*[:.-]', '', text)  # Remove segment numbers

    # Clean up whitespace: split() with no args also strips leading/trailing
    # whitespace and folds newlines/tabs into single spaces.
    return ' '.join(text.split())
42
+
43
  class OpenRouterClient:
44
  """Handles OpenRouter API interactions with comprehensive logging and error tracking"""
45
 
 
115
  logger.error("Prompt too short or missing")
116
  raise ValueError("Please provide a more detailed prompt")
117
 
118
+ # Clean input text
119
+ cleaned_content = preprocess_text(content)
120
+ cleaned_prompt = preprocess_text(prompt)
121
+
122
+ system_prompt = """You are an expert podcast script writer. Your task is to create engaging,
123
+ natural-sounding podcast scripts that flow conversationally while being informative and engaging.
124
+
125
+ Follow these guidelines:
126
+ 1. Write in a conversational, natural speaking style that sounds authentic
127
+ 2. Break complex topics into digestible segments with clear transitions
128
+ 3. Avoid technical jargon unless necessary, explaining complex terms when used
129
+ 4. Use natural speech patterns:
130
+ - Contractions (I'm, we're, let's)
131
+ - Casual language
132
+ - Rhetorical questions to engage listeners
133
+ 5. Include brief pauses for emphasis and pacing (but don't mark them explicitly)
134
+ 6. Incorporate storytelling elements to maintain engagement
135
+ 7. End with a clear conclusion and call-to-action
136
+ 8. Keep paragraphs short and focused for easier delivery
137
+ 9. Use simple sentence structures that flow naturally when spoken
138
+
139
+ Format the script for natural speech, avoiding any special characters or formatting."""
140
+
141
+ user_prompt = f"""Create a podcast script based on the following topic and content:
142
+
143
+ Topic: {cleaned_prompt}
144
+
145
+ Content to cover: {cleaned_content}
146
+
147
+ Focus on making it engaging and natural to listen to."""
148
+
149
  try:
150
  request = OpenRouterRequest(
151
  model=model_id,
152
  messages=[
153
+ Message(role="system", content=system_prompt),
154
+ Message(role="user", content=user_prompt)
155
  ]
156
  )
157
 
podcast_generator.py DELETED
@@ -1,9 +0,0 @@
1
class PodcastGenerator:
    """Generate podcast episode text from scraped content via a model client."""

    def __init__(self, model_client):
        """
        Args:
            model_client: Object exposing ``generate(prompt, max_length=...)``
                that returns a mapping with a ``'text'`` entry.
        """
        self.model_client = model_client

    def generate_podcast(self, scraped_content, max_length=300):
        """
        Ask the model client for a podcast episode based on ``scraped_content``.

        Args:
            scraped_content: Source material interpolated into the prompt.
            max_length: Length limit forwarded to the model client. The
                default of 300 was assumed by the original author to be
                roughly 3 minutes of speech.

        Returns:
            The generated text with surrounding whitespace stripped, or an
            empty string when the client returns nothing usable.
        """
        prompt = f"Create a podcast episode based on the following content: {scraped_content}"
        response = self.model_client.generate(prompt, max_length=max_length)
        # A missing response, a missing 'text' key and an explicit None value
        # are all treated as "no text" instead of crashing on .get/.strip.
        podcast_text = (response or {}).get('text') or ''
        return podcast_text.strip()
 
 
 
 
 
 
 
 
 
 
tts.py DELETED
@@ -1,23 +0,0 @@
1
def text_to_speech(text, api_key, output_path="podcast_episode.mp3", timeout=30):
    """
    Send ``text`` to the ElevenLabs text-to-speech endpoint and save the audio.

    Args:
        text: Text to synthesize.
        api_key: API key sent as a Bearer token in the Authorization header.
        output_path: File the returned audio bytes are written to. Defaults
            to the original hard-coded name ``"podcast_episode.mp3"``.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        The path of the written audio file.

    Raises:
        Exception: If the service responds with a status other than 200.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    import requests

    # NOTE(review): the URL, auth header and payload fields are kept exactly
    # as originally written; confirm them against the current ElevenLabs API
    # reference before relying on this function.
    url = "https://api.elevenlabs.io/v1/text-to-speech"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    data = {
        "text": text,
        "voice": "en_us_male",  # Specify the desired voice
        "output_format": "mp3"  # Specify the desired output format
    }

    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    # Guard clause: fail before touching the filesystem on a bad response.
    if response.status_code != 200:
        raise Exception(f"Error: {response.status_code}, {response.text}")

    with open(output_path, "wb") as audio_file:
        audio_file.write(response.content)
    return output_path