marks committed on
Commit
a7eb60c
·
2 Parent(s): a352c42 5290967

Merge branch 'main' of https://huggingface.co/spaces/phxdev/podcaster

Browse files
Files changed (5) hide show
  1. api_clients.py +21 -15
  2. app.py +6 -8
  3. models.py +2 -6
  4. requirements.txt +3 -1
  5. scraper.py +38 -21
api_clients.py CHANGED
@@ -5,7 +5,7 @@ import elevenlabs
5
  import time
6
  from contextlib import asynccontextmanager
7
  from logger import setup_logger, log_execution_time, log_async_execution_time
8
- from models import OpenRouterRequest, OpenRouterResponse, Message, OpenRouterModel
9
 
10
  logger = setup_logger("api_clients")
11
 
@@ -49,6 +49,8 @@ class OpenRouterClient:
49
  self.base_url = "https://openrouter.ai/api/v1"
50
  self.headers = {
51
  "Authorization": f"Bearer {api_key}",
 
 
52
  "Content-Type": "application/json"
53
  }
54
  logger.debug("OpenRouter client initialized successfully")
@@ -66,6 +68,8 @@ class OpenRouterClient:
66
  # Update headers when API key changes
67
  self.headers = {
68
  "Authorization": f"Bearer {value}",
 
 
69
  "Content-Type": "application/json",
70
  }
71
  logger.info("OpenRouter API key updated successfully")
@@ -82,10 +86,7 @@ class OpenRouterClient:
82
  Fetch available models from OpenRouter API using pydantic models
83
 
84
  Returns:
85
- List of tuples containing (model_id, model_description)
86
-
87
- Raises:
88
- ValueError: If API request fails
89
  """
90
  logger.info("Fetching available models from OpenRouter")
91
  async with self.get_session() as session:
@@ -94,7 +95,7 @@ class OpenRouterClient:
94
  data = await response.json()
95
  models = [OpenRouterModel(**model) for model in data["data"]]
96
  logger.info(f"Successfully fetched {len(models)} models")
97
- return [(model.id, model.name) for model in models]
98
 
99
  @log_async_execution_time(logger)
100
  async def generate_script(self, content: str, prompt: str, model_id: str) -> str:
@@ -158,12 +159,16 @@ Focus on making it engaging and natural to listen to."""
158
  async with self.get_session() as session:
159
  async with session.post(
160
  f"{self.base_url}/chat/completions",
161
- json=request.dict()
162
  ) as response:
163
- response.raise_for_status()
 
 
 
 
164
  data = await response.json()
165
- router_response = OpenRouterResponse(**data)
166
- return router_response.choices[0].message.content
167
  except Exception as e:
168
  logger.error(f"Script generation failed", exc_info=True)
169
  raise
@@ -184,16 +189,17 @@ class ElevenLabsClient:
184
  try:
185
  voices = elevenlabs.voices()
186
  return [(
187
- voice.voice_id, # Value (hidden from user)
188
  f"{voice.name} ({voice.labels.get('accent', 'No accent')})" +
189
- (f" - {voice.description[:50]}..." if voice.description else "")
 
190
  ) for voice in voices]
191
  except Exception as e:
192
  logger.error("Failed to fetch voices from ElevenLabs", exc_info=True)
193
  raise
194
 
195
- async def generate_audio(self, text: str, voice_id: str):
196
- """Asynchronously generate audio"""
197
  logger.info(f"Starting audio generation with voice: {voice_id}")
198
  logger.debug(f"Input text length: {len(text)} chars")
199
 
@@ -202,7 +208,7 @@ class ElevenLabsClient:
202
 
203
  try:
204
  start_time = time.time()
205
- audio = await elevenlabs.generate( # Assuming elevenlabs supports async
206
  text=text,
207
  voice=voice_id,
208
  model="eleven_monolingual_v1"
 
5
  import time
6
  from contextlib import asynccontextmanager
7
  from logger import setup_logger, log_execution_time, log_async_execution_time
8
+ from models import OpenRouterModel
9
 
10
  logger = setup_logger("api_clients")
11
 
 
49
  self.base_url = "https://openrouter.ai/api/v1"
50
  self.headers = {
51
  "Authorization": f"Bearer {api_key}",
52
+ "HTTP-Referer": "https://localhost:7860", # Required by OpenRouter
53
+ "X-Title": "URL to Podcast Generator", # Required by OpenRouter
54
  "Content-Type": "application/json"
55
  }
56
  logger.debug("OpenRouter client initialized successfully")
 
68
  # Update headers when API key changes
69
  self.headers = {
70
  "Authorization": f"Bearer {value}",
71
+ "HTTP-Referer": "https://localhost:7860",
72
+ "X-Title": "URL to Podcast Generator",
73
  "Content-Type": "application/json",
74
  }
75
  logger.info("OpenRouter API key updated successfully")
 
86
  Fetch available models from OpenRouter API using pydantic models
87
 
88
  Returns:
89
+ List of (model_name, model_id) tuples: the display name first, then the model id
 
 
 
90
  """
91
  logger.info("Fetching available models from OpenRouter")
92
  async with self.get_session() as session:
 
95
  data = await response.json()
96
  models = [OpenRouterModel(**model) for model in data["data"]]
97
  logger.info(f"Successfully fetched {len(models)} models")
98
+ return [(model.name, model.id) for model in models]
99
 
100
  @log_async_execution_time(logger)
101
  async def generate_script(self, content: str, prompt: str, model_id: str) -> str:
 
159
  async with self.get_session() as session:
160
  async with session.post(
161
  f"{self.base_url}/chat/completions",
162
+ json=request_data
163
  ) as response:
164
+ if response.status != 200:
165
+ error_text = await response.text()
166
+ logger.error(f"OpenRouter API error: {error_text}")
167
+ raise ValueError(f"API request failed: {error_text}")
168
+
169
  data = await response.json()
170
+ return data['choices'][0]['message']['content']
171
+
172
  except Exception as e:
173
  logger.error(f"Script generation failed", exc_info=True)
174
  raise
 
189
  try:
190
  voices = elevenlabs.voices()
191
  return [(
192
+
193
  f"{voice.name} ({voice.labels.get('accent', 'No accent')})" +
194
+ (f" - {voice.description[:50]}..." if voice.description else ""),
195
+ voice.voice_id # Value (hidden from user)
196
  ) for voice in voices]
197
  except Exception as e:
198
  logger.error("Failed to fetch voices from ElevenLabs", exc_info=True)
199
  raise
200
 
201
+ def generate_audio(self, text: str, voice_id: str):
202
+ """Generate audio synchronously"""
203
  logger.info(f"Starting audio generation with voice: {voice_id}")
204
  logger.debug(f"Input text length: {len(text)} chars")
205
 
 
208
 
209
  try:
210
  start_time = time.time()
211
+ audio = elevenlabs.generate(
212
  text=text,
213
  voice=voice_id,
214
  model="eleven_monolingual_v1"
app.py CHANGED
@@ -43,8 +43,8 @@ class PodcasterUI:
43
  # Generate script using the scraped content
44
  script = await self.router_client.generate_script(webpage_content, prompt, model_id)
45
 
46
- # Generate audio from the script
47
- audio = await self.elevenlabs_client.generate_audio(script, voice_id)
48
  return script, audio
49
  except Exception as e:
50
  logger.error("Failed to generate podcast", exc_info=True)
@@ -66,17 +66,15 @@ class PodcasterUI:
66
  with gr.Column():
67
  openrouter_model = gr.Dropdown(
68
  label='AI Model',
69
- choices=[(name, id) for id, name in self.models], # Swap order for display
70
- value=self.models[0][1] if len(self.models) > 1 else None,
71
- type="index" # Use index to get the second element (id) from tuple
72
  )
73
 
74
  with gr.Column():
75
  voice_model = gr.Dropdown(
76
  label='Voice',
77
- choices=[(name, id) for id, name in self.voices], # Swap order for display
78
- value=self.voices[0][1] if len(self.voices) > 1 else None,
79
- type="index" # Use index to get the second element (id) from tuple
80
  )
81
 
82
  prompt_input = gr.Textbox(
 
43
  # Generate script using the scraped content
44
  script = await self.router_client.generate_script(webpage_content, prompt, model_id)
45
 
46
+ # Generate audio from the script (now synchronous)
47
+ audio = self.elevenlabs_client.generate_audio(script, voice_id)
48
  return script, audio
49
  except Exception as e:
50
  logger.error("Failed to generate podcast", exc_info=True)
 
66
  with gr.Column():
67
  openrouter_model = gr.Dropdown(
68
  label='AI Model',
69
+ choices=self.models, # Each choice now has same id/display value
70
+ value=self.models[0][0] if len(self.models) > 1 else None,
 
71
  )
72
 
73
  with gr.Column():
74
  voice_model = gr.Dropdown(
75
  label='Voice',
76
+ choices=[(id, name) for id, name in self.voices],
77
+ value=self.voices[0][0] if len(self.voices) > 1 else None,
 
78
  )
79
 
80
  prompt_input = gr.Textbox(
models.py CHANGED
@@ -9,15 +9,11 @@ class OpenRouterRequest(BaseModel):
9
  model: str
10
  messages: List[Message]
11
 
12
- class Choice(BaseModel):
13
  message: Message
14
- index: int = 0
15
- finish_reason: Optional[str] = None
16
 
17
  class OpenRouterResponse(BaseModel):
18
- id: str
19
- choices: List[Choice]
20
- model: str
21
 
22
  class OpenRouterModel(BaseModel):
23
  id: str
 
9
  model: str
10
  messages: List[Message]
11
 
12
+ class OpenRouterChoice(BaseModel):
13
  message: Message
 
 
14
 
15
  class OpenRouterResponse(BaseModel):
16
+ choices: List[OpenRouterChoice]
 
 
17
 
18
  class OpenRouterModel(BaseModel):
19
  id: str
requirements.txt CHANGED
@@ -10,4 +10,6 @@ uvicorn
10
  fastapi
11
  langchain_anthropic
12
  langchain_openai
13
- langchain_google_genai
 
 
 
10
  fastapi
11
  langchain_anthropic
12
  langchain_openai
13
+ langchain_google_genai
14
+ scrapingbee
15
+
scraper.py CHANGED
@@ -1,24 +1,41 @@
1
- def scrape_url(url):
2
- from browser_use import Browser
3
- from bs4 import BeautifulSoup
 
4
 
5
- # Initialize the browser
6
- browser = Browser()
7
 
8
- # Open the URL
9
- browser.open(url)
10
 
11
- # Get the page content
12
- content = browser.get_page_source()
13
-
14
- # Close the browser
15
- browser.close()
16
-
17
- # Parse the HTML content
18
- soup = BeautifulSoup(content, 'html.parser')
19
-
20
- # Extract relevant text (modify the selector as needed)
21
- text_elements = soup.find_all(['main'])
22
- text_content = ' '.join([element.get_text() for element in text_elements])
23
-
24
- return text_content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from scrapingbee import ScrapingBeeClient
3
+ from logger import setup_logger
4
+ import json
5
 
6
+ logger = setup_logger("scraper")
 
7
 
8
+ # Initialize the ScrapingBee client with API key
9
+ client = ScrapingBeeClient(api_key=os.getenv('SCRAPINGBEE_API_KEY', ''))
10
 
11
+ def scrape_url(url: str) -> str:
12
+ """
13
+ Scrape content from URL using ScrapingBee with AI extraction
14
+
15
+ Args:
16
+ url: The URL to scrape
17
+
18
+ Returns:
19
+ str: Extracted text content or error message
20
+ """
21
+ try:
22
+ logger.info(f"Scraping URL: {url}")
23
+ response = client.get(
24
+ url,
25
+ params={
26
+ 'stealth_proxy': True,
27
+ 'country_code': 'us',
28
+ 'ai_query': 'Extract the main text content from this page'
29
+ }
30
+ )
31
+
32
+ if response.status_code == 200:
33
+ logger.info(f"Successfully scraped URL: {url}")
34
+ return response.text if response.text else "No content could be extracted from the URL"
35
+ else:
36
+ logger.error(f"Failed to scrape URL: {url}, Status: {response.status_code}")
37
+ return f"Failed to download the URL. Status code: {response.status_code}"
38
+
39
+ except Exception as e:
40
+ logger.error(f"Error scraping URL: {url}", exc_info=True)
41
+ return f"Error scraping the URL: {str(e)}"