Merge branch 'main' of https://huggingface.co/spaces/phxdev/podcaster
Browse files
- api_clients.py +21 -15
- app.py +6 -8
- models.py +2 -6
- requirements.txt +3 -1
- scraper.py +38 -21
api_clients.py
CHANGED
|
@@ -5,7 +5,7 @@ import elevenlabs
|
|
| 5 |
import time
|
| 6 |
from contextlib import asynccontextmanager
|
| 7 |
from logger import setup_logger, log_execution_time, log_async_execution_time
|
| 8 |
-
from models import
|
| 9 |
|
| 10 |
logger = setup_logger("api_clients")
|
| 11 |
|
|
@@ -49,6 +49,8 @@ class OpenRouterClient:
|
|
| 49 |
self.base_url = "https://openrouter.ai/api/v1"
|
| 50 |
self.headers = {
|
| 51 |
"Authorization": f"Bearer {api_key}",
|
|
|
|
|
|
|
| 52 |
"Content-Type": "application/json"
|
| 53 |
}
|
| 54 |
logger.debug("OpenRouter client initialized successfully")
|
|
@@ -66,6 +68,8 @@ class OpenRouterClient:
|
|
| 66 |
# Update headers when API key changes
|
| 67 |
self.headers = {
|
| 68 |
"Authorization": f"Bearer {value}",
|
|
|
|
|
|
|
| 69 |
"Content-Type": "application/json",
|
| 70 |
}
|
| 71 |
logger.info("OpenRouter API key updated successfully")
|
|
@@ -82,10 +86,7 @@ class OpenRouterClient:
|
|
| 82 |
Fetch available models from OpenRouter API using pydantic models
|
| 83 |
|
| 84 |
Returns:
|
| 85 |
-
List of tuples containing (model_id,
|
| 86 |
-
|
| 87 |
-
Raises:
|
| 88 |
-
ValueError: If API request fails
|
| 89 |
"""
|
| 90 |
logger.info("Fetching available models from OpenRouter")
|
| 91 |
async with self.get_session() as session:
|
|
@@ -94,7 +95,7 @@ class OpenRouterClient:
|
|
| 94 |
data = await response.json()
|
| 95 |
models = [OpenRouterModel(**model) for model in data["data"]]
|
| 96 |
logger.info(f"Successfully fetched {len(models)} models")
|
| 97 |
-
return [(model.
|
| 98 |
|
| 99 |
@log_async_execution_time(logger)
|
| 100 |
async def generate_script(self, content: str, prompt: str, model_id: str) -> str:
|
|
@@ -158,12 +159,16 @@ Focus on making it engaging and natural to listen to."""
|
|
| 158 |
async with self.get_session() as session:
|
| 159 |
async with session.post(
|
| 160 |
f"{self.base_url}/chat/completions",
|
| 161 |
-
json=
|
| 162 |
) as response:
|
| 163 |
-
response.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
data = await response.json()
|
| 165 |
-
|
| 166 |
-
|
| 167 |
except Exception as e:
|
| 168 |
logger.error(f"Script generation failed", exc_info=True)
|
| 169 |
raise
|
|
@@ -184,16 +189,17 @@ class ElevenLabsClient:
|
|
| 184 |
try:
|
| 185 |
voices = elevenlabs.voices()
|
| 186 |
return [(
|
| 187 |
-
|
| 188 |
f"{voice.name} ({voice.labels.get('accent', 'No accent')})" +
|
| 189 |
-
(f" - {voice.description[:50]}..." if voice.description else "")
|
|
|
|
| 190 |
) for voice in voices]
|
| 191 |
except Exception as e:
|
| 192 |
logger.error("Failed to fetch voices from ElevenLabs", exc_info=True)
|
| 193 |
raise
|
| 194 |
|
| 195 |
-
|
| 196 |
-
"""
|
| 197 |
logger.info(f"Starting audio generation with voice: {voice_id}")
|
| 198 |
logger.debug(f"Input text length: {len(text)} chars")
|
| 199 |
|
|
@@ -202,7 +208,7 @@ class ElevenLabsClient:
|
|
| 202 |
|
| 203 |
try:
|
| 204 |
start_time = time.time()
|
| 205 |
-
audio =
|
| 206 |
text=text,
|
| 207 |
voice=voice_id,
|
| 208 |
model="eleven_monolingual_v1"
|
|
|
|
| 5 |
import time
|
| 6 |
from contextlib import asynccontextmanager
|
| 7 |
from logger import setup_logger, log_execution_time, log_async_execution_time
|
| 8 |
+
from models import OpenRouterModel
|
| 9 |
|
| 10 |
logger = setup_logger("api_clients")
|
| 11 |
|
|
|
|
| 49 |
self.base_url = "https://openrouter.ai/api/v1"
|
| 50 |
self.headers = {
|
| 51 |
"Authorization": f"Bearer {api_key}",
|
| 52 |
+
"HTTP-Referer": "https://localhost:7860", # Required by OpenRouter
|
| 53 |
+
"X-Title": "URL to Podcast Generator", # Required by OpenRouter
|
| 54 |
"Content-Type": "application/json"
|
| 55 |
}
|
| 56 |
logger.debug("OpenRouter client initialized successfully")
|
|
|
|
| 68 |
# Update headers when API key changes
|
| 69 |
self.headers = {
|
| 70 |
"Authorization": f"Bearer {value}",
|
| 71 |
+
"HTTP-Referer": "https://localhost:7860",
|
| 72 |
+
"X-Title": "URL to Podcast Generator",
|
| 73 |
"Content-Type": "application/json",
|
| 74 |
}
|
| 75 |
logger.info("OpenRouter API key updated successfully")
|
|
|
|
| 86 |
Fetch available models from OpenRouter API using pydantic models
|
| 87 |
|
| 88 |
Returns:
|
| 89 |
+
List of tuples containing (model_name, model_id), one per model returned by the API
|
|
|
|
|
|
|
|
|
|
| 90 |
"""
|
| 91 |
logger.info("Fetching available models from OpenRouter")
|
| 92 |
async with self.get_session() as session:
|
|
|
|
| 95 |
data = await response.json()
|
| 96 |
models = [OpenRouterModel(**model) for model in data["data"]]
|
| 97 |
logger.info(f"Successfully fetched {len(models)} models")
|
| 98 |
+
return [(model.name, model.id) for model in models]
|
| 99 |
|
| 100 |
@log_async_execution_time(logger)
|
| 101 |
async def generate_script(self, content: str, prompt: str, model_id: str) -> str:
|
|
|
|
| 159 |
async with self.get_session() as session:
|
| 160 |
async with session.post(
|
| 161 |
f"{self.base_url}/chat/completions",
|
| 162 |
+
json=request_data
|
| 163 |
) as response:
|
| 164 |
+
if response.status != 200:
|
| 165 |
+
error_text = await response.text()
|
| 166 |
+
logger.error(f"OpenRouter API error: {error_text}")
|
| 167 |
+
raise ValueError(f"API request failed: {error_text}")
|
| 168 |
+
|
| 169 |
data = await response.json()
|
| 170 |
+
return data['choices'][0]['message']['content']
|
| 171 |
+
|
| 172 |
except Exception as e:
|
| 173 |
logger.error(f"Script generation failed", exc_info=True)
|
| 174 |
raise
|
|
|
|
| 189 |
try:
|
| 190 |
voices = elevenlabs.voices()
|
| 191 |
return [(
|
| 192 |
+
|
| 193 |
f"{voice.name} ({voice.labels.get('accent', 'No accent')})" +
|
| 194 |
+
(f" - {voice.description[:50]}..." if voice.description else ""),
|
| 195 |
+
voice.voice_id # Value (hidden from user)
|
| 196 |
) for voice in voices]
|
| 197 |
except Exception as e:
|
| 198 |
logger.error("Failed to fetch voices from ElevenLabs", exc_info=True)
|
| 199 |
raise
|
| 200 |
|
| 201 |
+
def generate_audio(self, text: str, voice_id: str):
|
| 202 |
+
"""Generate audio synchronously"""
|
| 203 |
logger.info(f"Starting audio generation with voice: {voice_id}")
|
| 204 |
logger.debug(f"Input text length: {len(text)} chars")
|
| 205 |
|
|
|
|
| 208 |
|
| 209 |
try:
|
| 210 |
start_time = time.time()
|
| 211 |
+
audio = elevenlabs.generate(
|
| 212 |
text=text,
|
| 213 |
voice=voice_id,
|
| 214 |
model="eleven_monolingual_v1"
|
app.py
CHANGED
|
@@ -43,8 +43,8 @@ class PodcasterUI:
|
|
| 43 |
# Generate script using the scraped content
|
| 44 |
script = await self.router_client.generate_script(webpage_content, prompt, model_id)
|
| 45 |
|
| 46 |
-
# Generate audio from the script
|
| 47 |
-
audio =
|
| 48 |
return script, audio
|
| 49 |
except Exception as e:
|
| 50 |
logger.error("Failed to generate podcast", exc_info=True)
|
|
@@ -66,17 +66,15 @@ class PodcasterUI:
|
|
| 66 |
with gr.Column():
|
| 67 |
openrouter_model = gr.Dropdown(
|
| 68 |
label='AI Model',
|
| 69 |
-
choices=
|
| 70 |
-
value=self.models[0][
|
| 71 |
-
type="index" # Use index to get the second element (id) from tuple
|
| 72 |
)
|
| 73 |
|
| 74 |
with gr.Column():
|
| 75 |
voice_model = gr.Dropdown(
|
| 76 |
label='Voice',
|
| 77 |
-
choices=[(
|
| 78 |
-
value=self.voices[0][
|
| 79 |
-
type="index" # Use index to get the second element (id) from tuple
|
| 80 |
)
|
| 81 |
|
| 82 |
prompt_input = gr.Textbox(
|
|
|
|
| 43 |
# Generate script using the scraped content
|
| 44 |
script = await self.router_client.generate_script(webpage_content, prompt, model_id)
|
| 45 |
|
| 46 |
+
# Generate audio from the script (now synchronous)
|
| 47 |
+
audio = self.elevenlabs_client.generate_audio(script, voice_id)
|
| 48 |
return script, audio
|
| 49 |
except Exception as e:
|
| 50 |
logger.error("Failed to generate podcast", exc_info=True)
|
|
|
|
| 66 |
with gr.Column():
|
| 67 |
openrouter_model = gr.Dropdown(
|
| 68 |
label='AI Model',
|
| 69 |
+
choices=self.models, # Each choice now has same id/display value
|
| 70 |
+
value=self.models[0][0] if len(self.models) > 1 else None,
|
|
|
|
| 71 |
)
|
| 72 |
|
| 73 |
with gr.Column():
|
| 74 |
voice_model = gr.Dropdown(
|
| 75 |
label='Voice',
|
| 76 |
+
choices=[(id, name) for id, name in self.voices],
|
| 77 |
+
value=self.voices[0][0] if len(self.voices) > 1 else None,
|
|
|
|
| 78 |
)
|
| 79 |
|
| 80 |
prompt_input = gr.Textbox(
|
models.py
CHANGED
|
@@ -9,15 +9,11 @@ class OpenRouterRequest(BaseModel):
|
|
| 9 |
model: str
|
| 10 |
messages: List[Message]
|
| 11 |
|
| 12 |
-
class
|
| 13 |
message: Message
|
| 14 |
-
index: int = 0
|
| 15 |
-
finish_reason: Optional[str] = None
|
| 16 |
|
| 17 |
class OpenRouterResponse(BaseModel):
|
| 18 |
-
|
| 19 |
-
choices: List[Choice]
|
| 20 |
-
model: str
|
| 21 |
|
| 22 |
class OpenRouterModel(BaseModel):
|
| 23 |
id: str
|
|
|
|
| 9 |
model: str
|
| 10 |
messages: List[Message]
|
| 11 |
|
| 12 |
+
class OpenRouterChoice(BaseModel):
|
| 13 |
message: Message
|
|
|
|
|
|
|
| 14 |
|
| 15 |
class OpenRouterResponse(BaseModel):
|
| 16 |
+
choices: List[OpenRouterChoice]
|
|
|
|
|
|
|
| 17 |
|
| 18 |
class OpenRouterModel(BaseModel):
|
| 19 |
id: str
|
requirements.txt
CHANGED
|
@@ -10,4 +10,6 @@ uvicorn
|
|
| 10 |
fastapi
|
| 11 |
langchain_anthropic
|
| 12 |
langchain_openai
|
| 13 |
-
langchain_google_genai
|
|
|
|
|
|
|
|
|
| 10 |
fastapi
|
| 11 |
langchain_anthropic
|
| 12 |
langchain_openai
|
| 13 |
+
langchain_google_genai
|
| 14 |
+
scrapingbee
|
| 15 |
+
|
scraper.py
CHANGED
|
@@ -1,24 +1,41 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
| 4 |
|
| 5 |
-
|
| 6 |
-
browser = Browser()
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from scrapingbee import ScrapingBeeClient
|
| 3 |
+
from logger import setup_logger
|
| 4 |
+
import json
|
| 5 |
|
| 6 |
+
logger = setup_logger("scraper")
|
|
|
|
| 7 |
|
| 8 |
+
# Initialize the ScrapingBee client with API key
|
| 9 |
+
client = ScrapingBeeClient(api_key=os.getenv('SCRAPINGBEE_API_KEY', ''))
|
| 10 |
|
| 11 |
+
def scrape_url(url: str) -> str:
|
| 12 |
+
"""
|
| 13 |
+
Scrape content from URL using ScrapingBee with AI extraction
|
| 14 |
+
|
| 15 |
+
Args:
|
| 16 |
+
url: The URL to scrape
|
| 17 |
+
|
| 18 |
+
Returns:
|
| 19 |
+
str: Extracted text content or error message
|
| 20 |
+
"""
|
| 21 |
+
try:
|
| 22 |
+
logger.info(f"Scraping URL: {url}")
|
| 23 |
+
response = client.get(
|
| 24 |
+
url,
|
| 25 |
+
params={
|
| 26 |
+
'stealth_proxy': True,
|
| 27 |
+
'country_code': 'us',
|
| 28 |
+
'ai_query': 'Extract the main text content from this page'
|
| 29 |
+
}
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
if response.status_code == 200:
|
| 33 |
+
logger.info(f"Successfully scraped URL: {url}")
|
| 34 |
+
return response.text if response.text else "No content could be extracted from the URL"
|
| 35 |
+
else:
|
| 36 |
+
logger.error(f"Failed to scrape URL: {url}, Status: {response.status_code}")
|
| 37 |
+
return f"Failed to download the URL. Status code: {response.status_code}"
|
| 38 |
+
|
| 39 |
+
except Exception as e:
|
| 40 |
+
logger.error(f"Error scraping URL: {url}", exc_info=True)
|
| 41 |
+
return f"Error scraping the URL: {str(e)}"
|