Spaces:

phxdev
/

podcaster

Runtime error

App Files Files Community

marks committed on Jan 28, 2025

Commit

a7eb60c

2 Parent(s): a352c42 5290967

Merge branch 'main' of https://huggingface.co/spaces/phxdev/podcaster

Browse files

Files changed (5) hide show

api_clients.py +21 -15
app.py +6 -8
models.py +2 -6
requirements.txt +3 -1
scraper.py +38 -21

api_clients.py CHANGED Viewed

@@ -5,7 +5,7 @@ import elevenlabs
 import time
 from contextlib import asynccontextmanager
 from logger import setup_logger, log_execution_time, log_async_execution_time
-from models import OpenRouterRequest, OpenRouterResponse, Message, OpenRouterModel
 logger = setup_logger("api_clients")
@@ -49,6 +49,8 @@ class OpenRouterClient:
         self.base_url = "https://openrouter.ai/api/v1"
         self.headers = {
             "Authorization": f"Bearer {api_key}",
             "Content-Type": "application/json"
         }
         logger.debug("OpenRouter client initialized successfully")
@@ -66,6 +68,8 @@ class OpenRouterClient:
         # Update headers when API key changes
         self.headers = {
             "Authorization": f"Bearer {value}",
             "Content-Type": "application/json",
         }
         logger.info("OpenRouter API key updated successfully")
@@ -82,10 +86,7 @@ class OpenRouterClient:
         Fetch available models from OpenRouter API using pydantic models
         Returns:
-            List of tuples containing (model_id, model_description)
-        Raises:
-            ValueError: If API request fails
         """
         logger.info("Fetching available models from OpenRouter")
         async with self.get_session() as session:
@@ -94,7 +95,7 @@ class OpenRouterClient:
                 data = await response.json()
                 models = [OpenRouterModel(**model) for model in data["data"]]
                 logger.info(f"Successfully fetched {len(models)} models")
-                return [(model.id, model.name) for model in models]
     @log_async_execution_time(logger)
     async def generate_script(self, content: str, prompt: str, model_id: str) -> str:
@@ -158,12 +159,16 @@ Focus on making it engaging and natural to listen to."""
             async with self.get_session() as session:
                 async with session.post(
                     f"{self.base_url}/chat/completions",
-                    json=request.dict()
                 ) as response:
-                    response.raise_for_status()
                     data = await response.json()
-                    router_response = OpenRouterResponse(**data)
-                    return router_response.choices[0].message.content
         except Exception as e:
             logger.error(f"Script generation failed", exc_info=True)
             raise
@@ -184,16 +189,17 @@ class ElevenLabsClient:
         try:
             voices = elevenlabs.voices()
             return [(
-                voice.voice_id,  # Value (hidden from user)
                 f"{voice.name} ({voice.labels.get('accent', 'No accent')})" +
-                (f" - {voice.description[:50]}..." if voice.description else "")
             ) for voice in voices]
         except Exception as e:
             logger.error("Failed to fetch voices from ElevenLabs", exc_info=True)
             raise
-    async def generate_audio(self, text: str, voice_id: str):
-        """Asynchronously generate audio"""
         logger.info(f"Starting audio generation with voice: {voice_id}")
         logger.debug(f"Input text length: {len(text)} chars")
@@ -202,7 +208,7 @@ class ElevenLabsClient:
         try:
             start_time = time.time()
-            audio = await elevenlabs.generate(  # Assuming elevenlabs supports async
                 text=text,
                 voice=voice_id,
                 model="eleven_monolingual_v1"

 import time
 from contextlib import asynccontextmanager
 from logger import setup_logger, log_execution_time, log_async_execution_time
+from models import OpenRouterModel
 logger = setup_logger("api_clients")
         self.base_url = "https://openrouter.ai/api/v1"
         self.headers = {
             "Authorization": f"Bearer {api_key}",
+            "HTTP-Referer": "https://localhost:7860",  # Required by OpenRouter
+            "X-Title": "URL to Podcast Generator",  # Required by OpenRouter
             "Content-Type": "application/json"
         }
         logger.debug("OpenRouter client initialized successfully")
         # Update headers when API key changes
         self.headers = {
             "Authorization": f"Bearer {value}",
+            "HTTP-Referer": "https://localhost:7860",
+            "X-Title": "URL to Podcast Generator",
             "Content-Type": "application/json",
         }
         logger.info("OpenRouter API key updated successfully")
         Fetch available models from OpenRouter API using pydantic models
         Returns:
+            List of (model_name, model_id) tuples: display name first, then the model id
         """
         logger.info("Fetching available models from OpenRouter")
         async with self.get_session() as session:
                 data = await response.json()
                 models = [OpenRouterModel(**model) for model in data["data"]]
                 logger.info(f"Successfully fetched {len(models)} models")
+                return [(model.name, model.id) for model in models]
     @log_async_execution_time(logger)
     async def generate_script(self, content: str, prompt: str, model_id: str) -> str:
             async with self.get_session() as session:
                 async with session.post(
                     f"{self.base_url}/chat/completions",
+                    json=request_data
                 ) as response:
+                    if response.status != 200:
+                        error_text = await response.text()
+                        logger.error(f"OpenRouter API error: {error_text}")
+                        raise ValueError(f"API request failed: {error_text}")
                     data = await response.json()
+                    return data['choices'][0]['message']['content']
         except Exception as e:
             logger.error(f"Script generation failed", exc_info=True)
             raise
         try:
             voices = elevenlabs.voices()
             return [(
                 f"{voice.name} ({voice.labels.get('accent', 'No accent')})" +
+                (f" - {voice.description[:50]}..." if voice.description else ""),
+                voice.voice_id  # Value (hidden from user)
             ) for voice in voices]
         except Exception as e:
             logger.error("Failed to fetch voices from ElevenLabs", exc_info=True)
             raise
+    def generate_audio(self, text: str, voice_id: str):
+        """Generate audio synchronously"""
         logger.info(f"Starting audio generation with voice: {voice_id}")
         logger.debug(f"Input text length: {len(text)} chars")
         try:
             start_time = time.time()
+            audio = elevenlabs.generate(
                 text=text,
                 voice=voice_id,
                 model="eleven_monolingual_v1"

app.py CHANGED Viewed

@@ -43,8 +43,8 @@ class PodcasterUI:
             # Generate script using the scraped content
             script = await self.router_client.generate_script(webpage_content, prompt, model_id)
-            # Generate audio from the script
-            audio = await self.elevenlabs_client.generate_audio(script, voice_id)
             return script, audio
         except Exception as e:
             logger.error("Failed to generate podcast", exc_info=True)
@@ -66,17 +66,15 @@ class PodcasterUI:
                         with gr.Column():
                             openrouter_model = gr.Dropdown(
                                 label='AI Model',
-                                choices=[(name, id) for id, name in self.models],  # Swap order for display
-                                value=self.models[0][1] if len(self.models) > 1 else None,
-                                type="index"  # Use index to get the second element (id) from tuple
                             )
                         with gr.Column():
                             voice_model = gr.Dropdown(
                                 label='Voice',
-                                choices=[(name, id) for id, name in self.voices],  # Swap order for display
-                                value=self.voices[0][1] if len(self.voices) > 1 else None,
-                                type="index"  # Use index to get the second element (id) from tuple
                             )
                     prompt_input = gr.Textbox(

             # Generate script using the scraped content
             script = await self.router_client.generate_script(webpage_content, prompt, model_id)
+            # Generate audio from the script (now synchronous)
+            audio = self.elevenlabs_client.generate_audio(script, voice_id)
             return script, audio
         except Exception as e:
             logger.error("Failed to generate podcast", exc_info=True)
                         with gr.Column():
                             openrouter_model = gr.Dropdown(
                                 label='AI Model',
+                                choices=self.models,  # Each choice now has same id/display value
+                                value=self.models[0][0] if len(self.models) > 1 else None,
                             )
                         with gr.Column():
                             voice_model = gr.Dropdown(
                                 label='Voice',
+                                choices=[(id, name) for id, name in self.voices],
+                                value=self.voices[0][0] if len(self.voices) > 1 else None,
                             )
                     prompt_input = gr.Textbox(

models.py CHANGED Viewed

@@ -9,15 +9,11 @@ class OpenRouterRequest(BaseModel):
     model: str
     messages: List[Message]
-class Choice(BaseModel):
     message: Message
-    index: int = 0
-    finish_reason: Optional[str] = None
 class OpenRouterResponse(BaseModel):
-    id: str
-    choices: List[Choice]
-    model: str
 class OpenRouterModel(BaseModel):
     id: str

     model: str
     messages: List[Message]
+class OpenRouterChoice(BaseModel):
     message: Message
 class OpenRouterResponse(BaseModel):
+    choices: List[OpenRouterChoice]
 class OpenRouterModel(BaseModel):
     id: str

requirements.txt CHANGED Viewed

@@ -10,4 +10,6 @@ uvicorn
 fastapi
 langchain_anthropic
 langchain_openai
-langchain_google_genai

 fastapi
 langchain_anthropic
 langchain_openai
+langchain_google_genai
+scrapingbee

scraper.py CHANGED Viewed

@@ -1,24 +1,41 @@
-def scrape_url(url):
-    from browser_use import Browser
-    from bs4 import BeautifulSoup
-    # Initialize the browser
-    browser = Browser()
-    # Open the URL
-    browser.open(url)
-    # Get the page content
-    content = browser.get_page_source()
-    # Close the browser
-    browser.close()
-    # Parse the HTML content
-    soup = BeautifulSoup(content, 'html.parser')
-    # Extract relevant text (modify the selector as needed)
-    text_elements = soup.find_all(['main'])
-    text_content = ' '.join([element.get_text() for element in text_elements])
-    return text_content.strip()

+import os
+from scrapingbee import ScrapingBeeClient
+from logger import setup_logger
+import json
+logger = setup_logger("scraper")
+# Initialize the ScrapingBee client with API key
+client = ScrapingBeeClient(api_key=os.getenv('SCRAPINGBEE_API_KEY', ''))
+def scrape_url(url: str) -> str:
+    """
+    Scrape content from URL using ScrapingBee with AI extraction
+    Args:
+        url: The URL to scrape
+    Returns:
+        str: Extracted text content or error message
+    """
+    try:
+        logger.info(f"Scraping URL: {url}")
+        response = client.get(
+            url,
+            params={
+                'stealth_proxy': True,
+                'country_code': 'us',
+                'ai_query': 'Extract the main text content from this page'
+            }
+        )
+        if response.status_code == 200:
+            logger.info(f"Successfully scraped URL: {url}")
+            return response.text if response.text else "No content could be extracted from the URL"
+        else:
+            logger.error(f"Failed to scrape URL: {url}, Status: {response.status_code}")
+            return f"Failed to download the URL. Status code: {response.status_code}"
+    except Exception as e:
+        logger.error(f"Error scraping URL: {url}", exc_info=True)
+        return f"Error scraping the URL: {str(e)}"