Spaces:

phxdev
/

podcaster

Runtime error

App Files Files Community

marks committed on Jan 28, 2025

Commit

4033555

1 Parent(s): 4145430

Fixed gradio errors

Browse files

Files changed (1) hide show

interface.py +15 -127

interface.py CHANGED Viewed

@@ -2,7 +2,7 @@ import asyncio
 import os
 import time
 from dataclasses import dataclass
-from typing import List, Optional, AsyncGenerator
 import gradio as gr
 from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
@@ -10,9 +10,6 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.text import Text
 from logger import setup_logger, log_execution_time, log_async_execution_time
-from browser_use import Agent, Browser
-from browser_use.browser.browser import BrowserContext
 from api_clients import OpenRouterClient, ElevenLabsClient
 load_dotenv()
@@ -20,116 +17,6 @@ load_dotenv()
 console = Console()
 logger = setup_logger("interface")
-@dataclass
-class ActionResult:
-	is_done: bool
-	extracted_content: Optional[str]
-	error: Optional[str]
-	include_in_memory: bool
-@dataclass
-class AgentHistoryList:
-	all_results: List[ActionResult]
-	all_model_outputs: List[dict]
-def parse_agent_history(history_str: str) -> None:
-	# Split the content into sections based on ActionResult entries
-	sections = history_str.split('ActionResult(')
-	for i, section in enumerate(sections[1:], 1):  # Skip first empty section
-		# Extract relevant information
-		content = ''
-		if 'extracted_content=' in section:
-			content = section.split('extracted_content=')[1].split(',')[0].strip("'")
-		if content:
-			header = Text(f'Step {i}', style='bold blue')
-			panel = Panel(content, title=header, border_style='blue')
-			console.print(panel)
-			console.print()
-async def run_browser_task(
-	task: str,
-	api_key: str,
-	provider: str = 'openai',
-	model: str = 'gpt-4-vision',
-	headless: bool = True,
-) -> str:
-	if not api_key.strip():
-		return 'Please provide an API key'
-	if provider == 'openai':
-		os.environ['OPENAI_API_KEY'] = api_key
-		llm = ChatOpenAI(model=model)
-	elif provider == 'anthropic':
-		os.environ['ANTHROPIC_API_KEY'] = api_key
-		llm = ChatAnthropic(model=model)
-	else:  # google
-		os.environ['GOOGLE_API_KEY'] = api_key
-		llm = ChatGoogleGenerativeAI(model=model)
-	try:
-		agent = Agent(
-			task=task,
-			llm=llm,
-			browser=Browser(BrowserContext(headless=True))
-		)
-		result = await agent.run()
-		#  TODO: The result cloud be parsed better
-		return result
-	except Exception as e:
-		return f'Error: {str(e)}'
-@log_async_execution_time(logger)
-async def scrape_content(url: str) -> str:
-    """
-    Scrape and summarize content from the given URL using browser automation
-    This function performs the following steps:
-    1. Validates the input URL
-    2. Initializes the browser agent
-    3. Extracts and summarizes the content
-    Args:
-        url: Target URL to scrape
-    Returns:
-        Summarized content suitable for podcast generation
-    Raises:
-        ValueError: If URL is invalid or content extraction fails
-    """
-    logger.info(f"Starting content scrape for URL: {url}")
-    # Input validation
-    if not url.startswith(('http://', 'https://')):
-        logger.error(f"Invalid URL format: {url}")
-        raise ValueError("URL must start with http:// or https://")
-    try:
-        logger.debug("Initializing LLM and browser agent")
-        llm = ChatOpenAI(model="gpt-4")
-        agent = Agent(
-            task=f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way.",
-            llm=llm,
-            browser=Browser(BrowserContext(headless=True))
-        )
-        logger.info("Executing content extraction")
-        result = await agent.run()
-        logger.debug(f"Content extraction successful. Length: {len(result)} chars")
-        logger.debug(f"Content preview: {result[:200]}...")
-        return result
-    except Exception as e:
-        logger.error(f"Content extraction failed for {url}", exc_info=True)
-        raise
 @log_async_execution_time(logger)
 async def create_podcast(
     url: str,
@@ -138,14 +25,12 @@ async def create_podcast(
     voice_id: str,
     openrouter_key: str,
     model_id: str,
-) -> AsyncGenerator[tuple[Optional[str], str], None]:
     """
     Create a podcast through a multi-step process:
     1. Content extraction from URL
     2. Script generation using AI
     3. Voice synthesis
-    Progress updates are yielded at each step for UI feedback.
     """
     logger.info(f"Starting podcast creation for URL: {url}")
     logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
@@ -159,21 +44,24 @@ async def create_podcast(
         # Phase 1: Content scraping
         logger.info("Phase 1/3: Content scraping")
-        yield None, "Scraping website content..."
-        content = await scrape_content(url)
         logger.debug(f"Scraped content length: {len(content)} chars")
         # Phase 2: Script generation
         logger.info("Phase 2/3: Script generation")
-        yield None, "Generating podcast script..."
         script = await openrouter.generate_script(content, prompt, model_id)
         logger.debug(f"Generated script length: {len(script)} chars")
         # Phase 3: Audio synthesis
         logger.info("Phase 3/3: Audio generation")
-        yield None, "Converting to audio..."
-        audio = elevenlabs.generate_audio(script, voice_id)
-        logger.debug(f"Generated audio size: {len(audio)} bytes")
         # Save output
         audio_path = f"podcast_{int(time.time())}.mp3"
@@ -182,11 +70,11 @@ async def create_podcast(
             f.write(audio)
         logger.info("Podcast creation completed successfully")
-        yield audio_path, "Podcast created successfully!"
     except Exception as e:
         logger.error("Podcast creation failed", exc_info=True)
-        yield None, f"Error: {str(e)}"
 def create_ui():
     logger.info("Initializing Gradio interface")
@@ -231,7 +119,7 @@ def create_ui():
                 submit_btn = gr.Button('Create Podcast', variant='primary')
             with gr.Column(scale=1):
-                audio_output = gr.Audio(label="Generated Podcast")
                 status = gr.Textbox(label='Status', interactive=False)
         # Event handlers
@@ -276,4 +164,4 @@ def create_ui():
 if __name__ == '__main__':
     demo = create_ui()
-    demo.launch()

 import os
 import time
 from dataclasses import dataclass
+from typing import List, Optional, AsyncGenerator, Tuple
 import gradio as gr
 from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
 from rich.panel import Panel
 from rich.text import Text
 from logger import setup_logger, log_execution_time, log_async_execution_time
 from api_clients import OpenRouterClient, ElevenLabsClient
 load_dotenv()
 console = Console()
 logger = setup_logger("interface")
 @log_async_execution_time(logger)
 async def create_podcast(
     url: str,
     voice_id: str,
     openrouter_key: str,
     model_id: str,
+) -> Tuple[Optional[str], str]:
     """
     Create a podcast through a multi-step process:
     1. Content extraction from URL
     2. Script generation using AI
     3. Voice synthesis
     """
     logger.info(f"Starting podcast creation for URL: {url}")
     logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
         # Phase 1: Content scraping
         logger.info("Phase 1/3: Content scraping")
+        if not url.startswith(('http://', 'https://')):
+            raise ValueError("URL must start with http:// or https://")
+        logger.debug("Initializing LLM and browser agent")
+        llm = ChatOpenAI(model="gpt-4")
+        task = f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way."
+        content = await llm.apredict(task)
         logger.debug(f"Scraped content length: {len(content)} chars")
         # Phase 2: Script generation
         logger.info("Phase 2/3: Script generation")
         script = await openrouter.generate_script(content, prompt, model_id)
         logger.debug(f"Generated script length: {len(script)} chars")
         # Phase 3: Audio synthesis
         logger.info("Phase 3/3: Audio generation")
+        audio = await elevenlabs.generate_audio(script, voice_id)
+        logger.debug(f"Generated audio data received")
         # Save output
         audio_path = f"podcast_{int(time.time())}.mp3"
             f.write(audio)
         logger.info("Podcast creation completed successfully")
+        return audio_path, "Podcast created successfully!"
     except Exception as e:
         logger.error("Podcast creation failed", exc_info=True)
+        return None, f"Error: {str(e)}"
 def create_ui():
     logger.info("Initializing Gradio interface")
                 submit_btn = gr.Button('Create Podcast', variant='primary')
             with gr.Column(scale=1):
+                audio_output = gr.Audio(label="Generated Podcast", type="filepath")
                 status = gr.Textbox(label='Status', interactive=False)
         # Event handlers
 if __name__ == '__main__':
     demo = create_ui()
+    demo.queue().launch()