marks committed on
Commit
4033555
·
1 Parent(s): 4145430

Fixed gradio errors

Browse files
Files changed (1) hide show
  1. interface.py +15 -127
interface.py CHANGED
@@ -2,7 +2,7 @@ import asyncio
2
  import os
3
  import time
4
  from dataclasses import dataclass
5
- from typing import List, Optional, AsyncGenerator
6
  import gradio as gr
7
  from dotenv import load_dotenv
8
  from langchain_openai import ChatOpenAI
@@ -10,9 +10,6 @@ from rich.console import Console
10
  from rich.panel import Panel
11
  from rich.text import Text
12
  from logger import setup_logger, log_execution_time, log_async_execution_time
13
-
14
- from browser_use import Agent, Browser
15
- from browser_use.browser.browser import BrowserContext
16
  from api_clients import OpenRouterClient, ElevenLabsClient
17
 
18
  load_dotenv()
@@ -20,116 +17,6 @@ load_dotenv()
20
  console = Console()
21
  logger = setup_logger("interface")
22
 
23
- @dataclass
24
- class ActionResult:
25
- is_done: bool
26
- extracted_content: Optional[str]
27
- error: Optional[str]
28
- include_in_memory: bool
29
-
30
-
31
- @dataclass
32
- class AgentHistoryList:
33
- all_results: List[ActionResult]
34
- all_model_outputs: List[dict]
35
-
36
-
37
- def parse_agent_history(history_str: str) -> None:
38
- # Split the content into sections based on ActionResult entries
39
- sections = history_str.split('ActionResult(')
40
-
41
- for i, section in enumerate(sections[1:], 1): # Skip first empty section
42
- # Extract relevant information
43
- content = ''
44
- if 'extracted_content=' in section:
45
- content = section.split('extracted_content=')[1].split(',')[0].strip("'")
46
-
47
- if content:
48
- header = Text(f'Step {i}', style='bold blue')
49
- panel = Panel(content, title=header, border_style='blue')
50
- console.print(panel)
51
- console.print()
52
-
53
-
54
- async def run_browser_task(
55
- task: str,
56
- api_key: str,
57
- provider: str = 'openai',
58
- model: str = 'gpt-4-vision',
59
- headless: bool = True,
60
- ) -> str:
61
- if not api_key.strip():
62
- return 'Please provide an API key'
63
-
64
- if provider == 'openai':
65
- os.environ['OPENAI_API_KEY'] = api_key
66
- llm = ChatOpenAI(model=model)
67
- elif provider == 'anthropic':
68
- os.environ['ANTHROPIC_API_KEY'] = api_key
69
- llm = ChatAnthropic(model=model)
70
- else: # google
71
- os.environ['GOOGLE_API_KEY'] = api_key
72
- llm = ChatGoogleGenerativeAI(model=model)
73
-
74
- try:
75
- agent = Agent(
76
- task=task,
77
- llm=llm,
78
- browser=Browser(BrowserContext(headless=True))
79
- )
80
- result = await agent.run()
81
- # TODO: The result could be parsed better
82
- return result
83
- except Exception as e:
84
- return f'Error: {str(e)}'
85
-
86
-
87
- @log_async_execution_time(logger)
88
- async def scrape_content(url: str) -> str:
89
- """
90
- Scrape and summarize content from the given URL using browser automation
91
-
92
- This function performs the following steps:
93
- 1. Validates the input URL
94
- 2. Initializes the browser agent
95
- 3. Extracts and summarizes the content
96
-
97
- Args:
98
- url: Target URL to scrape
99
-
100
- Returns:
101
- Summarized content suitable for podcast generation
102
-
103
- Raises:
104
- ValueError: If URL is invalid or content extraction fails
105
- """
106
- logger.info(f"Starting content scrape for URL: {url}")
107
-
108
- # Input validation
109
- if not url.startswith(('http://', 'https://')):
110
- logger.error(f"Invalid URL format: {url}")
111
- raise ValueError("URL must start with http:// or https://")
112
-
113
- try:
114
- logger.debug("Initializing LLM and browser agent")
115
- llm = ChatOpenAI(model="gpt-4")
116
- agent = Agent(
117
- task=f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way.",
118
- llm=llm,
119
- browser=Browser(BrowserContext(headless=True))
120
- )
121
-
122
- logger.info("Executing content extraction")
123
- result = await agent.run()
124
-
125
- logger.debug(f"Content extraction successful. Length: {len(result)} chars")
126
- logger.debug(f"Content preview: {result[:200]}...")
127
-
128
- return result
129
- except Exception as e:
130
- logger.error(f"Content extraction failed for {url}", exc_info=True)
131
- raise
132
-
133
  @log_async_execution_time(logger)
134
  async def create_podcast(
135
  url: str,
@@ -138,14 +25,12 @@ async def create_podcast(
138
  voice_id: str,
139
  openrouter_key: str,
140
  model_id: str,
141
- ) -> AsyncGenerator[tuple[Optional[str], str], None]:
142
  """
143
  Create a podcast through a multi-step process:
144
  1. Content extraction from URL
145
  2. Script generation using AI
146
  3. Voice synthesis
147
-
148
- Progress updates are yielded at each step for UI feedback.
149
  """
150
  logger.info(f"Starting podcast creation for URL: {url}")
151
  logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
@@ -159,21 +44,24 @@ async def create_podcast(
159
 
160
  # Phase 1: Content scraping
161
  logger.info("Phase 1/3: Content scraping")
162
- yield None, "Scraping website content..."
163
- content = await scrape_content(url)
 
 
 
 
 
164
  logger.debug(f"Scraped content length: {len(content)} chars")
165
 
166
  # Phase 2: Script generation
167
  logger.info("Phase 2/3: Script generation")
168
- yield None, "Generating podcast script..."
169
  script = await openrouter.generate_script(content, prompt, model_id)
170
  logger.debug(f"Generated script length: {len(script)} chars")
171
 
172
  # Phase 3: Audio synthesis
173
  logger.info("Phase 3/3: Audio generation")
174
- yield None, "Converting to audio..."
175
- audio = elevenlabs.generate_audio(script, voice_id)
176
- logger.debug(f"Generated audio size: {len(audio)} bytes")
177
 
178
  # Save output
179
  audio_path = f"podcast_{int(time.time())}.mp3"
@@ -182,11 +70,11 @@ async def create_podcast(
182
  f.write(audio)
183
 
184
  logger.info("Podcast creation completed successfully")
185
- yield audio_path, "Podcast created successfully!"
186
 
187
  except Exception as e:
188
  logger.error("Podcast creation failed", exc_info=True)
189
- yield None, f"Error: {str(e)}"
190
 
191
  def create_ui():
192
  logger.info("Initializing Gradio interface")
@@ -231,7 +119,7 @@ def create_ui():
231
  submit_btn = gr.Button('Create Podcast', variant='primary')
232
 
233
  with gr.Column(scale=1):
234
- audio_output = gr.Audio(label="Generated Podcast")
235
  status = gr.Textbox(label='Status', interactive=False)
236
 
237
  # Event handlers
@@ -276,4 +164,4 @@ def create_ui():
276
 
277
  if __name__ == '__main__':
278
  demo = create_ui()
279
- demo.launch()
 
2
  import os
3
  import time
4
  from dataclasses import dataclass
5
+ from typing import List, Optional, AsyncGenerator, Tuple
6
  import gradio as gr
7
  from dotenv import load_dotenv
8
  from langchain_openai import ChatOpenAI
 
10
  from rich.panel import Panel
11
  from rich.text import Text
12
  from logger import setup_logger, log_execution_time, log_async_execution_time
 
 
 
13
  from api_clients import OpenRouterClient, ElevenLabsClient
14
 
15
  load_dotenv()
 
17
  console = Console()
18
  logger = setup_logger("interface")
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  @log_async_execution_time(logger)
21
  async def create_podcast(
22
  url: str,
 
25
  voice_id: str,
26
  openrouter_key: str,
27
  model_id: str,
28
+ ) -> Tuple[Optional[str], str]:
29
  """
30
  Create a podcast through a multi-step process:
31
  1. Content extraction from URL
32
  2. Script generation using AI
33
  3. Voice synthesis
 
 
34
  """
35
  logger.info(f"Starting podcast creation for URL: {url}")
36
  logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
 
44
 
45
  # Phase 1: Content scraping
46
  logger.info("Phase 1/3: Content scraping")
47
+ if not url.startswith(('http://', 'https://')):
48
+ raise ValueError("URL must start with http:// or https://")
49
+
50
+ logger.debug("Initializing LLM and browser agent")
51
+ llm = ChatOpenAI(model="gpt-4")
52
+ task = f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way."
53
+ content = await llm.apredict(task)
54
  logger.debug(f"Scraped content length: {len(content)} chars")
55
 
56
  # Phase 2: Script generation
57
  logger.info("Phase 2/3: Script generation")
 
58
  script = await openrouter.generate_script(content, prompt, model_id)
59
  logger.debug(f"Generated script length: {len(script)} chars")
60
 
61
  # Phase 3: Audio synthesis
62
  logger.info("Phase 3/3: Audio generation")
63
+ audio = await elevenlabs.generate_audio(script, voice_id)
64
+ logger.debug(f"Generated audio data received")
 
65
 
66
  # Save output
67
  audio_path = f"podcast_{int(time.time())}.mp3"
 
70
  f.write(audio)
71
 
72
  logger.info("Podcast creation completed successfully")
73
+ return audio_path, "Podcast created successfully!"
74
 
75
  except Exception as e:
76
  logger.error("Podcast creation failed", exc_info=True)
77
+ return None, f"Error: {str(e)}"
78
 
79
  def create_ui():
80
  logger.info("Initializing Gradio interface")
 
119
  submit_btn = gr.Button('Create Podcast', variant='primary')
120
 
121
  with gr.Column(scale=1):
122
+ audio_output = gr.Audio(label="Generated Podcast", type="filepath")
123
  status = gr.Textbox(label='Status', interactive=False)
124
 
125
  # Event handlers
 
164
 
165
  if __name__ == '__main__':
166
  demo = create_ui()
167
+ demo.queue().launch()