marks committed on
Commit
f12f1e5
·
2 Parent(s): 6844262 a2f0554

Merge branch 'main' of https://huggingface.co/spaces/phxdev/podcaster

Browse files
Files changed (6) hide show
  1. README.md +6 -4
  2. app.py +38 -0
  3. podcast_generator.py +9 -0
  4. requirements.txt +9 -38
  5. scraper.py +24 -0
  6. tts.py +23 -0
README.md CHANGED
@@ -1,9 +1,11 @@
1
  ---
2
- title: Podcaster
3
- emoji: 👁
4
  colorFrom: indigo
5
- colorTo: gray
6
- sdk: docker
 
 
7
  pinned: false
8
  short_description: A Podcast Generator
9
  python_version: 3.12
 
1
  ---
2
+ title: URL to Podcast Generator
3
+ emoji: 🎙️
4
  colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: "5.12.0"
8
+ app_file: app.py
9
  pinned: false
10
  short_description: A Podcast Generator
11
  python_version: 3.12
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template
2
+ import gradio as gr
3
+ from scraper import scrape_url
4
+ from podcast_generator import PodcastGenerator
5
+ from tts import text_to_speech
6
+
7
+ app = Flask(__name__)
8
+
9
def generate_podcast(url, model_client=None, api_key=None):
    """Turn the web page at *url* into a spoken podcast episode.

    The pipeline is: scrape the page text, ask the language model for a
    podcast script, then synthesize that script to audio.

    Args:
        url: Address of the page to convert.
        model_client: Client handed to ``PodcastGenerator``; it must expose
            ``generate(prompt, max_length=...)``.
        api_key: Text-to-speech API key. When omitted, the
            ``ELEVENLABS_API_KEY`` environment variable is used.

    Returns:
        Whatever ``text_to_speech`` returns (the path of the audio file).

    Raises:
        ValueError: If the URL is empty, no model client or API key is
            available, or a pipeline stage produced no text.
    """
    import os

    if not url or not url.strip():
        raise ValueError("A website URL is required.")

    # ``PodcastGenerator.__init__`` requires a client; the previous
    # ``PodcastGenerator()`` call raised TypeError on every request.
    # NOTE(review): nothing in this file constructs a client yet -- bind one
    # (e.g. with functools.partial) before passing this function to Gradio.
    if model_client is None:
        raise ValueError("No model client configured for podcast generation.")

    # ``text_to_speech`` requires an API key as its second argument.
    api_key = api_key or os.environ.get("ELEVENLABS_API_KEY")
    if not api_key:
        raise ValueError(
            "No text-to-speech API key provided; set ELEVENLABS_API_KEY."
        )

    content = scrape_url(url.strip())
    if not content:
        raise ValueError(f"No readable content found at {url!r}.")

    podcast_text = PodcastGenerator(model_client).generate_podcast(content)
    if not podcast_text:
        raise ValueError("The model returned an empty podcast script.")

    return text_to_speech(podcast_text, api_key)
15
+
16
# Create Gradio interface: one URL textbox in, one audio player out.
demo = gr.Interface(
    fn=generate_podcast,
    inputs=gr.Textbox(
        label="Website URL",
        placeholder="Enter the URL of the website you want to convert to a podcast",
    ),
    outputs=gr.Audio(label="Generated Podcast"),
    title="URL to Podcast Generator",
    description="Enter a URL to generate a podcast episode based on its content.",
    theme="huggingface",
    allow_flagging="never",
)

# Serve the Gradio app directly. ``gr.mount_gradio_app`` mounts onto a
# FastAPI application, so passing it the Flask ``app`` object does not work.
# Flask's ``debug=True`` is dropped as well: it would expose the interactive
# debugger on a publicly bound host.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
podcast_generator.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
class PodcastGenerator:
    """Produce a podcast script from scraped text via a text-generation client."""

    # Token budget used when the caller does not pass one; the original
    # author assumed 300 tokens is roughly three minutes of speech.
    DEFAULT_MAX_LENGTH = 300

    def __init__(self, model_client):
        """Remember the client used for generation.

        Args:
            model_client: Object exposing ``generate(prompt, max_length=...)``
                whose result is a mapping with a ``'text'`` entry.
        """
        self.model_client = model_client

    def generate_podcast(self, scraped_content, max_length=None):
        """Ask the model for a podcast episode based on *scraped_content*.

        Args:
            scraped_content: Text extracted from the source page.
            max_length: Generation budget forwarded to the client; defaults
                to ``DEFAULT_MAX_LENGTH``.

        Returns:
            The generated script with surrounding whitespace removed, or an
            empty string when the client returned no text.
        """
        if max_length is None:
            max_length = self.DEFAULT_MAX_LENGTH

        prompt = f"Create a podcast episode based on the following content: {scraped_content}"
        response = self.model_client.generate(prompt, max_length=max_length)

        # The reply may be None, lack 'text', or carry an explicit None;
        # all of those mean "no script" rather than a crash on .strip().
        podcast_text = (response or {}).get('text') or ''
        return str(podcast_text).strip()
requirements.txt CHANGED
@@ -1,38 +1,9 @@
1
- # Core AI and Language Models
2
- langchain-openai>=0.0.1
3
- langchain-google-genai>=0.0.1
4
- langchain-anthropic>=0.0.1
5
- langchain-ollama>=0.0.1
6
-
7
- # Web and Browser Automation
8
- playwright>=1.40.0
9
- browser-use>=0.1.20
10
- aiohttp>=3.8.0
11
- requests>=2.31.0
12
-
13
- # Audio Generation
14
- elevenlabs>=0.1.0
15
-
16
- # Data Handling and Validation
17
- pydantic>=2.0.0
18
- dataclasses>=0.6
19
- typing-extensions>=4.8.0
20
-
21
- # Configuration and Environment
22
- python-dotenv>=1.0.0
23
-
24
- # Error Handling and Logging
25
- python-json-logger>=2.0.0 # Structured JSON logging
26
- rich>=13.0.0 # For console output formatting
27
- backoff>=2.2.0 # Retry mechanism
28
- tenacity>=8.2.0 # Retry utilities
29
-
30
- # UI and Interface
31
- gradio>=4.0.0 # For better themes and UI components
32
-
33
- # Async Support
34
- asyncio>=3.4.3
35
-
36
- # Testing and Development
37
- pytest>=7.0.0 # For running tests
38
- mypy>=1.0.0 # For type checking
 
1
+ gradio==3.0.0
2
+ browser-use
3
+ elevenlabs==0.2.26
4
+ flask==3.0.0
5
+ pydub==0.25.1 # audio processing library
6
+ python-dotenv==1.0.0 # for environment variables
7
+ requests==2.31.0 # for API calls
8
+ numpy>1.24.3 # common dependency
9
+ openrouter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scraper.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def scrape_url(url):
    """Load *url* in a browser session and return the page's text content.

    Text is taken from the page's ``<main>`` element(s). When the page has
    none, the text of the whole body is used instead so the caller does not
    silently receive an empty string.

    Args:
        url: Address of the page to scrape.

    Returns:
        The extracted text with surrounding whitespace removed.

    Raises:
        ValueError: If *url* is empty or blank.
    """
    if not url or not url.strip():
        raise ValueError("A URL is required to scrape a page.")

    from browser_use import Browser
    from bs4 import BeautifulSoup

    # NOTE(review): the open/get_page_source/close calls are kept from the
    # original; confirm they match the installed browser-use API.
    browser = Browser()
    try:
        browser.open(url)
        content = browser.get_page_source()
    finally:
        # Always release the browser, even when navigation fails.
        browser.close()

    # Parse the HTML content
    soup = BeautifulSoup(content, 'html.parser')

    # Extract relevant text (modify the selector as needed)
    text_elements = soup.find_all(['main'])
    if not text_elements:
        # No <main>: fall back to the body, minus non-readable markup.
        for tag in soup(['script', 'style', 'noscript']):
            tag.decompose()
        text_elements = [soup.body or soup]

    text_content = ' '.join(element.get_text() for element in text_elements)
    return text_content.strip()
tts.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def text_to_speech(text, api_key=None, voice_id="21m00Tcm4TlvDq8ikWAM",
                   output_path="podcast_episode.mp3", timeout=120):
    """Synthesize *text* to an MP3 file with the ElevenLabs API.

    Args:
        text: Script to be spoken.
        api_key: ElevenLabs API key. When omitted, the ``ELEVENLABS_API_KEY``
            environment variable is used.
        voice_id: ElevenLabs voice identifier; it is part of the endpoint
            path. NOTE(review): confirm the default voice is available to
            the account in use.
        output_path: Where the audio is written.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``output_path``, the path of the written audio file.

    Raises:
        ValueError: If *text* is empty or no API key is available.
        RuntimeError: If the API answers with a non-200 status.
    """
    import os

    if not text or not text.strip():
        raise ValueError("Cannot synthesize empty text.")

    api_key = api_key or os.environ.get("ELEVENLABS_API_KEY")
    if not api_key:
        raise ValueError(
            "No ElevenLabs API key provided; set ELEVENLABS_API_KEY."
        )

    # Imported after validation so bad arguments fail before any HTTP setup.
    import requests

    # The endpoint takes the voice in the path and authenticates with the
    # ``xi-api-key`` header (not ``Authorization: Bearer``).
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "xi-api-key": api_key,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }
    data = {"text": text}

    # Without a timeout a stalled connection would hang the request forever.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    if response.status_code != 200:
        raise RuntimeError(f"Error: {response.status_code}, {response.text}")

    with open(output_path, "wb") as audio_file:
        audio_file.write(response.content)
    return output_path