Merge branch 'main' of https://huggingface.co/spaces/phxdev/podcaster
Browse files- README.md +6 -4
- app.py +38 -0
- podcast_generator.py +9 -0
- requirements.txt +9 -38
- scraper.py +24 -0
- tts.py +23 -0
README.md
CHANGED
|
@@ -1,9 +1,11 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: indigo
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
short_description: A Podcast Generator
|
| 9 |
python_version: 3.12
|
|
|
|
| 1 |
---
|
| 2 |
+
title: URL to Podcast Generator
|
| 3 |
+
emoji: 🎙️
|
| 4 |
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: "5.12.0"
|
| 8 |
+
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
short_description: A Podcast Generator
|
| 11 |
python_version: 3.12
|
app.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, render_template
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from scraper import scrape_url
|
| 4 |
+
from podcast_generator import PodcastGenerator
|
| 5 |
+
from tts import text_to_speech
|
| 6 |
+
|
| 7 |
+
app = Flask(__name__)
|
| 8 |
+
|
| 9 |
+
def generate_podcast(url, model_client=None, api_key=None):
    """Turn the web page at *url* into a spoken podcast episode.

    The pipeline is: scrape the page text with ``scrape_url``, ask
    ``PodcastGenerator`` to write an episode script from it, then hand the
    script to ``text_to_speech``.

    Args:
        url: Address of the page to convert (the value of the Gradio textbox).
        model_client: Object passed to ``PodcastGenerator``; it must provide
            the ``generate(prompt, max_length=...)`` method that class calls.
        api_key: ElevenLabs API key forwarded to ``text_to_speech``. When
            omitted, the ``ELEVENLABS_API_KEY`` environment variable is used.

    Returns:
        Whatever ``text_to_speech`` returns (the path of the written audio
        file), which Gradio's ``Audio`` output can play.

    Raises:
        ValueError: If *url* is empty or only whitespace.
        RuntimeError: If no model client or no API key is available.
    """
    import os

    url = (url or "").strip()
    if not url:
        raise ValueError("Please enter the URL of the page to convert.")

    # PodcastGenerator.__init__ requires a client and text_to_speech requires
    # an API key; check both up front so a misconfiguration fails before the
    # (slow) scrape instead of as a TypeError halfway through the pipeline.
    # TODO(review): nothing in this project constructs a model client yet, so
    # the Gradio callback cannot succeed until one is supplied here.
    if model_client is None:
        raise RuntimeError(
            "No language-model client configured: pass model_client to "
            "generate_podcast()."
        )
    api_key = api_key or os.environ.get("ELEVENLABS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "No ElevenLabs API key configured: pass api_key or set "
            "ELEVENLABS_API_KEY."
        )

    content = scrape_url(url)
    podcast_text = PodcastGenerator(model_client).generate_podcast(content)
    return text_to_speech(podcast_text, api_key)
|
| 15 |
+
|
| 16 |
+
# Build the Gradio UI: a single URL textbox in, a single audio player out.
demo = gr.Interface(
    fn=generate_podcast,
    inputs=gr.Textbox(
        label="Website URL",
        placeholder="Enter the URL of the website you want to convert to a podcast"
    ),
    outputs=gr.Audio(label="Generated Podcast"),
    title="URL to Podcast Generator",
    description="Enter a URL to generate a podcast episode based on its content.",
    theme="huggingface",
    allow_flagging="never",
)

# The interface is served by Gradio's own server rather than mounted on the
# Flask app: gr.mount_gradio_app targets FastAPI (ASGI) applications, so
# passing it a Flask (WSGI) app fails at import time, and the object it
# returns has no Flask-style run() method.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        debug=True,
    )
|
podcast_generator.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class PodcastGenerator:
    """Write a podcast script from scraped page text via a text-generation client.

    The client only has to expose ``generate(prompt, max_length=...)`` and
    return a mapping whose ``'text'`` entry holds the generated script; no
    other part of the client is used by this class.
    """

    def __init__(self, model_client):
        """Store the client used for every later ``generate_podcast`` call."""
        self.model_client = model_client

    def generate_podcast(self, scraped_content, max_length=300):
        """Return an episode script based on *scraped_content*.

        Args:
            scraped_content: Plain text extracted from the source page.
            max_length: Length limit forwarded to the client's ``generate``.
                The default of 300 keeps the previous behaviour (the original
                author assumed roughly 300 tokens is about three minutes of
                speech; that ratio is not verified here).

        Returns:
            The generated script with surrounding whitespace removed, or an
            empty string when the client returns no text.

        Raises:
            ValueError: If *scraped_content* is empty or only whitespace, so
                the model is never asked to invent an episode from nothing.
        """
        if not scraped_content or not scraped_content.strip():
            raise ValueError("No content was scraped; nothing to generate a podcast from.")

        prompt = f"Create a podcast episode based on the following content: {scraped_content}"
        response = self.model_client.generate(prompt, max_length=max_length)
        # Tolerate a missing response, a missing 'text' key, or an explicit
        # None value instead of failing on .strip().
        podcast_text = (response or {}).get('text') or ''
        return podcast_text.strip()
|
requirements.txt
CHANGED
|
@@ -1,38 +1,9 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
#
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
aiohttp>=3.8.0
|
| 11 |
-
requests>=2.31.0
|
| 12 |
-
|
| 13 |
-
# Audio Generation
|
| 14 |
-
elevenlabs>=0.1.0
|
| 15 |
-
|
| 16 |
-
# Data Handling and Validation
|
| 17 |
-
pydantic>=2.0.0
|
| 18 |
-
dataclasses>=0.6
|
| 19 |
-
typing-extensions>=4.8.0
|
| 20 |
-
|
| 21 |
-
# Configuration and Environment
|
| 22 |
-
python-dotenv>=1.0.0
|
| 23 |
-
|
| 24 |
-
# Error Handling and Logging
|
| 25 |
-
python-json-logger>=2.0.0 # Structured JSON logging
|
| 26 |
-
rich>=13.0.0 # For console output formatting
|
| 27 |
-
backoff>=2.2.0 # Retry mechanism
|
| 28 |
-
tenacity>=8.2.0 # Retry utilities
|
| 29 |
-
|
| 30 |
-
# UI and Interface
|
| 31 |
-
gradio>=4.0.0 # For better themes and UI components
|
| 32 |
-
|
| 33 |
-
# Async Support
|
| 34 |
-
asyncio>=3.4.3
|
| 35 |
-
|
| 36 |
-
# Testing and Development
|
| 37 |
-
pytest>=7.0.0 # For running tests
|
| 38 |
-
mypy>=1.0.0 # For type checking
|
|
|
|
| 1 |
+
gradio==3.0.0
|
| 2 |
+
browser-use
|
| 3 |
+
elevenlabs==0.2.26
|
| 4 |
+
flask==3.0.0
|
| 5 |
+
pydub==0.25.1 # audio processing library
|
| 6 |
+
python-dotenv==1.0.0 # for environment variables
|
| 7 |
+
requests==2.31.0 # for API calls
|
| 8 |
+
numpy>1.24.3 # common dependency
|
| 9 |
+
openrouter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scraper.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def scrape_url(url):
    """Fetch *url* in a browser and return the readable text of the page.

    The text of every ``<main>`` element is joined with single spaces. Pages
    that have no ``<main>`` element fall back to the text of ``<body>`` (or
    of the whole document) instead of yielding an empty string.

    Args:
        url: Absolute ``http`` or ``https`` address of the page.

    Returns:
        The extracted text with leading and trailing whitespace removed.

    Raises:
        ValueError: If *url* is not a string or is not an absolute
            http(s) URL.
    """
    from urllib.parse import urlparse

    # The URL comes straight from a user-facing textbox, so refuse anything
    # that is not plain http(s) (file://, javascript:, bare words, ...)
    # before a browser is ever started.
    if not isinstance(url, str):
        raise ValueError("URL must be a string.")
    url = url.strip()
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise ValueError(f"Not an absolute http(s) URL: {url!r}")

    # Third-party imports stay function-local, as in the original, so that
    # importing this module does not require them.
    from browser_use import Browser
    from bs4 import BeautifulSoup

    # NOTE(review): open/get_page_source/close are kept exactly as the
    # original called them; confirm they exist on the installed browser-use
    # version's Browser class.
    browser = Browser()
    try:
        browser.open(url)
        content = browser.get_page_source()
    finally:
        # Always release the browser, even when loading the page fails.
        browser.close()

    soup = BeautifulSoup(content, 'html.parser')

    # Prefer <main>; fall back to <body>, then the whole document.
    text_elements = soup.find_all('main') or [soup.body or soup]
    text_content = ' '.join(element.get_text() for element in text_elements)

    return text_content.strip()
|
tts.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def text_to_speech(text, api_key=None, voice_id=None,
                   output_path="podcast_episode.mp3", timeout=60):
    """Synthesize *text* with the ElevenLabs API and save it as an MP3 file.

    Args:
        text: The script to speak; must contain non-whitespace characters.
        api_key: ElevenLabs API key. When omitted, the ``ELEVENLABS_API_KEY``
            environment variable is used, so callers that pass only *text*
            keep working.
        voice_id: ElevenLabs voice identifier. When omitted, the
            ``ELEVENLABS_VOICE_ID`` environment variable is used, then a
            stock voice id.
        output_path: Where the audio is written. Defaults to the file name
            the function has always used.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        *output_path*, the path of the written audio file.

    Raises:
        ValueError: If *text* is empty or no API key is available.
        RuntimeError: If the API answers with a status other than 200; the
            message carries the status code and response body.
    """
    import os

    if not text or not text.strip():
        raise ValueError("text_to_speech needs non-empty text.")
    api_key = api_key or os.environ.get("ELEVENLABS_API_KEY")
    if not api_key:
        raise ValueError(
            "No ElevenLabs API key: pass api_key or set ELEVENLABS_API_KEY."
        )
    # NOTE(review): the fallback id is a stock voice used in ElevenLabs'
    # examples; confirm it is available to the account in use.
    voice_id = (
        voice_id
        or os.environ.get("ELEVENLABS_VOICE_ID")
        or "21m00Tcm4TlvDq8ikWAM"
    )

    # Imported only after validation so bad input fails without touching
    # the network stack.
    import requests

    # The REST endpoint takes the voice in the URL path and authenticates
    # with the xi-api-key header; MP3 is the response format when no
    # output_format query parameter is given.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "xi-api-key": api_key,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }
    data = {"text": text}

    # A timeout keeps a stalled connection from hanging the request forever.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    if response.status_code != 200:
        raise RuntimeError(f"Error: {response.status_code}, {response.text}")

    with open(output_path, "wb") as audio_file:
        audio_file.write(response.content)
    return output_path
|