Spaces:
Sleeping
Sleeping
# Standard library.
import json
import os
import re
import shutil

# Third-party.
import pyttsx3
import requests
import streamlit as st
import torch
from dotenv import load_dotenv
from pydub import AudioSegment
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pull configuration (e.g. ELEVENLABS_API_KEY) out of a local .env file
# into the process environment before anything reads os.getenv().
load_dotenv()
# --- Streamlit page setup ----------------------------------------------------
st.set_page_config(page_title="Podcast Generator", layout="wide")
st.title("🎙️ Podcast Generator")

# --- LLM steering prompt -----------------------------------------------------
# Instructs the model to rewrite an article as a two-host dialogue whose short
# sentences are friendly to downstream speech synthesis.
system_prompt = """you are an experienced podcast host...
- based on text like an article you can create an engaging conversation between two people.
- make the conversation engaging with a lot of emotion.
- in the response, identify speakers as Sascha and Marina.
- Sascha is the writer, and Marina is the one asking questions.
- The podcast is called The Machine Learning Engineer.
- Short sentences that can be easily used with speech synthesis.
- Use natural conversation fillers like "äh" to make it sound real.
"""
# --- Text-generation model ---------------------------------------------------
# distilgpt2: a small GPT-2 variant used to draft the dialogue locally.
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# --- Local TTS engine (pyttsx3, used for Marina) -----------------------------
engine = pyttsx3.init()
engine.setProperty("rate", 150)        # speech rate; tune to taste
engine.setProperty("voice", "english")  # pick an English voice

# --- ElevenLabs TTS configuration (used for Sascha) --------------------------
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
elevenlabs_url = "https://api.elevenlabs.io/v1/text-to-speech/ERL3svWBAQ18ByCZTr4k"
elevenlabs_headers = {
    "Accept": "audio/mpeg",
    "Content-Type": "application/json",
    "xi-api-key": elevenlabs_api_key,
}
# ElevenLabs TTS function for Sascha
def synthesize_speech_elevenlabs(text, speaker, index):
    """Synthesize ``text`` via the ElevenLabs API and write it to
    ``audio-files/{index}_{speaker}.mp3``.

    Args:
        text: The utterance to synthesize.
        speaker: Speaker label used in the output filename.
        index: Turn number used in the output filename (keeps clips ordered).

    Raises:
        requests.HTTPError: If the API returns a non-2xx status.
    """
    data = {
        "text": text,
        "model_id": "eleven_turbo_v2_5",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    }
    response = requests.post(
        elevenlabs_url,
        json=data,
        headers=elevenlabs_headers,
        stream=True,   # stream the audio instead of buffering it all in memory
        timeout=60,    # don't hang the app forever on a stalled connection
    )
    # Fail loudly on API errors; without this check the JSON error body would
    # be silently written into the .mp3 and corrupt the merged podcast.
    response.raise_for_status()
    filename = f"audio-files/{index}_{speaker}.mp3"
    with open(filename, "wb") as out:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:  # skip keep-alive chunks
                out.write(chunk)
# Pyttsx3 TTS function for Marina
def synthesize_speech_pyttsx3(text, speaker, index):
    """Render ``text`` to audio with the local pyttsx3 engine.

    Writes ``audio-files/{index}_{speaker}.mp3`` — the same naming scheme as
    the ElevenLabs path, so the merge step can sort all clips together.
    """
    out_path = f"audio-files/{index}_{speaker}.mp3"
    engine.save_to_file(text, out_path)
    engine.runAndWait()  # block until the engine has flushed the file
# Function to synthesize speech based on the speaker
def synthesize_speech(text, speaker, index):
    """Dispatch one conversation turn to the speaker's TTS backend.

    Sascha gets the ElevenLabs cloud voice; any other speaker (Marina) is
    rendered with the local pyttsx3 engine.
    """
    backend = (
        synthesize_speech_elevenlabs
        if speaker == "Sascha"
        else synthesize_speech_pyttsx3
    )
    backend(text, speaker, index)
# Function to sort filenames naturally
def natural_sort_key(filename):
    """Sort key that compares embedded digit runs numerically, so
    ``2_x.mp3`` orders before ``10_x.mp3`` (plain string sort would not)."""
    key = []
    for piece in re.split(r'(\d+)', filename):
        key.append(int(piece) if piece.isdigit() else piece)
    return key
# Function to merge audio files
def merge_audios(audio_folder, output_file):
    """Concatenate every .mp3/.wav clip in ``audio_folder`` into one MP3.

    Clips are joined in natural (numeric) filename order, so the
    ``{index}_{speaker}`` naming of the synthesis step determines playback
    order in ``output_file``.
    """
    clips = [
        name
        for name in os.listdir(audio_folder)
        if name.endswith(".mp3") or name.endswith(".wav")
    ]
    clips.sort(key=natural_sort_key)

    combined = AudioSegment.empty()
    for name in clips:
        combined += AudioSegment.from_file(os.path.join(audio_folder, name))
    combined.export(output_file, format="mp3")
# Function to generate the conversation using distilgpt2
def generate_conversation(article):
    """Generate a two-speaker podcast script from ``article`` text.

    Args:
        article: Raw article text to turn into a dialogue.

    Returns:
        A list of ``{"speaker": ..., "text": ...}`` dicts, alternating
        between "Sascha" and "Marina", one entry per non-empty line of
        model output.
    """
    prompt = system_prompt + "\n\nArticle:\n" + article + "\n\nSascha: "
    # distilgpt2 has a 1024-token context window; the original
    # max_length=8192 overruns it and raises on long articles. Truncate the
    # prompt and bound the *new* tokens so prompt + generation stays <= 1024.
    input_ids = tokenizer.encode(
        prompt, return_tensors="pt", truncation=True, max_length=768
    )
    output = model.generate(
        input_ids,
        max_new_tokens=256,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens. Decoding output[0] wholesale
    # would include the system prompt and the article, which then get read
    # aloud as "conversation" turns.
    generated_ids = output[0][input_ids.shape[1]:]
    conversation_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Assign alternating speakers to the non-empty output lines.
    conversation = []
    speaker = "Sascha"
    for line in conversation_text.splitlines():
        line = line.strip()
        if line:
            conversation.append({"speaker": speaker, "text": line})
            speaker = "Marina" if speaker == "Sascha" else "Sascha"
    return conversation
# Function to generate the podcast audio from conversation data
def generate_audio(conversation):
    """Synthesize each turn of ``conversation`` and merge them into one MP3.

    The ``audio-files`` working directory is recreated from scratch so clips
    from a previous run can never leak into the new podcast.

    Returns:
        The path of the merged podcast file ("podcast.mp3").
    """
    audio_dir = 'audio-files'
    if os.path.exists(audio_dir):
        shutil.rmtree(audio_dir)  # wipe stale clips from earlier runs
    os.makedirs(audio_dir, exist_ok=True)

    for turn_index, turn in enumerate(conversation):
        synthesize_speech(turn['text'], turn['speaker'], turn_index)

    output_file = "podcast.mp3"
    merge_audios(audio_dir, output_file)
    return output_file
# --- Streamlit inputs and outputs --------------------------------------------
article = st.text_area("Article Content", "Paste the article text here", height=300)

if st.button("Generate Podcast"):
    if not article:
        st.error("Please enter article content to generate a podcast.")
    else:
        # Step 1: turn the article into a two-speaker script.
        with st.spinner("Generating conversation..."):
            conversation = generate_conversation(article)
        st.success("Conversation generated successfully!")
        st.json(conversation)

        # Step 2: synthesize every turn and stitch the clips together.
        with st.spinner("Synthesizing audio..."):
            podcast_file = generate_audio(conversation)
        st.success("Audio synthesis complete!")

        # Playback and download of the finished podcast.
        st.audio(podcast_file, format="audio/mp3")
        with open(podcast_file, "rb") as file:
            st.download_button(
                "Download Podcast",
                data=file,
                file_name="podcast.mp3",
                mime="audio/mp3",
            )