Spaces:
Sleeping
Sleeping
# Standard library.
import json
import os
import re
import shutil

# Third-party.
import pyttsx3
import requests
import streamlit as st
import torch
from dotenv import load_dotenv
from pydub import AudioSegment
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pull configuration (e.g. ELEVENLABS_API_KEY) out of a local .env file
# into the process environment before anything reads os.getenv().
load_dotenv()
# --- Streamlit page setup ----------------------------------------------------
st.set_page_config(page_title="Podcast Generator", layout="wide")
st.title("🎙️ Podcast Generator")

# --- LLM steering prompt -----------------------------------------------------
# Instructs the model to rewrite an article as a two-host dialogue whose short
# sentences are friendly to downstream speech synthesis.
system_prompt = """you are an experienced podcast host...
- based on text like an article you can create an engaging conversation between two people.
- make the conversation engaging with a lot of emotion.
- in the response, identify speakers as Sascha and Marina.
- Sascha is the writer, and Marina is the one asking questions.
- The podcast is called The Machine Learning Engineer.
- Short sentences that can be easily used with speech synthesis.
- Use natural conversation fillers like "äh" to make it sound real.
"""
# --- Text-generation model ---------------------------------------------------
# distilgpt2: a small GPT-2 variant used to draft the dialogue locally.
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# --- Local TTS engine (pyttsx3, used for Marina) -----------------------------
engine = pyttsx3.init()
engine.setProperty("rate", 150)        # speech rate; tune to taste
engine.setProperty("voice", "english")  # pick an English voice

# --- ElevenLabs TTS configuration (used for Sascha) --------------------------
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
elevenlabs_url = "https://api.elevenlabs.io/v1/text-to-speech/ERL3svWBAQ18ByCZTr4k"
elevenlabs_headers = {
    "Accept": "audio/mpeg",
    "Content-Type": "application/json",
    "xi-api-key": elevenlabs_api_key,
}
# ElevenLabs TTS function for Sascha
def synthesize_speech_elevenlabs(text, speaker, index):
    """Synthesize ``text`` via the ElevenLabs API and write it to
    ``audio-files/{index}_{speaker}.mp3``.

    Args:
        text: The utterance to synthesize.
        speaker: Speaker label used in the output filename.
        index: Turn number used in the output filename (keeps clips ordered).

    Raises:
        requests.HTTPError: If the API returns a non-2xx status.
    """
    data = {
        "text": text,
        "model_id": "eleven_turbo_v2_5",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    }
    response = requests.post(
        elevenlabs_url,
        json=data,
        headers=elevenlabs_headers,
        stream=True,   # stream the audio instead of buffering it all in memory
        timeout=60,    # don't hang the app forever on a stalled connection
    )
    # Fail loudly on API errors; without this check the JSON error body would
    # be silently written into the .mp3 and corrupt the merged podcast.
    response.raise_for_status()
    filename = f"audio-files/{index}_{speaker}.mp3"
    with open(filename, "wb") as out:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:  # skip keep-alive chunks
                out.write(chunk)
# Pyttsx3 TTS function for Marina
def synthesize_speech_pyttsx3(text, speaker, index):
    """Render ``text`` to audio with the local pyttsx3 engine.

    Writes ``audio-files/{index}_{speaker}.mp3`` — the same naming scheme as
    the ElevenLabs path, so the merge step can sort all clips together.
    """
    out_path = f"audio-files/{index}_{speaker}.mp3"
    engine.save_to_file(text, out_path)
    engine.runAndWait()  # block until the engine has flushed the file
# Function to synthesize speech based on the speaker
def synthesize_speech(text, speaker, index):
    """Dispatch one conversation turn to the speaker's TTS backend.

    Sascha gets the ElevenLabs cloud voice; any other speaker (Marina) is
    rendered with the local pyttsx3 engine.
    """
    backend = (
        synthesize_speech_elevenlabs
        if speaker == "Sascha"
        else synthesize_speech_pyttsx3
    )
    backend(text, speaker, index)
# Function to sort filenames naturally
def natural_sort_key(filename):
    """Sort key that compares embedded digit runs numerically, so
    ``2_x.mp3`` orders before ``10_x.mp3`` (plain string sort would not)."""
    key = []
    for piece in re.split(r'(\d+)', filename):
        key.append(int(piece) if piece.isdigit() else piece)
    return key
# Function to merge audio files
def merge_audios(audio_folder, output_file):
    """Concatenate every .mp3/.wav clip in ``audio_folder`` into one MP3.

    Clips are joined in natural (numeric) filename order, so the
    ``{index}_{speaker}`` naming of the synthesis step determines playback
    order in ``output_file``.
    """
    clips = [
        name
        for name in os.listdir(audio_folder)
        if name.endswith(".mp3") or name.endswith(".wav")
    ]
    clips.sort(key=natural_sort_key)

    combined = AudioSegment.empty()
    for name in clips:
        combined += AudioSegment.from_file(os.path.join(audio_folder, name))
    combined.export(output_file, format="mp3")
# Function to generate the conversation using distilgpt2
def generate_conversation(article):
    """Generate a two-speaker podcast script from ``article`` text.

    Args:
        article: Raw article text to turn into a dialogue.

    Returns:
        A list of ``{"speaker": ..., "text": ...}`` dicts, alternating
        between "Sascha" and "Marina", one entry per non-empty line of
        model output.
    """
    prompt = system_prompt + "\n\nArticle:\n" + article + "\n\nSascha: "
    # distilgpt2 has a 1024-token context window; the original
    # max_length=8192 overruns it and raises on long articles. Truncate the
    # prompt and bound the *new* tokens so prompt + generation stays <= 1024.
    input_ids = tokenizer.encode(
        prompt, return_tensors="pt", truncation=True, max_length=768
    )
    output = model.generate(
        input_ids,
        max_new_tokens=256,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens. Decoding output[0] wholesale
    # would include the system prompt and the article, which then get read
    # aloud as "conversation" turns.
    generated_ids = output[0][input_ids.shape[1]:]
    conversation_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Assign alternating speakers to the non-empty output lines.
    conversation = []
    speaker = "Sascha"
    for line in conversation_text.splitlines():
        line = line.strip()
        if line:
            conversation.append({"speaker": speaker, "text": line})
            speaker = "Marina" if speaker == "Sascha" else "Sascha"
    return conversation
# Function to generate the podcast audio from conversation data
def generate_audio(conversation):
    """Synthesize each turn of ``conversation`` and merge them into one MP3.

    The ``audio-files`` working directory is recreated from scratch so clips
    from a previous run can never leak into the new podcast.

    Returns:
        The path of the merged podcast file ("podcast.mp3").
    """
    audio_dir = 'audio-files'
    if os.path.exists(audio_dir):
        shutil.rmtree(audio_dir)  # wipe stale clips from earlier runs
    os.makedirs(audio_dir, exist_ok=True)

    for turn_index, turn in enumerate(conversation):
        synthesize_speech(turn['text'], turn['speaker'], turn_index)

    output_file = "podcast.mp3"
    merge_audios(audio_dir, output_file)
    return output_file
# --- Streamlit inputs and outputs --------------------------------------------
article = st.text_area("Article Content", "Paste the article text here", height=300)

if st.button("Generate Podcast"):
    if not article:
        st.error("Please enter article content to generate a podcast.")
    else:
        # Step 1: turn the article into a two-speaker script.
        with st.spinner("Generating conversation..."):
            conversation = generate_conversation(article)
        st.success("Conversation generated successfully!")
        st.json(conversation)

        # Step 2: synthesize every turn and stitch the clips together.
        with st.spinner("Synthesizing audio..."):
            podcast_file = generate_audio(conversation)
        st.success("Audio synthesis complete!")

        # Playback and download of the finished podcast.
        st.audio(podcast_file, format="audio/mp3")
        with open(podcast_file, "rb") as file:
            st.download_button(
                "Download Podcast",
                data=file,
                file_name="podcast.mp3",
                mime="audio/mp3",
            )