Spaces:

malavika-2016
/

LearnOutLoud

Sleeping

App Files Files Community

LearnOutLoud / app.py

malavika-2016

Update app.py

0573542 verified about 1 year ago

raw

history blame contribute delete

2.24 kB

	import os
	import gradio as gr
	import tempfile
	import fitz # No longer used, you can uninstall this if not needed
	import edge_tts
	import asyncio
	import uuid
	from pydub import AudioSegment
	from google import genai

	# Module-wide Gemini client. The API key is read from the "aipi"
	# environment variable; a missing variable yields api_key=None.
	client = genai.Client(api_key=os.environ.get("aipi"))

	# Async TTS function
	async def synthesize_speech(text, voice, output_path):
	    """Synthesize ``text`` with edge-tts and save the result.

	    Args:
	        text: The text to be spoken.
	        voice: Name of the edge-tts voice to use (e.g. "en-US-GuyNeural").
	        output_path: File path handed to ``Communicate.save``.
	    """
	    await edge_tts.Communicate(text, voice).save(output_path)

	# Prompt Gemini to generate a podcast script
	def generate_script(topic_text):
	    """Ask Gemini for a two-host podcast script about ``topic_text``.

	    The prompt requests a conversation between "Alex" and "Maya" with
	    alternating lines.

	    Args:
	        topic_text: Free-form topic text; it is embedded verbatim in the
	            prompt.

	    Returns:
	        The generated script text as a non-empty string.

	    Raises:
	        RuntimeError: If the model response contains no text.
	    """
	    prompt = (
	        "Create a podcast-style script where a male speaker (Alex) and a female speaker (Maya) "
	        "discuss the topic below in a friendly, engaging way. The script should alternate between their lines.\n\n"
	        f"Topic: {topic_text}"
	    )
	    response = client.models.generate_content(
	        model="gemini-2.0-flash",
	        contents=[prompt],
	    )

	    script = response.text
	    # `response.text` is None when the response carries no text parts (for
	    # example an empty or blocked candidate). Fail here with a clear message
	    # instead of letting a None propagate into the audio pipeline.
	    if not script:
	        raise RuntimeError("Gemini returned no script text for the given topic.")
	    return script

	# Parse script and generate audio
	def create_podcast_audio(script_text):
	    """Convert a "Speaker: text" script into one combined MP3 file.

	    Every line containing a colon is split at the first colon into a speaker
	    label and the spoken text. Labels containing "Alex" use the
	    "en-US-GuyNeural" voice; every other label uses "en-US-JennyNeural".
	    Lines without a colon, or with nothing left to say after the colon, are
	    skipped.

	    Args:
	        script_text: The script, one utterance per line.

	    Returns:
	        Path of a newly created temporary ``.mp3`` file holding all clips
	        concatenated in script order. The caller owns (and should eventually
	        delete) this file.

	    Raises:
	        ValueError: If the script contains no speakable lines.
	    """
	    audio_segments = []

	    # A private temporary directory avoids the hard-coded "/tmp" path and
	    # guarantees the per-line clips are removed even when synthesis or
	    # decoding fails part-way through.
	    with tempfile.TemporaryDirectory() as clip_dir:
	        for index, line in enumerate((script_text or "").strip().split("\n")):
	            if ":" not in line:
	                continue
	            speaker, text = line.split(":", 1)

	            # Models often emit Markdown such as "**Alex:** Hi"; drop the
	            # emphasis markers so they are not read aloud.
	            text = text.strip().strip("*_").strip()
	            if not text:
	                # e.g. a heading like "Podcast Script:" with nothing to say.
	                continue

	            voice = "en-US-GuyNeural" if "Alex" in speaker else "en-US-JennyNeural"
	            clip_path = os.path.join(clip_dir, f"{index}.mp3")
	            asyncio.run(synthesize_speech(text, voice, clip_path))
	            audio_segments.append(AudioSegment.from_file(clip_path, format="mp3"))

	    if not audio_segments:
	        # sum([]) would be the integer 0, which has no .export().
	        raise ValueError("Script contains no 'Speaker: text' lines to synthesize.")

	    final_audio = sum(audio_segments)

	    # mkstemp creates the file atomically, unlike the deprecated, race-prone
	    # tempfile.mktemp. Only the path is needed, so close the descriptor.
	    fd, final_audio_path = tempfile.mkstemp(suffix=".mp3")
	    os.close(fd)
	    final_audio.export(final_audio_path, format="mp3")

	    return final_audio_path

	# Main handler
	def handle_input(text):
	    """Gradio callback: turn topic text into a podcast audio file.

	    Args:
	        text: Topic text from the UI textbox; may be ``None`` or empty.

	    Returns:
	        Path to the generated audio file, or ``None`` when no usable topic
	        was supplied.
	    """
	    # Whitespace-only input carries no topic; treat it like empty input
	    # rather than spending a model call and TTS run on it.
	    if not text or not text.strip():
	        return None
	    script = generate_script(text)
	    return create_podcast_audio(script)

	# Gradio UI: one textbox in, one audio player out, wired to handle_input.
	demo = gr.Interface(
	    fn=handle_input,
	    inputs=gr.Textbox(label="Enter Topic Text"),
	    outputs=gr.Audio(label="Generated Podcast Audio"),
	    title="Learn Out Loud",
	    description="Enter a topic to generate a podcast-style audio conversation.",
	)

	# Listen on all interfaces on port 7860.
	demo.launch(server_name="0.0.0.0", server_port=7860)