# Source captured from a Hugging Face Space (status: Sleeping; file size: 2,235 bytes).
import os
import gradio as gr
import tempfile
import fitz  # PyMuPDF; no longer used in this file - drop the import and the dependency if nothing else needs it
import edge_tts
import asyncio
import uuid
from pydub import AudioSegment
from google import genai
# Shared Gemini client; the API key is read from the "aipi" environment variable.
client = genai.Client(api_key=os.environ.get("aipi"))
# Async TTS function
async def synthesize_speech(text, voice, output_path):
    """Synthesize ``text`` with the Edge TTS ``voice`` and save it to ``output_path``.

    Builds an ``edge_tts.Communicate`` object for the text/voice pair and
    awaits its ``save`` coroutine with the destination path.
    """
    await edge_tts.Communicate(text, voice).save(output_path)
# Prompt Gemini to generate a podcast script
def generate_script(topic_text):
    """Ask Gemini for a two-host podcast script about ``topic_text``.

    Args:
        topic_text: The topic (or source text) the two speakers should discuss.

    Returns:
        The script text produced by the model.

    Raises:
        RuntimeError: If the model response carries no text.
    """
    prompt = (
        "Create a podcast-style script where a male speaker (Alex) and a female speaker (Maya) "
        "discuss the topic below in a friendly, engaging way. The script should alternate between their lines.\n\n"
        f"Topic: {topic_text}"
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt],
    )
    script = response.text
    # ``response.text`` may be None/empty when the response has no text parts;
    # fail here with a clear message instead of letting the caller crash on
    # ``None.strip()``.
    if not script:
        raise RuntimeError("Gemini returned no text for the requested topic.")
    return script
# Parse script and generate audio
def create_podcast_audio(script_text):
    """Render a ``Speaker: text`` script to one MP3 file and return its path.

    Every line containing a colon is split at the first colon into a speaker
    label and the spoken text. Each spoken line is synthesized to a temporary
    MP3, loaded as a pydub ``AudioSegment`` and the segments are concatenated
    in script order.

    Args:
        script_text: The script, one utterance per line.

    Returns:
        Path of the exported MP3. The file is not deleted by this function.

    Raises:
        ValueError: If the script contains no line with both a speaker label
            separator and non-empty text.
    """
    audio_segments = []
    for line in script_text.strip().splitlines():
        speaker, separator, spoken = line.partition(":")
        spoken = spoken.strip()
        # Skip lines without a "Speaker:" prefix and lines with nothing to say
        # (e.g. "Alex:"), which would otherwise be sent to TTS as empty text.
        if not separator or not spoken:
            continue

        # Any label that does not mention "Alex" gets the female voice.
        voice = "en-US-GuyNeural" if "Alex" in speaker else "en-US-JennyNeural"

        # mkstemp creates the file atomically in the platform temp directory;
        # only the path is needed, so the descriptor is closed right away.
        fd, temp_filename = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        try:
            asyncio.run(synthesize_speech(spoken, voice, temp_filename))
            audio_segments.append(
                AudioSegment.from_file(temp_filename, format="mp3")
            )
        finally:
            # Remove the per-line file even when synthesis or decoding fails.
            os.remove(temp_filename)

    # sum([]) would be the int 0, which has no export(); report it clearly.
    if not audio_segments:
        raise ValueError("Script contains no 'Speaker: text' lines to synthesize.")

    final_audio = sum(audio_segments)
    fd, final_audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    final_audio.export(final_audio_path, format="mp3")
    return final_audio_path
# Main handler
def handle_input(text):
    """Gradio callback: turn topic text into a podcast audio file.

    Args:
        text: The topic text entered by the user.

    Returns:
        Path of the generated audio file, or ``None`` when ``text`` is empty,
        ``None`` or whitespace only (no script or audio is generated then).
    """
    # Whitespace-only input is treated like empty input so no model call or
    # TTS run is started for a blank topic.
    if not text or not text.strip():
        return None
    script = generate_script(text)
    return create_podcast_audio(script)
# Gradio front end: one text box in, one audio player out, wired to
# handle_input and served on all interfaces at port 7860.
gr.Interface(
    title="Learn Out Loud",
    description="Enter a topic to generate a podcast-style audio conversation.",
    fn=handle_input,
    inputs=gr.Textbox(label="Enter Topic Text"),
    outputs=gr.Audio(label="Generated Podcast Audio"),
).launch(server_port=7860, server_name="0.0.0.0")
|