Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import tempfile | |
| import fitz # No longer used, you can uninstall this if not needed | |
| import edge_tts | |
| import asyncio | |
| import uuid | |
| from pydub import AudioSegment | |
| from google import genai | |
# Gemini API client; the API key is read from the "aipi" environment variable.
client = genai.Client(api_key=os.environ.get("aipi"))
# Async text-to-speech helper
async def synthesize_speech(text, voice, output_path):
    """Synthesize ``text`` with edge-tts and save the audio to ``output_path``.

    Args:
        text: Text to be spoken.
        voice: edge-tts voice name, e.g. ``"en-US-GuyNeural"``.
        output_path: File path the synthesized audio is written to.
    """
    await edge_tts.Communicate(text, voice).save(output_path)
# Ask Gemini for a two-host podcast script
def generate_script(topic_text):
    """Return a podcast-style dialogue script about ``topic_text``.

    Sends one prompt to the ``gemini-2.0-flash`` model through the
    module-level ``client`` and returns the ``text`` attribute of the
    response.
    """
    instructions = (
        "Create a podcast-style script where a male speaker (Alex) and a female speaker (Maya) "
        "discuss the topic below in a friendly, engaging way. The script should alternate between their lines."
    )
    prompt = f"{instructions}\n\nTopic: {topic_text}"
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt],
    )
    return reply.text
# Parse script and generate audio
def create_podcast_audio(script_text):
    """Render a dialogue script to a single MP3 file and return its path.

    Each line of the form ``Speaker: text`` becomes one spoken segment.
    A speaker label containing ``"Alex"`` uses the ``en-US-GuyNeural``
    voice; every other label uses ``en-US-JennyNeural``. Lines without a
    colon, or with nothing left to say after the colon, are skipped.

    Args:
        script_text: The script, one utterance per line. ``None`` is
            treated like an empty script.

    Returns:
        Path of the exported MP3 file. The file is not deleted here.

    Raises:
        ValueError: If the script has no usable ``Speaker: text`` line.
    """
    utterances = []
    for line in (script_text or "").strip().split("\n"):
        speaker, colon, text = line.partition(":")
        if not colon:
            continue
        # A label written as "**Alex:** Hi" leaves "** Hi" after the split;
        # drop the Markdown markers so they are not passed to the TTS engine.
        text = text.strip().strip("*").strip()
        if not text:
            continue
        voice = "en-US-GuyNeural" if "Alex" in speaker else "en-US-JennyNeural"
        utterances.append((voice, text))

    if not utterances:
        # sum() over an empty list is the int 0, which has no .export().
        raise ValueError("script contains no 'Speaker: text' lines to synthesize")

    audio_segments = []
    for voice, text in utterances:
        temp_filename = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.mp3")
        try:
            asyncio.run(synthesize_speech(text, voice, temp_filename))
            audio_segments.append(AudioSegment.from_file(temp_filename, format="mp3"))
        finally:
            # Remove the per-line clip even when synthesis or decoding fails.
            if os.path.exists(temp_filename):
                os.remove(temp_filename)

    final_audio = sum(audio_segments)

    # mkstemp creates the file itself, avoiding the race in the deprecated mktemp.
    fd, final_audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    final_audio.export(final_audio_path, format="mp3")
    return final_audio_path
# Main handler
def handle_input(text):
    """Turn a topic into a podcast audio file for the Gradio interface.

    Args:
        text: Topic entered by the user.

    Returns:
        Path to the generated MP3 file, or ``None`` when ``text`` is empty
        or contains only whitespace.
    """
    topic = text.strip() if text else ""
    if not topic:
        # Nothing to talk about: skip the Gemini and TTS calls entirely.
        return None
    script = generate_script(topic)
    return create_podcast_audio(script)
# Gradio UI (simplified): one text box in, one audio player out.
topic_box = gr.Textbox(label="Enter Topic Text")
podcast_player = gr.Audio(label="Generated Podcast Audio")
demo = gr.Interface(
    fn=handle_input,
    inputs=topic_box,
    outputs=podcast_player,
    title="Learn Out Loud",
    description="Enter a topic to generate a podcast-style audio conversation.",
)
# Bind to all interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)