File size: 2,235 Bytes
140e512
 
 
0573542
140e512
 
 
 
 
 
77d9863
0573542
140e512
 
 
 
 
 
 
 
0573542
 
 
 
 
0111403
140e512
 
 
 
 
 
 
 
 
 
77d9863
140e512
 
 
 
 
 
 
 
 
 
 
 
 
0573542
140e512
 
0573542
 
 
 
 
 
140e512
0573542
 
140e512
0573542
 
140e512
0573542
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import gradio as gr
import tempfile
import fitz  # No longer used, you can uninstall this if not needed
import edge_tts
import asyncio
import uuid
from pydub import AudioSegment
from google import genai

# Configure Gemini API client
# The API key comes from the "aipi" environment variable; os.getenv returns
# None when that variable is not set.
_gemini_api_key = os.getenv("aipi")
client = genai.Client(api_key=_gemini_api_key)

# Async TTS function
async def synthesize_speech(text, voice, output_path):
    """Synthesize *text* with the edge-tts *voice* and save the audio.

    Args:
        text: The sentence(s) to speak.
        voice: Name of the edge-tts voice to use (e.g. "en-US-GuyNeural").
        output_path: File path the resulting audio is written to.
    """
    await edge_tts.Communicate(text, voice).save(output_path)

# Prompt Gemini to generate a podcast script
def generate_script(topic_text):
    """Ask Gemini for a two-speaker podcast script about *topic_text*.

    Args:
        topic_text: The topic the two speakers (Alex and Maya) should discuss.

    Returns:
        The ``text`` attribute of the Gemini response.
    """
    # Fixed instructions; the topic is appended on its own line below them.
    instructions = (
        "Create a podcast-style script where a male speaker (Alex) and a female speaker (Maya) "
        "discuss the topic below in a friendly, engaging way. The script should alternate between their lines.\n\n"
    )
    prompt = f"{instructions}Topic: {topic_text}"

    reply = client.models.generate_content(model="gemini-2.0-flash", contents=[prompt])
    return reply.text

# Parse script and generate audio
def _parse_dialogue(script_text):
    """Return ``(voice, spoken_text)`` pairs for each "Speaker: text" line."""
    turns = []
    for line in (script_text or "").splitlines():
        speaker, separator, text = line.partition(":")
        # The script is LLM output and may wrap the speaker label in Markdown
        # bold ("**Alex:** Hi"), which leaves stray asterisks on the text.
        text = text.strip().strip("*").strip()
        if not separator or not text:
            # Not a dialogue line, or a label with nothing to say.
            continue
        voice = "en-US-GuyNeural" if "Alex" in speaker else "en-US-JennyNeural"
        turns.append((voice, text))
    return turns


def create_podcast_audio(script_text):
    """Turn a "Speaker: text" script into a single MP3 file.

    Every line containing a colon is treated as one utterance. Lines whose
    speaker label contains "Alex" use the male voice; all others use the
    female voice. The utterances are synthesized one at a time and
    concatenated in script order.

    Args:
        script_text: The script, one utterance per line. ``None`` is treated
            as an empty script.

    Returns:
        Path of the exported MP3 file. The caller owns the file.

    Raises:
        ValueError: If the script contains no line with a speaker label and
            non-empty text, so there is nothing to synthesize.
    """
    turns = _parse_dialogue(script_text)
    if not turns:
        # Without this guard sum([]) would be the int 0 and .export() would
        # fail with an unhelpful AttributeError.
        raise ValueError("Script contains no 'Speaker: text' lines to synthesize.")

    audio_segments = []
    for voice, text in turns:
        # mkstemp creates the file atomically in the platform temp directory.
        handle, clip_path = tempfile.mkstemp(suffix=".mp3")
        os.close(handle)
        try:
            asyncio.run(synthesize_speech(text, voice, clip_path))
            audio_segments.append(AudioSegment.from_file(clip_path, format="mp3"))
        finally:
            # Remove the intermediate clip even when synthesis/decoding fails.
            os.remove(clip_path)

    final_audio = sum(audio_segments)
    handle, final_audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(handle)
    final_audio.export(final_audio_path, format="mp3")

    return final_audio_path

# Main handler
def handle_input(text):
    """Generate podcast audio for the topic typed into the UI.

    Args:
        text: The topic entered by the user.

    Returns:
        Path of the generated audio file, or ``None`` when *text* is empty.
    """
    if text:
        # Topic -> script -> audio file path.
        return create_podcast_audio(generate_script(text))
    return None

# Gradio UI (Simplified)
# One text box in, one audio player out; handle_input does the work.
demo = gr.Interface(
    fn=handle_input,
    inputs=gr.Textbox(label="Enter Topic Text"),
    outputs=gr.Audio(label="Generated Podcast Audio"),
    title="Learn Out Loud",
    description="Enter a topic to generate a podcast-style audio conversation.",
)

# Serve on all network interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)