Spaces:

malavika-2016
/

LearnOutLoud

Sleeping

App Files Files Community

malavika-2016 committed on May 16, 2025

Commit

140e512

verified ·

1 Parent(s): 4bb0b1c

Create app.py

Browse files

Files changed (1) hide show

app.py +105 -0

app.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import os
+import gradio as gr
+from flask import Flask
+import tempfile
+import fitz  # PyMuPDF
+import edge_tts
+import asyncio
+import uuid
+from pydub import AudioSegment
+from google import genai
+# Configure Gemini API
+genai.configure(api_key="YOUR_API_KEY")
+app = Flask(__name__)
+# Async TTS function
+async def synthesize_speech(text, voice, output_path):
+    communicate = edge_tts.Communicate(text, voice)
+    await communicate.save(output_path)
+# Extract text from PDF
+def extract_text_from_pdf(file_path):
+    text = ""
+    with fitz.open(file_path) as doc:
+        for page in doc:
+            text += page.get_text()
+    return text
+# Prompt Gemini to generate a podcast script
+def generate_script(topic_text):
+    prompt = f"Create a podcast-style script where a male speaker (Alex) and a female speaker (Maya) discuss the topic below in a friendly, engaging way. The script should alternate between their lines.\n\nTopic: {topic_text}"
+    response = genai.Client().models.generate_content(
+        model="gemini-2.0-flash",
+        contents=[prompt]
+    )
+    return response.text
+# Parse script and generate audio
+def create_podcast_audio(script_text):
+    lines = script_text.strip().split("\n")
+    audio_segments = []
+    transcript = []
+    for i, line in enumerate(lines):
+        if ":" in line:
+            speaker, text = line.split(":", 1)
+            voice = "en-US-GuyNeural" if "Alex" in speaker else "en-US-JennyNeural"
+            temp_filename = f"/tmp/{uuid.uuid4()}.mp3"
+            asyncio.run(synthesize_speech(text.strip(), voice, temp_filename))
+            segment = AudioSegment.from_file(temp_filename, format="mp3")
+            audio_segments.append(segment)
+            transcript.append((speaker.strip(), text.strip(), len(segment)))
+            os.remove(temp_filename)
+    final_audio = sum(audio_segments)
+    final_audio_path = tempfile.mktemp(suffix=".mp3")
+    final_audio.export(final_audio_path, format="mp3")
+    # Add timestamps to transcript
+    timeline = []
+    current_time = 0
+    for speaker, text, duration in transcript:
+        timeline.append(f"[{current_time // 1000}s] {speaker}: {text}")
+        current_time += duration
+    return final_audio_path, "\n".join(timeline)
+# Main handler
+def handle_input(text, pdf):
+    if pdf:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
+            temp_pdf.write(pdf.read())
+            temp_pdf_path = temp_pdf.name
+        input_text = extract_text_from_pdf(temp_pdf_path)
+        os.remove(temp_pdf_path)
+    elif text:
+        input_text = text
+    else:
+        return "Please enter text or upload a PDF.", None, None
+    script = generate_script(input_text)
+    audio_path, transcript = create_podcast_audio(script)
+    return script, audio_path, transcript
+# Gradio UI
+gradio_ui = gr.Interface(
+    fn=handle_input,
+    inputs=[
+        gr.Textbox(label="Enter Topic Text"),
+        gr.File(label="Or Upload PDF", file_types=[".pdf"])
+    ],
+    outputs=[
+        gr.Textbox(label="Generated Podcast Script"),
+        gr.Audio(label="Podcast Audio"),
+        gr.Textbox(label="Transcript with Timestamps")
+    ],
+    title="Learn Out Loud",
+    description="Upload text or a PDF to generate a podcast script with audio and transcript."
+)
+app = gr.mount_gradio_app(app, gradio_ui, path="/")
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)