malavika-2016 committed on
Commit
140e512
·
verified ·
1 Parent(s): 4bb0b1c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from flask import Flask
4
+ import tempfile
5
+ import fitz # PyMuPDF
6
+ import edge_tts
7
+ import asyncio
8
+ import uuid
9
+ from pydub import AudioSegment
10
+ from google import genai
11
+
12
# Gemini API configuration.
# The google-genai SDK imported above (`from google import genai`) has no
# module-level `configure()`; that call belongs to the older
# `google.generativeai` package and fails with AttributeError here.
# `genai.Client()` picks the key up from the GEMINI_API_KEY / GOOGLE_API_KEY
# environment variable instead, which also keeps the secret out of the source.

app = Flask(__name__)
16
+
17
+ # Async TTS function
18
async def synthesize_speech(text, voice, output_path):
    """Synthesize ``text`` with edge-tts and save the audio to ``output_path``.

    Args:
        text: The words to speak.
        voice: Name of the edge-tts voice to use.
        output_path: File path the synthesized audio is written to.
    """
    await edge_tts.Communicate(text, voice).save(output_path)
21
+
22
+ # Extract text from PDF
23
def extract_text_from_pdf(file_path):
    """Return the text of every page in the PDF at ``file_path``, in page order.

    Args:
        file_path: Path of the PDF document to open with PyMuPDF.

    Returns:
        The page texts concatenated into one string (empty if there are no pages).
    """
    with fitz.open(file_path) as pdf_doc:
        page_texts = [pdf_page.get_text() for pdf_page in pdf_doc]
    return "".join(page_texts)
29
+
30
+ # Prompt Gemini to generate a podcast script
31
def generate_script(topic_text):
    """Ask Gemini for a two-host podcast script about ``topic_text``.

    Args:
        topic_text: Source material the hosts (Alex and Maya) should discuss.

    Returns:
        The generated script as a string.

    Raises:
        RuntimeError: If the model response carries no text, so callers get a
            clear message instead of failing later on an empty script.
    """
    prompt = (
        "Create a podcast-style script where a male speaker (Alex) and a "
        "female speaker (Maya) discuss the topic below in a friendly, "
        "engaging way. The script should alternate between their lines."
        f"\n\nTopic: {topic_text}"
    )
    response = genai.Client().models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt],
    )
    script = response.text
    if not script:
        raise RuntimeError("Gemini returned no text for the podcast script.")
    return script
38
+
39
+ # Parse script and generate audio
40
def create_podcast_audio(script_text):
    """Voice each ``Speaker: text`` line of a script and join the clips into one MP3.

    Lines without a colon, or with nothing to say after it, are skipped.
    Speakers whose label contains "Alex" get the male voice; everyone else
    gets the female voice.

    Args:
        script_text: The script, one utterance per line. ``None`` or an empty
            string is treated as a script with no utterances.

    Returns:
        ``(final_audio_path, timeline)`` where ``final_audio_path`` is the path
        of the exported MP3 and ``timeline`` has one
        ``"[<start>s] <speaker>: <text>"`` entry per spoken line, joined with
        newlines. If the script contains no speakable lines, returns
        ``(None, "")`` instead of failing on an empty audio list.
    """
    audio_segments = []
    transcript = []

    # Per-line clips go into a private scratch directory that is removed even
    # when synthesis or decoding raises, so nothing is left behind on disk.
    with tempfile.TemporaryDirectory() as clip_dir:
        for index, line in enumerate((script_text or "").strip().split("\n")):
            speaker, separator, text = line.partition(":")
            speaker, text = speaker.strip(), text.strip()
            if not separator or not text:
                continue

            voice = "en-US-GuyNeural" if "Alex" in speaker else "en-US-JennyNeural"
            clip_path = os.path.join(clip_dir, f"line_{index}.mp3")
            asyncio.run(synthesize_speech(text, voice, clip_path))
            segment = AudioSegment.from_file(clip_path, format="mp3")
            audio_segments.append(segment)
            # len(segment) is used as the clip duration in milliseconds.
            transcript.append((speaker, text, len(segment)))

    if not audio_segments:
        return None, ""

    final_audio = sum(audio_segments)
    # mkstemp creates the file atomically, unlike the deprecated mktemp.
    fd, final_audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    final_audio.export(final_audio_path, format="mp3")

    # Add timestamps to transcript: each line starts where the previous ended.
    timeline = []
    current_time = 0
    for speaker, text, duration in transcript:
        timeline.append(f"[{current_time // 1000}s] {speaker}: {text}")
        current_time += duration

    return final_audio_path, "\n".join(timeline)
68
+
69
+ # Main handler
70
def handle_input(text, pdf):
    """Turn typed text or an uploaded PDF into a script, audio file and transcript.

    A PDF, when provided, takes priority over the typed text.

    Args:
        text: Topic text typed by the user; may be empty or ``None``.
        pdf: The uploaded PDF, or ``None``. Accepted forms are a filesystem
            path (``str`` / ``os.PathLike``), an object exposing that path as
            ``.name``, or a readable file-like object.

    Returns:
        ``(script, audio_path, transcript)``. When there is nothing usable to
        work from, the first item is a user-facing message and the other two
        are ``None``.
    """
    if pdf:
        if isinstance(pdf, (str, os.PathLike)):
            # The upload arrived as a path: read it in place.
            input_text = extract_text_from_pdf(pdf)
        else:
            upload_name = getattr(pdf, "name", None)
            if isinstance(upload_name, str) and os.path.isfile(upload_name):
                input_text = extract_text_from_pdf(upload_name)
            else:
                # Plain file-like object: spool it to disk so PyMuPDF can open it.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                    temp_pdf.write(pdf.read())
                    temp_pdf_path = temp_pdf.name
                try:
                    input_text = extract_text_from_pdf(temp_pdf_path)
                finally:
                    os.remove(temp_pdf_path)

        if not input_text.strip():
            return "No extractable text was found in the PDF.", None, None
    elif text and text.strip():
        input_text = text
    else:
        return "Please enter text or upload a PDF.", None, None

    script = generate_script(input_text)
    audio_path, transcript = create_podcast_audio(script)
    return script, audio_path, transcript
85
+
86
+ # Gradio UI
87
# Input widgets, in the positional order handle_input(text, pdf) receives them.
_ui_inputs = [
    gr.Textbox(label="Enter Topic Text"),
    gr.File(label="Or Upload PDF", file_types=[".pdf"]),
]

# Output widgets, matching handle_input's (script, audio_path, transcript) return.
_ui_outputs = [
    gr.Textbox(label="Generated Podcast Script"),
    gr.Audio(label="Podcast Audio"),
    gr.Textbox(label="Transcript with Timestamps"),
]

gradio_ui = gr.Interface(
    fn=handle_input,
    inputs=_ui_inputs,
    outputs=_ui_outputs,
    title="Learn Out Loud",
    description="Upload text or a PDF to generate a podcast script with audio and transcript.",
)
101
+
102
# `gr.mount_gradio_app` expects a FastAPI application, so it cannot mount the
# interface onto the Flask `app`. Gradio ships its own server, so launch the
# interface directly on the same host and port.
if __name__ == "__main__":
    gradio_ui.launch(server_name="0.0.0.0", server_port=7860)