Update app.py
Browse files
app.py
CHANGED
|
@@ -14,23 +14,39 @@ logger = logging.getLogger(__name__)
|
|
| 14 |
# Initialize Gemini AI
|
| 15 |
genai.configure(api_key='YOUR_GEMINI_API_KEY')
|
| 16 |
|
| 17 |
-
def generate_podcast_script(api_key, content, duration):
|
| 18 |
genai.configure(api_key=api_key)
|
| 19 |
model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
response = model.generate_content(prompt)
|
| 35 |
# Remove any special characters that might be read aloud
|
| 36 |
clean_text = re.sub(r'[^a-zA-Z0-9\s.,?!]', '', response.text)
|
|
@@ -45,14 +61,19 @@ async def text_to_speech(text, voice):
|
|
| 45 |
audio.seek(0)
|
| 46 |
return audio.read()
|
| 47 |
|
| 48 |
-
async def render_podcast(api_key, script, voice1, voice2):
|
| 49 |
lines = [line for line in script.split('\n') if line.strip()]
|
| 50 |
audio_segments = []
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
if not audio_segments:
|
| 58 |
logger.warning("No valid audio segments were generated.")
|
|
@@ -128,6 +149,8 @@ with gr.Blocks() as demo:
|
|
| 128 |
|
| 129 |
duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")
|
| 130 |
|
|
|
|
|
|
|
| 131 |
voice_dict = asyncio.run(get_voice_list())
|
| 132 |
languages = list(voice_dict.keys())
|
| 133 |
languages.insert(0, "None") # Add "None" option for single speaker
|
|
@@ -166,18 +189,19 @@ with gr.Blocks() as demo:
|
|
| 166 |
lang1_select.change(update_voices, inputs=[lang1_select], outputs=[voice1_select])
|
| 167 |
lang2_select.change(update_voices, inputs=[lang2_select], outputs=[voice2_select])
|
| 168 |
|
| 169 |
-
def generate_script_wrapper(api_key, content, duration):
    """Click handler for the generate button (pre-change, three-input form).

    Passes the API key, the source content and the selected duration straight
    through to generate_podcast_script and returns whatever it produces.
    """
    script_text = generate_podcast_script(api_key, content, duration)
    return script_text
|
| 171 |
|
| 172 |
-
async def render_podcast_wrapper(api_key, script, voice1, voice2):
|
| 173 |
-
|
| 174 |
-
# Use only voice1 for all lines
|
| 175 |
-
return await render_podcast(api_key, script, voice1, voice1)
|
| 176 |
-
else:
|
| 177 |
-
return await render_podcast(api_key, script, voice1, voice2)
|
| 178 |
|
| 179 |
-
generate_btn.click(generate_script_wrapper, inputs=[api_key_input, content_input, duration], outputs=script_output)
|
| 180 |
-
render_btn.click(render_podcast_wrapper, inputs=[api_key_input, script_output, voice1_select, voice2_select], outputs=audio_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
if __name__ == "__main__":
|
| 183 |
demo.launch()
|
|
|
|
| 14 |
# Initialize Gemini AI
|
| 15 |
genai.configure(api_key='YOUR_GEMINI_API_KEY')
|
| 16 |
|
| 17 |
+
def generate_podcast_script(api_key, content, duration, num_hosts):
|
| 18 |
genai.configure(api_key=api_key)
|
| 19 |
model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
|
| 20 |
|
| 21 |
+
if num_hosts == 1:
|
| 22 |
+
prompt = f"""
|
| 23 |
+
Create a podcast script for one person discussing the following content:
|
| 24 |
+
{content}
|
| 25 |
+
|
| 26 |
+
The podcast should last approximately {duration}. Include natural speech patterns,
|
| 27 |
+
humor, and occasional off-topic thoughts. Use occasional speech fillers like um, ah,
|
| 28 |
+
yes, I see, Ok now. Vary the emotional tone.
|
| 29 |
+
Format the script as a monologue without speaker labels.
|
| 30 |
+
Separate each paragraph with a blank line.
|
| 31 |
+
Do not use any special characters or markdown. Only include the monologue with proper punctuation.
|
| 32 |
+
Ensure the content flows naturally and stays relevant to the topic.
|
| 33 |
+
Limit the script length to match the requested duration of {duration}.
|
| 34 |
+
"""
|
| 35 |
+
else:
|
| 36 |
+
prompt = f"""
|
| 37 |
+
Create a podcast script for two people discussing the following content:
|
| 38 |
+
{content}
|
| 39 |
+
|
| 40 |
+
The podcast should last approximately {duration}. Include natural speech patterns,
|
| 41 |
+
humor, and occasional off-topic chit-chat. Use occasional speech fillers like um, ah,
|
| 42 |
+
yes, I see, Ok now. Vary the emotional tone.
|
| 43 |
+
Format the script as alternating lines of dialogue without speaker labels.
|
| 44 |
+
Separate each line with a blank line.
|
| 45 |
+
Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
|
| 46 |
+
Ensure the conversation flows naturally and stays relevant to the topic.
|
| 47 |
+
Limit the script length to match the requested duration of {duration}.
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
response = model.generate_content(prompt)
|
| 51 |
# Remove any special characters that might be read aloud
|
| 52 |
clean_text = re.sub(r'[^a-zA-Z0-9\s.,?!]', '', response.text)
|
|
|
|
| 61 |
audio.seek(0)
|
| 62 |
return audio.read()
|
| 63 |
|
| 64 |
+
async def render_podcast(api_key, script, voice1, voice2, num_hosts):
|
| 65 |
lines = [line for line in script.split('\n') if line.strip()]
|
| 66 |
audio_segments = []
|
| 67 |
|
| 68 |
+
if num_hosts == 1:
|
| 69 |
+
for line in lines:
|
| 70 |
+
audio = await text_to_speech(line, voice1)
|
| 71 |
+
audio_segments.append(audio)
|
| 72 |
+
else:
|
| 73 |
+
for i, line in enumerate(lines):
|
| 74 |
+
voice = voice1 if i % 2 == 0 else voice2
|
| 75 |
+
audio = await text_to_speech(line, voice)
|
| 76 |
+
audio_segments.append(audio)
|
| 77 |
|
| 78 |
if not audio_segments:
|
| 79 |
logger.warning("No valid audio segments were generated.")
|
|
|
|
| 149 |
|
| 150 |
duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")
|
| 151 |
|
| 152 |
+
num_hosts = gr.Radio([1, 2], label="Number of podcast hosts", value=2)
|
| 153 |
+
|
| 154 |
voice_dict = asyncio.run(get_voice_list())
|
| 155 |
languages = list(voice_dict.keys())
|
| 156 |
languages.insert(0, "None") # Add "None" option for single speaker
|
|
|
|
| 189 |
lang1_select.change(update_voices, inputs=[lang1_select], outputs=[voice1_select])
|
| 190 |
lang2_select.change(update_voices, inputs=[lang2_select], outputs=[voice2_select])
|
| 191 |
|
| 192 |
+
def generate_script_wrapper(api_key, content, duration, num_hosts):
    """Click handler for the generate button.

    Passes the API key, the source content, the selected duration and the
    selected number of hosts straight through to generate_podcast_script and
    returns whatever it produces.
    """
    script_text = generate_podcast_script(api_key, content, duration, num_hosts)
    return script_text
|
| 194 |
|
| 195 |
+
async def render_podcast_wrapper(api_key, script, voice1, voice2, num_hosts):
    """Click handler for the render button.

    Forwards all five arguments, unchanged and in order, to render_podcast
    and returns its awaited result as-is.
    """
    rendered_audio = await render_podcast(api_key, script, voice1, voice2, num_hosts)
    return rendered_audio
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
+
generate_btn.click(generate_script_wrapper, inputs=[api_key_input, content_input, duration, num_hosts], outputs=script_output)
|
| 199 |
+
render_btn.click(render_podcast_wrapper, inputs=[api_key_input, script_output, voice1_select, voice2_select, num_hosts], outputs=audio_output)
|
| 200 |
+
|
| 201 |
+
def update_second_voice_visibility(num_hosts):
    """Show the second host's pickers only when two hosts are selected.

    Returns two gr.update(...) objects carrying the same ``visible`` flag, one
    for each output component this callback is wired to (lang2_select and
    voice2_select).
    """
    show_second_host = num_hosts == 2
    language_update = gr.update(visible=show_second_host)
    voice_update = gr.update(visible=show_second_host)
    return language_update, voice_update
|
| 203 |
+
|
| 204 |
+
num_hosts.change(update_second_voice_visibility, inputs=[num_hosts], outputs=[lang2_select, voice2_select])
|
| 205 |
|
| 206 |
if __name__ == "__main__":
|
| 207 |
demo.launch()
|