Spaces:

MicroHealth
/

ai-podcast-builder

Paused

App Files Files Community

bluenevus committed on Apr 13, 2025

Commit

d982b19

verified ·

1 Parent(s): d34eb74

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -29

app.py CHANGED Viewed

@@ -14,23 +14,39 @@ logger = logging.getLogger(__name__)
 # Initialize Gemini AI
 genai.configure(api_key='YOUR_GEMINI_API_KEY')
-def generate_podcast_script(api_key, content, duration):
     genai.configure(api_key=api_key)
     model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
-    prompt = f"""
-    Create a podcast script for two people discussing the following content:
-    {content}
-    The podcast should last approximately {duration}. Include natural speech patterns,
-    humor, and occasional off-topic chit-chat. Use occasional speech fillers like um, ah,
-    yes, I see, Ok now. Vary the emotional tone.
-    Format the script as alternating lines of dialogue without speaker labels.
-    Separate each line with a blank line.
-    Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
-    Ensure the conversation flows naturally and stays relevant to the topic.
-    Limit the script length to match the requested duration of {duration}.
-    """
     response = model.generate_content(prompt)
     # Remove any special characters that might be read aloud
     clean_text = re.sub(r'[^a-zA-Z0-9\s.,?!]', '', response.text)
@@ -45,14 +61,19 @@ async def text_to_speech(text, voice):
     audio.seek(0)
     return audio.read()
-async def render_podcast(api_key, script, voice1, voice2):
     lines = [line for line in script.split('\n') if line.strip()]
     audio_segments = []
-    for i, line in enumerate(lines):
-        voice = voice1 if i % 2 == 0 else voice2
-        audio = await text_to_speech(line, voice)
-        audio_segments.append(audio)
     if not audio_segments:
         logger.warning("No valid audio segments were generated.")
@@ -128,6 +149,8 @@ with gr.Blocks() as demo:
     duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")
     voice_dict = asyncio.run(get_voice_list())
     languages = list(voice_dict.keys())
     languages.insert(0, "None")  # Add "None" option for single speaker
@@ -166,18 +189,19 @@ with gr.Blocks() as demo:
     lang1_select.change(update_voices, inputs=[lang1_select], outputs=[voice1_select])
     lang2_select.change(update_voices, inputs=[lang2_select], outputs=[voice2_select])
-    def generate_script_wrapper(api_key, content, duration):
-        return generate_podcast_script(api_key, content, duration)
-    async def render_podcast_wrapper(api_key, script, voice1, voice2):
-        if voice2 is None or lang2_select.value == "None":
-            # Use only voice1 for all lines
-            return await render_podcast(api_key, script, voice1, voice1)
-        else:
-            return await render_podcast(api_key, script, voice1, voice2)
-    generate_btn.click(generate_script_wrapper, inputs=[api_key_input, content_input, duration], outputs=script_output)
-    render_btn.click(render_podcast_wrapper, inputs=[api_key_input, script_output, voice1_select, voice2_select], outputs=audio_output)
 if __name__ == "__main__":
     demo.launch()

 # Initialize Gemini AI
 genai.configure(api_key='YOUR_GEMINI_API_KEY')
+def generate_podcast_script(api_key, content, duration, num_hosts):
     genai.configure(api_key=api_key)
     model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
+    if num_hosts == 1:
+        prompt = f"""
+        Create a podcast script for one person discussing the following content:
+        {content}
+        The podcast should last approximately {duration}. Include natural speech patterns,
+        humor, and occasional off-topic thoughts. Use occasional speech fillers like um, ah,
+        yes, I see, Ok now. Vary the emotional tone.
+        Format the script as a monologue without speaker labels.
+        Separate each paragraph with a blank line.
+        Do not use any special characters or markdown. Only include the monologue with proper punctuation.
+        Ensure the content flows naturally and stays relevant to the topic.
+        Limit the script length to match the requested duration of {duration}.
+        """
+    else:
+        prompt = f"""
+        Create a podcast script for two people discussing the following content:
+        {content}
+        The podcast should last approximately {duration}. Include natural speech patterns,
+        humor, and occasional off-topic chit-chat. Use occasional speech fillers like um, ah,
+        yes, I see, Ok now. Vary the emotional tone.
+        Format the script as alternating lines of dialogue without speaker labels.
+        Separate each line with a blank line.
+        Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
+        Ensure the conversation flows naturally and stays relevant to the topic.
+        Limit the script length to match the requested duration of {duration}.
+        """
     response = model.generate_content(prompt)
     # Remove any special characters that might be read aloud
     clean_text = re.sub(r'[^a-zA-Z0-9\s.,?!]', '', response.text)
     audio.seek(0)
     return audio.read()
+async def render_podcast(api_key, script, voice1, voice2, num_hosts):
     lines = [line for line in script.split('\n') if line.strip()]
     audio_segments = []
+    if num_hosts == 1:
+        for line in lines:
+            audio = await text_to_speech(line, voice1)
+            audio_segments.append(audio)
+    else:
+        for i, line in enumerate(lines):
+            voice = voice1 if i % 2 == 0 else voice2
+            audio = await text_to_speech(line, voice)
+            audio_segments.append(audio)
     if not audio_segments:
         logger.warning("No valid audio segments were generated.")
     duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")
+    num_hosts = gr.Radio([1, 2], label="Number of podcast hosts", value=2)
     voice_dict = asyncio.run(get_voice_list())
     languages = list(voice_dict.keys())
     languages.insert(0, "None")  # Add "None" option for single speaker
     lang1_select.change(update_voices, inputs=[lang1_select], outputs=[voice1_select])
     lang2_select.change(update_voices, inputs=[lang2_select], outputs=[voice2_select])
+    def generate_script_wrapper(api_key, content, duration, num_hosts):
+        return generate_podcast_script(api_key, content, duration, num_hosts)
+    async def render_podcast_wrapper(api_key, script, voice1, voice2, num_hosts):
+        return await render_podcast(api_key, script, voice1, voice2, num_hosts)
+    generate_btn.click(generate_script_wrapper, inputs=[api_key_input, content_input, duration, num_hosts], outputs=script_output)
+    render_btn.click(render_podcast_wrapper, inputs=[api_key_input, script_output, voice1_select, voice2_select, num_hosts], outputs=audio_output)
+    def update_second_voice_visibility(num_hosts):
+        return gr.update(visible=num_hosts == 2), gr.update(visible=num_hosts == 2)
+    num_hosts.change(update_second_voice_visibility, inputs=[num_hosts], outputs=[lang2_select, voice2_select])
 if __name__ == "__main__":
     demo.launch()