Spaces:

DrishtiSharma
/

multilingual-document-analyzer-n-podcast-generator

Sleeping

App Files Files Community

DrishtiSharma committed on Jun 21

Commit

0fcb9c1

verified ·

1 Parent(s): 3aca8d3

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -22

app.py CHANGED Viewed

@@ -141,7 +141,7 @@ class DocumentRAG:
         except Exception as e:
             return f"Error generating summary: {str(e)}"
-    def create_podcast(self, language):
         """Generate a podcast script and audio based on doc summary in the specified language."""
         if not self.document_summary:
             return "Please process documents before generating a podcast.", None
@@ -164,14 +164,11 @@ class DocumentRAG:
                     3. Discuss Limitations of the research work.
                     4. Present the Conclusion
                     5. Mention Future Work
                     Clearly label the dialogue as 'Host 1:' and 'Host 2:'. Maintain a tone that is engaging, conversational,
-                    and insightful, while ensuring the flow remains logical and natural. Include a well-structured opening
-                    to introduce the topic and a clear, thoughtful closing that provides a smooth conclusion, avoiding any
-                    abrupt endings."""
-                    },
-                    {"role": "user", "content": f"""
-                    Document Summary: {self.document_summary}"""}
                 ],
                 temperature=0.7
             )
@@ -194,21 +191,43 @@ class DocumentRAG:
                     continue
                 try:
-                    voice = "nova" if is_first_speaker else "onyx"
-                    audio_response = client.audio.speech.create(
-                        model="tts-1",
-                        voice=voice,
-                        input=text.strip()
-                    )
-                    temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-                    audio_response.stream_to_file(temp_audio_file.name)
                     segment = AudioSegment.from_file(temp_audio_file.name)
                     final_audio += segment
                     final_audio += AudioSegment.silent(duration=300)
                     is_first_speaker = not is_first_speaker
                 except Exception as e:
                     print(f"Error generating audio for line: {text}")
                     print(f"Details: {e}")
@@ -224,6 +243,7 @@ class DocumentRAG:
         except Exception as e:
             return f"Error generating podcast: {str(e)}", None
     def handle_query(self, question, history, language):
         """Handle user queries in the specified language."""
         if not self.qa_chain:
@@ -349,15 +369,32 @@ podcast_language = st.radio(
     key="podcast_language"
 )
 if st.session_state.rag_system.document_summary:
     if st.button("Generate Podcast"):
         with st.spinner("Generating podcast, please wait..."):
-            script, audio_path = st.session_state.rag_system.create_podcast(podcast_language)
         if audio_path:
             st.text_area("Generated Podcast Script", script, height=200)
             st.audio(audio_path, format="audio/mp3")
-            # Add this block to enable download
             with open(audio_path, "rb") as audio_file:
                 st.download_button(
                     label="Download Podcast (.mp3)",
@@ -366,8 +403,6 @@ if st.session_state.rag_system.document_summary:
                     mime="audio/mpeg"
                 )
             st.success("Podcast generated successfully! You can listen to it above.")
         else:
             st.error(script)

         except Exception as e:
             return f"Error generating summary: {str(e)}"
+    def create_podcast(self, language, tts_engine, elevenlabs_api_key=None):
         """Generate a podcast script and audio based on doc summary in the specified language."""
         if not self.document_summary:
             return "Please process documents before generating a podcast.", None
                     3. Discuss Limitations of the research work.
                     4. Present the Conclusion
                     5. Mention Future Work
                     Clearly label the dialogue as 'Host 1:' and 'Host 2:'. Maintain a tone that is engaging, conversational,
+                    and insightful. Include a clear, thoughtful closing.
+                    """},
+                    {"role": "user", "content": f"Document Summary: {self.document_summary}"}
                 ],
                 temperature=0.7
             )
                     continue
                 try:
+                    if tts_engine == "OpenAI":
+                        voice = "nova" if is_first_speaker else "onyx"
+                        audio_response = client.audio.speech.create(
+                            model="tts-1",
+                            voice=voice,
+                            input=text.strip()
+                        )
+                        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+                        audio_response.stream_to_file(temp_audio_file.name)
+                    elif tts_engine == "ElevenLabs":
+                        if not elevenlabs_api_key:
+                            return "Please provide your ElevenLabs API key.", None
+                        from elevenlabs import generate, set_api_key
+                        set_api_key(elevenlabs_api_key)
+                        voice_name = "Bella" if is_first_speaker else "Adam"
+                        audio_bytes = generate(
+                            text[:250],
+                            voice=voice_name,
+                            model="eleven_multilingual_v2"
+                        )
+                        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+                        temp_audio_file.write(audio_bytes)
+                        temp_audio_file.flush()
+                    else:
+                        return "Invalid TTS engine selected.", None
                     segment = AudioSegment.from_file(temp_audio_file.name)
                     final_audio += segment
                     final_audio += AudioSegment.silent(duration=300)
                     is_first_speaker = not is_first_speaker
                 except Exception as e:
                     print(f"Error generating audio for line: {text}")
                     print(f"Details: {e}")
         except Exception as e:
             return f"Error generating podcast: {str(e)}", None
     def handle_query(self, question, history, language):
         """Handle user queries in the specified language."""
         if not self.qa_chain:
     key="podcast_language"
 )
+# TTS Engine Selector
+st.write("Select TTS Engine:")
+tts_engine = st.radio(
+    "Choose voice generation engine:",
+    options=["OpenAI", "ElevenLabs"],
+    horizontal=True,
+    key="tts_engine"
+)
+# Optional ElevenLabs API Key Input
+elevenlabs_api_key = None
+if tts_engine == "ElevenLabs":
+    elevenlabs_api_key = st.text_input("Enter your ElevenLabs API Key:", type="password")
 if st.session_state.rag_system.document_summary:
     if st.button("Generate Podcast"):
         with st.spinner("Generating podcast, please wait..."):
+            script, audio_path = st.session_state.rag_system.create_podcast(
+                podcast_language,
+                tts_engine,
+                elevenlabs_api_key=elevenlabs_api_key
+            )
         if audio_path:
             st.text_area("Generated Podcast Script", script, height=200)
             st.audio(audio_path, format="audio/mp3")
             with open(audio_path, "rb") as audio_file:
                 st.download_button(
                     label="Download Podcast (.mp3)",
                     mime="audio/mpeg"
                 )
             st.success("Podcast generated successfully! You can listen to it above.")
         else:
             st.error(script)