Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8

Commit

ce9aea5

verified ·

1 Parent(s): 862568a

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -56

app.py CHANGED Viewed

@@ -5,17 +5,53 @@ from PIL import Image
 import torch
 import os
 import tempfile
-import sys
-import subprocess
-# Try to import gTTS, install if missing
 try:
     from gtts import gTTS
 except ImportError:
-    st.warning("Installing required package: gTTS...")
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "gTTS"])
-    from gtts import gTTS
-    st.success("gTTS installed successfully!")
 # Simple image-to-text function
 def img2text(image):
@@ -66,26 +102,6 @@ def text2story(text):
     # If no good ending is found, return as is
     return story_text
-# Updated text-to-audio function using gTTS
-def text2audio(story_text):
-    # Create a temporary file
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
-    temp_filename = temp_file.name
-    temp_file.close()
-    # Use gTTS to convert text to speech
-    tts = gTTS(text=story_text, lang='en', slow=False)
-    tts.save(temp_filename)
-    # Read the audio file
-    with open(temp_filename, 'rb') as audio_file:
-        audio_bytes = audio_file.read()
-    # Clean up the temporary file
-    os.unlink(temp_filename)
-    return audio_bytes
 # Basic Streamlit interface
 st.title("Image to Audio Story")
 uploaded_file = st.file_uploader("Upload an image")
@@ -110,36 +126,13 @@ if uploaded_file is not None:
     # Text to Audio
     with st.spinner("Generating audio..."):
         try:
-            audio_bytes = text2audio(story)
             # Play audio
-            st.audio(audio_bytes, format='audio/mp3')
         except Exception as e:
             st.error(f"Error generating or playing audio: {e}")
-            st.info("If you're having issues with gTTS, you might need to manually install it with: pip install gTTS")
-            # Fallback to a simple TTS if gTTS fails
-            try:
-                st.write("Attempting fallback to pyttsx3...")
-                import pyttsx3
-                engine = pyttsx3.init()
-                # Create a temporary file for the fallback audio
-                temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
-                temp_wav_filename = temp_wav.name
-                temp_wav.close()
-                # Generate and save speech
-                engine.save_to_file(story, temp_wav_filename)
-                engine.runAndWait()
-                # Read the audio file
-                with open(temp_wav_filename, 'rb') as audio_file:
-                    fallback_audio = audio_file.read()
-                # Clean up
-                os.unlink(temp_wav_filename)
-                st.audio(fallback_audio, format='audio/wav')
-            except:
-                st.error("Both TTS methods failed. Please install gTTS manually.")

 import torch
 import os
 import tempfile
+# Pick a text2audio implementation at import time: gTTS when importable, otherwise a transformers pipeline
 try:
+    # Preferred path: gTTS; this variant returns (mp3_bytes, 'audio/mp3')
     from gtts import gTTS
+    def text2audio(story_text):
+        # Reserve a named temp .mp3 path; delete=False because the handle is closed before gTTS writes to it
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
+        temp_filename = temp_file.name
+        temp_file.close()
+        # Synthesize English speech at normal speed (slow=False) and save it to the temp path
+        tts = gTTS(text=story_text, lang='en', slow=False)
+        tts.save(temp_filename)
+        # Read the saved file back as raw bytes
+        with open(temp_filename, 'rb') as audio_file:
+            audio_bytes = audio_file.read()
+        # Remove the temp file; NOTE(review): not in a finally block, so it is left behind if save/read raises
+        os.unlink(temp_filename)
+        return audio_bytes, 'audio/mp3'
 except ImportError:
+    st.warning("gTTS not available. Using alternative text-to-speech method.")
+    # Fallback path: this variant returns (audio, sampling_rate) - a rate in place of the MIME string above
+    def text2audio(story_text):
+        # Imported inside the function so transformers is only required when this fallback is actually called
+        from transformers import pipeline
+        # NOTE(review): the pipeline is constructed on every call; consider building it once if that is costly
+        synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
+        # Run the synthesizer on the full story text
+        speech = synthesizer(story_text)
+        # Return the samples under whichever key is present, with the rate (16000 if 'sampling_rate' is absent)
+        if 'audio' in speech:
+            return speech['audio'], speech.get('sampling_rate', 16000)
+        elif 'audio_array' in speech:
+            return speech['audio_array'], speech.get('sampling_rate', 16000)
+        else:
+            # Neither key is present: raise (nothing is returned) so the caller can report the failure
+            raise Exception("Failed to generate audio with any available method")
 # Simple image-to-text function
 def img2text(image):
     # If no good ending is found, return as is
     return story_text
 # Basic Streamlit interface
 st.title("Image to Audio Story")
 uploaded_file = st.file_uploader("Upload an image")
     # Text to Audio
     with st.spinner("Generating audio..."):
         try:
+            audio_data, audio_format = text2audio(story)
             # Play audio
+            if isinstance(audio_format, str) and audio_format.startswith('audio/'):
+                st.audio(audio_data, format=audio_format)
+            else:
+                st.audio(audio_data, sample_rate=audio_format)
         except Exception as e:
             st.error(f"Error generating or playing audio: {e}")
+            st.info("There was an issue with the text-to-speech conversion.")