Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8

Commit

cd9e32e

verified ·

1 Parent(s): 6706f05

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -31

app.py CHANGED Viewed

@@ -46,8 +46,8 @@ def text2story(text):
 # text2audio - REVISED with proper audio field handling
 def text2audio(story_text):
     try:
-        # Use the facebook TTS model
-        synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
         # Limit text length to avoid timeouts
         max_chars = 500
@@ -61,45 +61,32 @@ def text2audio(story_text):
         # Generate speech
         speech = synthesizer(story_text)
-        # DEBUG: Print the keys in the speech output to understand its structure
-        st.write(f"Speech output keys: {list(speech.keys())}")
         # Create a temporary WAV file
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
         temp_filename = temp_file.name
         temp_file.close()
-        # Write the audio data to the temporary file
-        # The key is likely 'audio' or 'raw' rather than 'bytes'
-        with open(temp_filename, 'wb') as f:
-            # Try to write using the correct key from the output
-            if 'audio' in speech and isinstance(speech['audio'], (bytes, bytearray)):
-                f.write(speech['audio'])
-            elif 'raw' in speech and isinstance(speech['raw'], (bytes, bytearray)):
-                f.write(speech['raw'])
-            elif 'wav' in speech and isinstance(speech['wav'], (bytes, bytearray)):
-                f.write(speech['wav'])
-            elif 'audio' in speech and hasattr(speech['audio'], 'tobytes'):
-                # It might be a numpy array
-                f.write(speech['audio'].tobytes())
-            else:
-                # Try the first value that looks like audio data
-                for key, value in speech.items():
-                    if isinstance(value, (bytes, bytearray)) or (
-                            hasattr(value, 'tobytes') and len(value) > 1000):
-                        if hasattr(value, 'tobytes'):
-                            f.write(value.tobytes())
-                        else:
-                            f.write(value)
-                        st.write(f"Used key: {key} for audio data")
-                        break
-                else:
-                    raise ValueError(f"No suitable audio data found in keys: {list(speech.keys())}")
         return temp_filename
     except Exception as e:
         st.error(f"Error generating audio: {str(e)}")
         return None
 # Function to save temporary image file

 # text2audio - REVISED with proper audio field handling
 def text2audio(story_text):
     try:
+        # Use the MeloTTS model which has better audio quality
+        synthesizer = pipeline("text-to-speech", model="myshell-ai/MeloTTS-English")
         # Limit text length to avoid timeouts
         max_chars = 500
         # Generate speech
         speech = synthesizer(story_text)
         # Create a temporary WAV file
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
         temp_filename = temp_file.name
         temp_file.close()
+        # Debug: Show what keys are available in the speech output
+        st.write(f"Speech output keys: {list(speech.keys())}")
+        # Write the audio data to the temporary file - MeloTTS should have audio and sampling_rate
+        if 'audio' in speech and 'sampling_rate' in speech:
+            # Convert numpy array to WAV file
+            scipy.io.wavfile.write(
+                temp_filename,
+                speech['sampling_rate'],
+                speech['audio'].astype(np.float32)
+            )
+            st.write("Audio successfully written to file")
+        else:
+            raise ValueError(f"Expected 'audio' and 'sampling_rate' in output, but got: {list(speech.keys())}")
         return temp_filename
     except Exception as e:
         st.error(f"Error generating audio: {str(e)}")
+        import traceback
+        st.error(traceback.format_exc())
         return None
 # Function to save temporary image file