MultiMedTulu

Runtime error

App Files Community

Tonic committed on Nov 19, 2023

Commit

cbb63b6

1 Parent(s): 16c1c4f

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -16

app.py CHANGED Viewed

@@ -122,35 +122,41 @@ def process_speech(input_language, audio_input):
         return f"{e}"
-def convert_text_to_speech(input_text, target_language):
     """
-    Convert text to speech in the specified language, rename the audio file with a unique identifier, and return both the new audio file path and the input text.
     """
     try:
-        text_to_speech_result = seamless_client.predict(
-            "T2ST",  # Task: Text to Speech Translation
             "text",  # Input type
-            None,  # No file input for text to speech
             input_text,  # Input text
             "",  # Empty string for audio name
-            "",  # Empty string for source language, as it's not needed here
             target_language,  # Target language
             api_name="/run"  # API name
         )
-        original_audio_file = text_to_speech_result[1]  # Assuming the audio file path is in the second position
-        # Generate a new file name with a random UUID
-        new_file_name = f"audio_output_{uuid.uuid4()}.wav"
-        new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
-        # Rename the file
-        os.rename(original_audio_file, new_file_path)
-        return new_file_path, input_text
-    except Exception as e:
-        return f"An error occurred during text-to-speech conversion: {e}", input_text
 def save_image(image_input, output_dir="saved_images"):
     if not os.path.exists(output_dir):
@@ -423,7 +429,8 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
         final_response = process_summary_with_stablemed(summary)
         # Convert translated text to speech and get both audio file and text
-        audio_output, translated_text = convert_text_to_speech(final_response, input_language)
         # Evaluate hallucination
         hallucination_label = evaluate_hallucination(final_response, summary)

         return f"{e}"
+def convert_text_to_speech(input_text, source_language, target_language):
     """
+    Convert text to speech in the specified language and return the new audio file path.
     """
+    client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
     try:
+        result = client.predict(
+            "T2ST (Text to Speech translation)",  # Task
             "text",  # Input type
             input_text,  # Input text
             "",  # Empty string for audio name
+            source_language,  # Source language
             target_language,  # Target language
             api_name="/run"  # API name
         )
+        # Assuming the audio file path is returned in the result
+        original_audio_file = result[1] if len(result) > 1 else None
+        if original_audio_file:
+            # Generate a new file name with a random UUID
+            new_file_name = f"audio_output_{uuid.uuid4()}.wav"
+            new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
+            # Rename the file
+            os.rename(original_audio_file, new_file_path)
+            return new_file_path
+        else:
+            return "No audio file generated."
+    except Exception as e:
+        # Return a concise error message
+        return f"Error in text-to-speech conversion: {str(e)}"
 def save_image(image_input, output_dir="saved_images"):
     if not os.path.exists(output_dir):
         final_response = process_summary_with_stablemed(summary)
         # Convert translated text to speech and get both audio file and text
+        target_language = "English"  # Set the target language for the speech
+        audio_file_path = convert_text_to_speech(final_response, target_language, input_language)
         # Evaluate hallucination
         hallucination_label = evaluate_hallucination(final_response, summary)