MultiMed

Runtime error

App Files Files Community

Tonic committed on Nov 19, 2023

Commit

f89be67

1 Parent(s): bff0d2c

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -33

app.py CHANGED Viewed

@@ -140,61 +140,49 @@ def process_speech(input_language, audio_input):
         return f"{e}"
-def convert_text_to_speech(input_text, source_language, target_language):
-    """
-    Convert text to speech in the specified language and return the audio file path and translated text.
-    """
     client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
     try:
         result = client.predict(
             "T2ST",  # Task
-            "text",  # Input type
             input_text,  # Input text
-            "",  # Empty string for audio name
-            "",  # Empty string for audio name
             source_language,  # Source language
             target_language,  # Target language
-            api_name="/run"  # API name
         )
         # Initialize variables
         original_audio_file = None
         translated_text = ""
         new_file_path = ""
-        # Check if result contains files
-        if isinstance(result, list) and len(result) > 1:
-            downloaded_files = []
-            for file_path in result[1:]:
-                if os.path.isfile(file_path):
-                    # Download the file
-                    downloaded_files.append(file_path)
-                else:
-                    print(f"Path is not a file: {file_path}")
-            if downloaded_files:
-                # Process only the first file
-                original_audio_file = downloaded_files[0]
-                new_file_name = f"audio_output_{uuid.uuid4()}.wav"
-                new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
-                os.rename(original_audio_file, new_file_path)
-        # Iterate over the result to find the last text item
         for item in result:
             if isinstance(item, str):
-                translated_text = item
         if original_audio_file:
-            return new_file_path, translated_text
-        else:
-            return "No valid audio file generated.", translated_text
     except Exception as e:
-        # Return a concise error message
         return f"Error in text-to-speech conversion: {str(e)}", ""
-    # return "Unexpected result format or insufficient data received.", "" //UNREACHABLE CODE
 def process_image(image_input):

         return f"{e}"
+def convert_text_to_speech(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
     client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
     try:
         result = client.predict(
             "T2ST",  # Task
+            "",  # Audio source
+            None,  # Input audio mic
+            None,  # Input audio file
             input_text,  # Input text
             source_language,  # Source language
             target_language,  # Target language
+            "/run"  # API endpoint
         )
         # Initialize variables
         original_audio_file = None
         translated_text = ""
         new_file_path = ""
+        # Iterate over the result to find the text and the first audio file
         for item in result:
             if isinstance(item, str):
+                # Check if the item is likely a URL
+                if item.startswith('http://') or item.startswith('https://'):
+                    continue
+                # Assign the first non-URL string as the translated text
+                if not translated_text:
+                    translated_text = item
+            elif os.path.isfile(item):
+                original_audio_file = item
+                break  # Stop after finding the first audio file
         if original_audio_file:
+            new_file_name = f"audio_output_{uuid.uuid4()}.wav"
+            new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
+            os.rename(original_audio_file, new_file_path)
+        return new_file_path, translated_text
     except Exception as e:
         return f"Error in text-to-speech conversion: {str(e)}", ""
 def process_image(image_input):