MultiMed

Runtime error

App Files Community

Tonic committed on Nov 19, 2023

Commit

5a0e49a

1 Parent(s): a543a78

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -31

app.py CHANGED Viewed

@@ -195,16 +195,20 @@ def process_speech(input_language, audio_input):
         return f"{e}"
-def convert_text_to_speech(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
-    if not input_text or not source_language or not target_language:
-        return None, "Invalid input parameters."
     client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
     try:
         result = client.predict(
             "T2ST",
-            "text",
             None,
             None,
             input_text,
@@ -212,44 +216,31 @@ def convert_text_to_speech(input_text: str, source_language: str, target_languag
             target_language,
             api_name="/run"
         )
-    except Exception as e:
-        return None, f"Error during prediction: {str(e)}"
-    try:
         translated_text = ""
         audio_file_path = ""
         if result:
             for item in result:
                 if isinstance(item, str):
-                    if item.endswith('.mp3') and not audio_file_path:
-                        audio_file_path = item
                     else:
                         translated_text += item + " "
-    except Exception as e:
-        return None, f"Error processing result: {str(e)}"
-    if not audio_file_path:
-        return None, "No audio file path found in the result."
-    return audio_file_path, translated_text.strip()
-def process_image(image_input):
-    # Initialize the Gradio client with the URL of the Gradio server
-    client = Client("https://adept-fuyu-8b-demo.hf.space/--replicas/pqjvl/")
-    # Assuming image_input is a URL path to the image
-    image_path = image_input
-    # Call the predict method of the client
-    result = client.predict(
-        image_path,  # URL of the image
-        True,        # Additional parameter for the server (e.g., enable detailed captioning)
-        fn_index=2
-    )
-    return result
 def query_vectara(text):

         return f"{e}"
+def is_base64(s):
+    try:
+        return base64.b64encode(base64.b64decode(s)) == s.encode()
+    except Exception:
+        return False
+def convert_text_to_speech(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
     client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
     try:
+        # Make a prediction request to the client
         result = client.predict(
             "T2ST",
+            "text",  # Since we are doing text-to-speech
             None,
             None,
             input_text,
             target_language,
             api_name="/run"
         )
+        # Print or log the raw API response for inspection
+        print("Raw API Response:", result)
+        # Initialize variables
         translated_text = ""
         audio_file_path = ""
+        # Process the result
         if result:
             for item in result:
                 if isinstance(item, str):
+                    # Check if the item is a URL pointing to an audio file or a base64 encoded string
+                    if any(ext in item.lower() for ext in ['.mp3', '.wav', '.ogg']) or is_base64(item):
+                        if not audio_file_path:  # Store only the first audio file path or base64 string
+                            audio_file_path = item
                     else:
+                        # Concatenate the translated text
                         translated_text += item + " "
+        return audio_file_path, translated_text.strip()
+    except Exception as e:
+        print(f"Error in text-to-speech conversion: {str(e)}")
+        return None, f"Error in text-to-speech conversion: {str(e)}"
 def query_vectara(text):