Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -122,35 +122,41 @@ def process_speech(input_language, audio_input):
|
|
| 122 |
return f"{e}"
|
| 123 |
|
| 124 |
|
| 125 |
-
def convert_text_to_speech(input_text, target_language):
|
| 126 |
"""
|
| 127 |
-
Convert text to speech in the specified language
|
| 128 |
"""
|
|
|
|
|
|
|
| 129 |
try:
|
| 130 |
-
|
| 131 |
-
"T2ST
|
| 132 |
"text", # Input type
|
| 133 |
-
None, # No file input for text to speech
|
| 134 |
input_text, # Input text
|
| 135 |
"", # Empty string for audio name
|
| 136 |
-
|
| 137 |
target_language, # Target language
|
| 138 |
api_name="/run" # API name
|
| 139 |
)
|
| 140 |
|
| 141 |
-
|
|
|
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
def save_image(image_input, output_dir="saved_images"):
|
| 156 |
if not os.path.exists(output_dir):
|
|
@@ -423,7 +429,8 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
|
|
| 423 |
final_response = process_summary_with_stablemed(summary)
|
| 424 |
|
| 425 |
# Convert translated text to speech and get both audio file and text
|
| 426 |
-
|
|
|
|
| 427 |
|
| 428 |
# Evaluate hallucination
|
| 429 |
hallucination_label = evaluate_hallucination(final_response, summary)
|
|
|
|
| 122 |
return f"{e}"
|
| 123 |
|
| 124 |
|
| 125 |
+
def convert_text_to_speech(input_text, source_language, target_language):
    """
    Convert text to speech in the specified language and return the new audio file path.

    Sends ``input_text`` to the remote "/run" endpoint with the
    "T2ST (Text to Speech translation)" task, then renames the audio file
    reported in the result to a unique ``audio_output_<uuid>.wav`` name in
    the same directory.

    Args:
        input_text: Text to be spoken.
        source_language: Language of ``input_text``, passed through to the API.
        target_language: Language of the generated speech, passed through to the API.

    Returns:
        A string in every case: the path of the renamed audio file on
        success, "No audio file generated." when the result carries no audio
        path, or "Error in text-to-speech conversion: ..." when any step
        raises.
    """
    try:
        # Build the client inside the try block so that a connection failure
        # is reported through the same concise error message as any other
        # failure instead of propagating to the caller.
        # NOTE(review): the URL pins a specific "--replicas/..." path;
        # presumably this can go stale - confirm against the hosting Space.
        client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")

        result = client.predict(
            "T2ST (Text to Speech translation)",  # Task
            "text",  # Input type
            input_text,  # Input text
            "",  # Empty string for audio name
            source_language,  # Source language
            target_language,  # Target language
            api_name="/run",  # API name
        )

        # Only index into the result when it is a real sequence with at least
        # two entries; a bare string or None means there is no audio path.
        if not isinstance(result, (list, tuple)) or len(result) < 2:
            return "No audio file generated."

        # Assuming the audio file path is returned as the second element.
        # NOTE(review): confirm the index against the endpoint's output order.
        original_audio_file = result[1]
        if not original_audio_file:
            return "No audio file generated."

        # Generate a new file name with a random UUID so repeated calls do
        # not collide, keeping the file in its original directory.
        new_file_name = f"audio_output_{uuid.uuid4()}.wav"
        new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)

        # Rename the file
        os.rename(original_audio_file, new_file_path)

        return new_file_path

    except Exception as e:
        # Return a concise error message; callers receive a string rather
        # than an exception.
        return f"Error in text-to-speech conversion: {str(e)}"
|
| 160 |
|
| 161 |
def save_image(image_input, output_dir="saved_images"):
|
| 162 |
if not os.path.exists(output_dir):
|
|
|
|
| 429 |
final_response = process_summary_with_stablemed(summary)
|
| 430 |
|
| 431 |
# Convert translated text to speech and get both audio file and text
|
| 432 |
+
target_language = "English" # Set the target language for the speech
|
| 433 |
+
audio_file_path = convert_text_to_speech(final_response, target_language, input_language)
|
| 434 |
|
| 435 |
# Evaluate hallucination
|
| 436 |
hallucination_label = evaluate_hallucination(final_response, summary)
|