Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -140,61 +140,49 @@ def process_speech(input_language, audio_input):
|
|
| 140 |
return f"{e}"
|
| 141 |
|
| 142 |
|
| 143 |
-
def convert_text_to_speech(input_text, source_language, target_language):
|
| 144 |
-
"""
|
| 145 |
-
Convert text to speech in the specified language and return the audio file path and translated text.
|
| 146 |
-
"""
|
| 147 |
client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
|
| 148 |
|
| 149 |
try:
|
| 150 |
result = client.predict(
|
| 151 |
"T2ST", # Task
|
| 152 |
-
"
|
|
|
|
|
|
|
| 153 |
input_text, # Input text
|
| 154 |
-
"", # Empty string for audio name
|
| 155 |
-
"", # Empty string for audio name
|
| 156 |
source_language, # Source language
|
| 157 |
target_language, # Target language
|
| 158 |
-
|
| 159 |
)
|
| 160 |
|
| 161 |
# Initialize variables
|
| 162 |
original_audio_file = None
|
| 163 |
translated_text = ""
|
| 164 |
new_file_path = ""
|
| 165 |
-
|
| 166 |
-
#
|
| 167 |
-
if isinstance(result, list) and len(result) > 1:
|
| 168 |
-
downloaded_files = []
|
| 169 |
-
for file_path in result[1:]:
|
| 170 |
-
if os.path.isfile(file_path):
|
| 171 |
-
# Download the file
|
| 172 |
-
downloaded_files.append(file_path)
|
| 173 |
-
else:
|
| 174 |
-
print(f"Path is not a file: {file_path}")
|
| 175 |
-
|
| 176 |
-
if downloaded_files:
|
| 177 |
-
# Process only the first file
|
| 178 |
-
original_audio_file = downloaded_files[0]
|
| 179 |
-
new_file_name = f"audio_output_{uuid.uuid4()}.wav"
|
| 180 |
-
new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
|
| 181 |
-
os.rename(original_audio_file, new_file_path)
|
| 182 |
-
|
| 183 |
-
# Iterate over the result to find the last text item
|
| 184 |
for item in result:
|
| 185 |
if isinstance(item, str):
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
if original_audio_file:
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
| 192 |
|
| 193 |
except Exception as e:
|
| 194 |
-
# Return a concise error message
|
| 195 |
return f"Error in text-to-speech conversion: {str(e)}", ""
|
| 196 |
|
| 197 |
-
# return "Unexpected result format or insufficient data received.", "" //UNREACHABLE CODE
|
| 198 |
|
| 199 |
|
| 200 |
def process_image(image_input):
|
|
|
|
| 140 |
return f"{e}"
|
| 141 |
|
| 142 |
|
| 143 |
+
def convert_text_to_speech(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
    """Translate text and synthesize speech through the Seamless M4T Space.

    Sends a "T2ST" request to the remote Gradio app, then picks the first
    audio file and the first plain-text string out of the response. The
    audio file is renamed to a unique ``audio_output_<uuid>.wav`` name in
    its original directory.

    Args:
        input_text: Text to translate and speak.
        source_language: Language of ``input_text``, as expected by the Space.
        target_language: Language to translate into, as expected by the Space.

    Returns:
        ``(audio_path, translated_text)``. ``audio_path`` is ``""`` when the
        response contained no existing local file. If the request or the file
        handling raises, the first element carries an error message and the
        second is ``""``.
    """
    client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")

    try:
        result = client.predict(
            "T2ST",  # Task
            "",  # Audio source
            None,  # Input audio mic
            None,  # Input audio file
            input_text,  # Input text
            source_language,  # Source language
            target_language,  # Target language
            # The endpoint must be given by keyword; passed positionally it
            # would be sent to the Space as an extra input value.
            api_name="/run",
        )

        # A single-output endpoint may hand back a bare value instead of a
        # sequence; normalise so the loop below handles both shapes.
        items = result if isinstance(result, (list, tuple)) else [result]

        original_audio_file = None
        translated_text = ""
        new_file_path = ""

        for item in items:
            if not isinstance(item, str):
                continue
            # Remote URLs are neither local audio files nor translated text.
            if item.startswith(("http://", "https://")):
                continue
            # Audio outputs are presumably returned as local file paths
            # (TODO confirm for the installed gradio_client version), so the
            # file check must come before treating the string as text.
            if os.path.isfile(item):
                if original_audio_file is None:
                    original_audio_file = item
            elif not translated_text:
                translated_text = item
            # Stop only once both pieces are found, so text that follows the
            # audio file in the response is not lost.
            if original_audio_file is not None and translated_text:
                break

        if original_audio_file:
            # Give the file a unique name so repeated calls do not collide.
            new_file_name = f"audio_output_{uuid.uuid4()}.wav"
            new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
            os.rename(original_audio_file, new_file_path)

        return new_file_path, translated_text

    except Exception as e:
        return f"Error in text-to-speech conversion: {str(e)}", ""
|
| 185 |
|
|
|
|
| 186 |
|
| 187 |
|
| 188 |
def process_image(image_input):
|