Spaces:
Runtime error
Runtime error
tonic
committed on
Commit
·
568c287
1
Parent(s):
6150c59
process audio logic
Browse files
app.py
CHANGED
|
@@ -151,18 +151,43 @@ def process_audio_to_text(audio_path, inputlanguage="English", outputlanguage="E
|
|
| 151 |
print("Audio Result: ", result)
|
| 152 |
return result[0]
|
| 153 |
|
| 154 |
-
def process_text_to_audio(text, translatefrom="English", translateto="English"):
|
| 155 |
"""
|
| 156 |
Convert text input to audio using the Gradio client.
|
|
|
|
| 157 |
"""
|
| 158 |
-
|
| 159 |
-
|
| 160 |
text,
|
| 161 |
translatefrom,
|
| 162 |
translateto,
|
| 163 |
api_name="/t2st"
|
| 164 |
)
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
def initialize_ocr_models():
|
| 168 |
"""
|
|
@@ -221,11 +246,11 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
|
|
| 221 |
audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
|
| 222 |
final_text += "\n" + audio_text
|
| 223 |
|
| 224 |
-
final_text_with_producetext = final_text + producetext
|
| 225 |
|
| 226 |
response = co.generate(
|
| 227 |
model='c4ai-aya',
|
| 228 |
-
prompt=final_text_with_producetext
|
| 229 |
max_tokens=1024,
|
| 230 |
temperature=0.5
|
| 231 |
)
|
|
|
|
| 151 |
print("Audio Result: ", result)
|
| 152 |
return result[0]
|
| 153 |
|
| 154 |
+
def process_text_to_audio(text, translatefrom="English", translateto="English", filename_prefix="audio"):
    """
    Convert text input to audio using the Gradio client.

    The text is sent to the ``/t2st`` endpoint of ``audio_client`` and the
    response is handed to ``save_audio_data_to_file``; the path returned by
    that helper is returned to the caller.

    Args:
        text: Text to synthesize.
        translatefrom: Source language name passed to the endpoint.
        translateto: Target language name passed to the endpoint.
        filename_prefix: Prefix of the generated ``.wav`` file name.

    Returns:
        The file path returned by ``save_audio_data_to_file``.
    """
    # Local import: the file-level import block is not visible from here.
    import hashlib

    # Generate audio from text
    audio_response = audio_client.predict(
        text,
        translatefrom,
        translateto,
        api_name="/t2st"
    )
    # NOTE(review): the shape of the "/t2st" response is not visible here
    # (bytes, a file path, or a tuple) -- confirm against the endpoint.

    # The built-in hash() is salted per process for str and may be negative,
    # so it yields names that change between restarts and can contain "-".
    # A SHA-256 prefix is stable and always filename-safe.
    digest = hashlib.sha256(str(text).encode("utf-8")).hexdigest()[:16]
    filename = f"{filename_prefix}_{digest}.wav"
    audio_file_path = save_audio_data_to_file(audio_response, filename=filename)
    return audio_file_path
|
| 169 |
+
|
| 170 |
+
def save_audio_data_to_file(audio_data, directory="audio_files", filename="output_audio.wav"):
    """
    Save audio data to a file and return the file path.

    Args:
        audio_data: Either a bytes-like object holding the audio content, or
            a ``str`` / ``os.PathLike`` path to an existing audio file, which
            is copied to the destination.
        directory: Destination directory; created (with parents) if missing.
        filename: Name of the file to create inside ``directory``.

    Returns:
        The destination path, ``os.path.join(directory, filename)``.

    Raises:
        FileNotFoundError: ``audio_data`` is a path that is not an existing file.
        TypeError: ``audio_data`` is neither a path nor a bytes-like object.
    """
    # Local import: the file-level import block is not visible from here.
    import shutil

    file_path = os.path.join(directory, filename)

    # Validate the payload before touching the filesystem, so a bad input
    # does not leave an empty file behind.
    if isinstance(audio_data, (str, os.PathLike)):
        source_path = os.fspath(audio_data)
        if not os.path.isfile(source_path):
            raise FileNotFoundError(f"Audio source file not found: {source_path}")
        Path(directory).mkdir(parents=True, exist_ok=True)
        # Copying a file onto itself would raise SameFileError; nothing to do.
        if os.path.abspath(source_path) != os.path.abspath(file_path):
            shutil.copyfile(source_path, file_path)
        return file_path

    try:
        # memoryview accepts every object that a binary write() would.
        payload = memoryview(audio_data)
    except TypeError:
        raise TypeError(
            "audio_data must be bytes-like or a path to an audio file, "
            f"got {type(audio_data).__name__}"
        ) from None

    Path(directory).mkdir(parents=True, exist_ok=True)
    with open(file_path, 'wb') as file:
        file.write(payload)
    return file_path
|
| 179 |
+
|
| 180 |
+
# Ensure the function that reads the audio file checks if the path is a file
|
| 181 |
+
def read_audio_file(file_path):
    """
    Read and return the audio file content if the path is a file.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The raw bytes of the file.

    Raises:
        ValueError: ``file_path`` is a directory, or nothing exists at that
            path. The message says which of the two applies.
    """
    if os.path.isfile(file_path):
        with open(file_path, 'rb') as file:
            return file.read()
    # Only report "directory" when the path really is one; a missing path
    # gets its own message so the error is not misleading.
    if os.path.isdir(file_path):
        raise ValueError(f"Expected a file path, got a directory: {file_path}")
    raise ValueError(f"Expected a file path, but nothing exists at: {file_path}")
|
| 190 |
+
|
| 191 |
|
| 192 |
def initialize_ocr_models():
|
| 193 |
"""
|
|
|
|
| 246 |
audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
|
| 247 |
final_text += "\n" + audio_text
|
| 248 |
|
| 249 |
+
final_text_with_producetext = final_text + producetext.format(target_language=translateto)
|
| 250 |
|
| 251 |
response = co.generate(
|
| 252 |
model='c4ai-aya',
|
| 253 |
+
prompt=final_text_with_producetext,
|
| 254 |
max_tokens=1024,
|
| 255 |
temperature=0.5
|
| 256 |
)
|