Fix OOM (out of memory)
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import pathlib
|
|
| 5 |
import time
|
| 6 |
import tempfile
|
| 7 |
import platform
|
|
|
|
| 8 |
if platform.system().lower() == 'windows':
|
| 9 |
temp = pathlib.PosixPath
|
| 10 |
pathlib.PosixPath = pathlib.WindowsPath
|
|
@@ -113,6 +114,9 @@ def transcribe_one(model, audio_path):
|
|
| 113 |
text_pr = result.text
|
| 114 |
if text_pr.strip(" ")[-1] not in "?!.,。,?!。、":
|
| 115 |
text_pr += "."
|
|
|
|
|
|
|
|
|
|
| 116 |
return lang, text_pr
|
| 117 |
|
| 118 |
def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
|
|
@@ -154,6 +158,10 @@ def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
|
|
| 154 |
# save as npz file
|
| 155 |
np.savez(os.path.join(tempfile.gettempdir(), f"{name}.npz"),
|
| 156 |
audio_tokens=audio_tokens, text_tokens=text_tokens, lang_code=lang2code[lang_pr])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
return message, os.path.join(tempfile.gettempdir(), f"{name}.npz")
|
| 158 |
|
| 159 |
|
|
@@ -176,6 +184,8 @@ def make_prompt(name, wav, sr, save=True):
|
|
| 176 |
if not save:
|
| 177 |
os.remove(f"./prompts/{name}.wav")
|
| 178 |
os.remove(f"./prompts/{name}.txt")
|
|
|
|
|
|
|
| 179 |
|
| 180 |
return text, lang
|
| 181 |
|
|
@@ -250,6 +260,8 @@ def infer_from_audio(text, language, accent, audio_prompt, record_audio_prompt,
|
|
| 250 |
)
|
| 251 |
|
| 252 |
message = f"text prompt: {text_pr}\nsythesized text: {text}"
|
|
|
|
|
|
|
| 253 |
return message, (24000, samples[0][0].cpu().numpy())
|
| 254 |
|
| 255 |
@torch.no_grad()
|
|
@@ -306,6 +318,9 @@ def infer_from_prompt(text, language, accent, preset_prompt, prompt_file):
|
|
| 306 |
)
|
| 307 |
|
| 308 |
message = f"sythesized text: {text}"
|
|
|
|
|
|
|
|
|
|
| 309 |
return message, (24000, samples[0][0].cpu().numpy())
|
| 310 |
|
| 311 |
|
|
@@ -439,6 +454,7 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
|
|
| 439 |
[(complete_tokens, None)]
|
| 440 |
)
|
| 441 |
message = f"Cut into {len(sentences)} sentences"
|
|
|
|
| 442 |
return message, (24000, samples[0][0].cpu().numpy())
|
| 443 |
else:
|
| 444 |
raise ValueError(f"No such mode {mode}")
|
|
|
|
| 5 |
import time
|
| 6 |
import tempfile
|
| 7 |
import platform
|
| 8 |
+
import gc
|
| 9 |
if platform.system().lower() == 'windows':
|
| 10 |
temp = pathlib.PosixPath
|
| 11 |
pathlib.PosixPath = pathlib.WindowsPath
|
|
|
|
| 114 |
text_pr = result.text
|
| 115 |
if text_pr.strip(" ")[-1] not in "?!.,。,?!。、":
|
| 116 |
text_pr += "."
|
| 117 |
+
|
| 118 |
+
# drop references to intermediate variables that are not part of the return value
|
| 119 |
+
del audio, mel, probs, result
|
| 120 |
return lang, text_pr
|
| 121 |
|
| 122 |
def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
|
|
|
|
| 158 |
# save as npz file
|
| 159 |
np.savez(os.path.join(tempfile.gettempdir(), f"{name}.npz"),
|
| 160 |
audio_tokens=audio_tokens, text_tokens=text_tokens, lang_code=lang2code[lang_pr])
|
| 161 |
+
|
| 162 |
+
# drop references to intermediate variables that are not part of the return value
|
| 163 |
+
del audio_tokens, text_tokens, phonemes, lang_pr, text_pr, wav_pr, sr, uploaded_audio, recorded_audio
|
| 164 |
+
|
| 165 |
return message, os.path.join(tempfile.gettempdir(), f"{name}.npz")
|
| 166 |
|
| 167 |
|
|
|
|
| 184 |
if not save:
|
| 185 |
os.remove(f"./prompts/{name}.wav")
|
| 186 |
os.remove(f"./prompts/{name}.txt")
|
| 187 |
+
# drop references to intermediate variables that are not part of the return value
|
| 188 |
+
del lang_token, wav, sr
|
| 189 |
|
| 190 |
return text, lang
|
| 191 |
|
|
|
|
| 260 |
)
|
| 261 |
|
| 262 |
message = f"text prompt: {text_pr}\nsythesized text: {text}"
|
| 263 |
+
# drop references to intermediate variables that are not part of the return value
|
| 264 |
+
del audio_prompts, text_tokens, text_prompts, phone_tokens, encoded_frames, wav_pr, sr, audio_prompt, record_audio_prompt, transcript_content
|
| 265 |
return message, (24000, samples[0][0].cpu().numpy())
|
| 266 |
|
| 267 |
@torch.no_grad()
|
|
|
|
| 318 |
)
|
| 319 |
|
| 320 |
message = f"sythesized text: {text}"
|
| 321 |
+
|
| 322 |
+
# drop references to intermediate variables that are not part of the return value
|
| 323 |
+
del audio_prompts, text_tokens, text_prompts, phone_tokens, encoded_frames, prompt_file, preset_prompt
|
| 324 |
return message, (24000, samples[0][0].cpu().numpy())
|
| 325 |
|
| 326 |
|
|
|
|
| 454 |
[(complete_tokens, None)]
|
| 455 |
)
|
| 456 |
message = f"Cut into {len(sentences)} sentences"
|
| 457 |
+
|
| 458 |
return message, (24000, samples[0][0].cpu().numpy())
|
| 459 |
else:
|
| 460 |
raise ValueError(f"No such mode {mode}")
|