Update f5_tts/infer/utils_infer.py
Browse files- f5_tts/infer/utils_infer.py +0 -50
f5_tts/infer/utils_infer.py
CHANGED
|
@@ -404,56 +404,6 @@ def infer_process(
|
|
| 404 |
device=device,
|
| 405 |
)
|
| 406 |
|
| 407 |
-
def infer_from_embedding(
    speaker_embedding,
    gen_text,
    model_obj,
    vocoder,
    mel_spec_type="vocos",
    speed=1.0,
    nfe_step=32,
    cfg_strength=2.0,
    sway_sampling_coef=-1,
    fix_duration=None,
    device="cuda",
):
    """Synthesize speech for ``gen_text`` conditioned directly on a speaker embedding.

    Unlike the reference-audio inference path, no ref_text is available, so the
    output duration is estimated with a crude per-character heuristic unless the
    caller pins it via ``fix_duration``.

    Args:
        speaker_embedding: conditioning tensor passed as ``cond`` to
            ``model_obj.sample`` (moved to ``device`` here); exact shape is
            whatever the model expects — not checkable from this function.
        gen_text (str): text to synthesize; converted to pinyin tokens.
        model_obj: model exposing ``.sample(cond=, text=, duration=, steps=,
            cfg_strength=, sway_sampling_coef=)`` returning ``(mel, ...)``.
        vocoder: vocos-style object with ``.decode(mel)`` or a callable
            bigvgan-style vocoder, selected by ``mel_spec_type``.
        mel_spec_type (str): ``"vocos"`` or ``"bigvgan"``.
        speed (float): accepted for signature compatibility with the other
            infer_* helpers; currently unused by this path.
        nfe_step (int): number of flow-matching sampling steps.
        cfg_strength (float): classifier-free guidance strength.
        sway_sampling_coef (float): sway sampling coefficient.
        fix_duration: output length in mel frames; ``None`` triggers the
            heuristic estimate.
        device (str): device the conditioning embedding is moved to.

    Returns:
        tuple: ``(waveform ndarray, target_sample_rate, mel spectrogram ndarray)``.

    Raises:
        ValueError: if ``mel_spec_type`` is not ``"vocos"`` or ``"bigvgan"``
            (the original fell through and raised ``NameError`` instead).

    NOTE: relies on module-level ``target_sample_rate``, ``hop_length`` and
    ``convert_char_to_pinyin`` from this file.
    """
    # Estimate duration when not fixed. Heuristic only, since there is no
    # reference text to scale against: ~0.06 s per character, floored at 1 s.
    est_duration = fix_duration
    if est_duration is None:
        avg_duration_sec = max(len(gen_text) * 0.06, 1.0)  # crude approximation
        est_duration = int(avg_duration_sec * target_sample_rate / hop_length)

    # Tokenize gen_text into the model's expected character/pinyin format.
    final_text_list = convert_char_to_pinyin([gen_text])

    with torch.inference_mode():
        generated, _ = model_obj.sample(
            cond=speaker_embedding.to(device),
            text=final_text_list,
            duration=est_duration,
            steps=nfe_step,
            cfg_strength=cfg_strength,
            sway_sampling_coef=sway_sampling_coef,
        )

    generated = generated.to(torch.float32)
    # (batch, frames, mel_bins) -> (batch, mel_bins, frames) for the vocoder.
    generated_mel_spec = generated.permute(0, 2, 1)

    if mel_spec_type == "vocos":
        generated_wave = vocoder.decode(generated_mel_spec)
    elif mel_spec_type == "bigvgan":
        generated_wave = vocoder(generated_mel_spec)
    else:
        # Fix: the original had no else-branch and crashed with NameError
        # on generated_wave below for any other mel_spec_type value.
        raise ValueError(f"Unsupported mel_spec_type: {mel_spec_type}")

    final_wave = generated_wave.squeeze().cpu().numpy()
    final_spec = generated_mel_spec[0].cpu().numpy()

    return final_wave, target_sample_rate, final_spec
|
| 455 |
-
|
| 456 |
-
|
| 457 |
# infer batches
|
| 458 |
|
| 459 |
|
|
|
|
| 404 |
device=device,
|
| 405 |
)
|
| 406 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
# infer batches
|
| 408 |
|
| 409 |
|