Spaces:
Running on Zero
Running on Zero
qwen_tts/inference/qwen3_tts_model.py
CHANGED
|
@@ -621,27 +621,8 @@ class Qwen3TTSModel:
|
|
| 621 |
**gen_kwargs,
|
| 622 |
)
|
| 623 |
|
| 624 |
-
codes_for_decode = []
|
| 625 |
-
for i, codes in enumerate(talker_codes_list):
|
| 626 |
-
ref_code_list = voice_clone_prompt_dict.get("ref_code", None)
|
| 627 |
-
if ref_code_list is not None and ref_code_list[i] is not None:
|
| 628 |
-
codes_for_decode.append(torch.cat([ref_code_list[i].to(codes.device), codes], dim=0))
|
| 629 |
-
else:
|
| 630 |
-
codes_for_decode.append(codes)
|
| 631 |
-
|
| 632 |
logger.info("模型推理完成,正在解码音频...")
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
wavs_out: List[np.ndarray] = []
|
| 636 |
-
for i, wav in enumerate(wavs_all):
|
| 637 |
-
ref_code_list = voice_clone_prompt_dict.get("ref_code", None)
|
| 638 |
-
if ref_code_list is not None and ref_code_list[i] is not None:
|
| 639 |
-
ref_len = int(ref_code_list[i].shape[0])
|
| 640 |
-
total_len = int(codes_for_decode[i].shape[0])
|
| 641 |
-
cut = int(ref_len / max(total_len, 1) * wav.shape[0])
|
| 642 |
-
wavs_out.append(wav[cut:])
|
| 643 |
-
else:
|
| 644 |
-
wavs_out.append(wav)
|
| 645 |
|
| 646 |
return wavs_out, fs
|
| 647 |
|
|
|
|
| 621 |
**gen_kwargs,
|
| 622 |
)
|
| 623 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 624 |
logger.info("模型推理完成,正在解码音频...")
|
| 625 |
+
wavs_out, fs = self.model.speech_tokenizer.decode([{"audio_codes": c} for c in talker_codes_list])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 626 |
|
| 627 |
return wavs_out, fs
|
| 628 |
|