Update qwen_tts/inference/qwen3_tts_model.py
Browse files
qwen_tts/inference/qwen3_tts_model.py
CHANGED
|
@@ -286,6 +286,7 @@ class Qwen3TTSModel:
|
|
| 286 |
|
| 287 |
def _merge_generate_kwargs(
|
| 288 |
self,
|
|
|
|
| 289 |
do_sample: Optional[bool] = None,
|
| 290 |
top_k: Optional[int] = None,
|
| 291 |
top_p: Optional[float] = None,
|
|
@@ -307,7 +308,7 @@ class Qwen3TTSModel:
|
|
| 307 |
- Otherwise, fall back to the hard defaults.
|
| 308 |
|
| 309 |
Args:
|
| 310 |
- do_sample, top_k, top_p, temperature, repetition_penalty,
|
| 311 |
subtalker_dosample, subtalker_top_k, subtalker_top_p, subtalker_temperature, max_new_tokens:
|
| 312 |
Common generation parameters.
|
| 313 |
**kwargs:
|
|
@@ -317,6 +318,7 @@ class Qwen3TTSModel:
|
|
| 317 |
Dict[str, Any]: Final kwargs to pass into model.generate().
|
| 318 |
"""
|
| 319 |
hard_defaults = dict(
|
|
|
|
| 320 |
do_sample=True,
|
| 321 |
top_k=50,
|
| 322 |
top_p=1.0,
|
|
@@ -338,6 +340,7 @@ class Qwen3TTSModel:
|
|
| 338 |
|
| 339 |
merged = dict(kwargs)
|
| 340 |
merged.update(
|
|
|
|
| 341 |
do_sample=pick("do_sample", do_sample),
|
| 342 |
top_k=pick("top_k", top_k),
|
| 343 |
top_p=pick("top_p", top_p),
|
|
@@ -475,7 +478,6 @@ class Qwen3TTSModel:
|
|
| 475 |
ref_text: Optional[Union[str, List[Optional[str]]]] = None,
|
| 476 |
x_vector_only_mode: Union[bool, List[bool]] = False,
|
| 477 |
voice_clone_prompt: Optional[Union[Dict[str, Any], List[VoiceClonePromptItem]]] = None,
|
| 478 |
- non_streaming_mode: bool = False,
|
| 479 |
**kwargs,
|
| 480 |
) -> Tuple[List[np.ndarray], int]:
|
| 481 |
"""
|
|
@@ -605,7 +607,6 @@ class Qwen3TTSModel:
|
|
| 605 |
ref_ids=ref_ids,
|
| 606 |
voice_clone_prompt=voice_clone_prompt_dict,
|
| 607 |
languages=languages,
|
| 608 |
- non_streaming_mode=non_streaming_mode,
|
| 609 |
**gen_kwargs,
|
| 610 |
)
|
| 611 |
|
|
@@ -639,7 +640,6 @@ class Qwen3TTSModel:
|
|
| 639 |
text: Union[str, List[str]],
|
| 640 |
instruct: Union[str, List[str]],
|
| 641 |
language: Union[str, List[str]] = None,
|
| 642 |
- non_streaming_mode: bool = True,
|
| 643 |
**kwargs,
|
| 644 |
) -> Tuple[List[np.ndarray], int]:
|
| 645 |
"""
|
|
@@ -720,7 +720,6 @@ class Qwen3TTSModel:
|
|
| 720 |
input_ids=input_ids,
|
| 721 |
instruct_ids=instruct_ids,
|
| 722 |
languages=languages,
|
| 723 |
- non_streaming_mode=non_streaming_mode,
|
| 724 |
**gen_kwargs,
|
| 725 |
)
|
| 726 |
|
|
@@ -735,7 +734,6 @@ class Qwen3TTSModel:
|
|
| 735 |
speaker: Union[str, List[str]],
|
| 736 |
language: Union[str, List[str]] = None,
|
| 737 |
instruct: Optional[Union[str, List[str]]] = None,
|
| 738 |
- non_streaming_mode: bool = True,
|
| 739 |
**kwargs,
|
| 740 |
) -> Tuple[List[np.ndarray], int]:
|
| 741 |
"""
|
|
@@ -831,7 +829,6 @@ class Qwen3TTSModel:
|
|
| 831 |
instruct_ids=instruct_ids,
|
| 832 |
languages=languages,
|
| 833 |
speakers=speakers,
|
| 834 |
- non_streaming_mode=non_streaming_mode,
|
| 835 |
**gen_kwargs,
|
| 836 |
)
|
| 837 |
|
|
@@ -874,4 +871,4 @@ class Qwen3TTSModel:
|
|
| 874 |
supported = self._supported_languages_set()
|
| 875 |
if supported is None:
|
| 876 |
return None
|
| 877 |
- return sorted(supported)
|
|
|
|
| 286 |
|
| 287 |
def _merge_generate_kwargs(
|
| 288 |
self,
|
| 289 |
+ non_streaming_mode: Optional[bool] = None,
|
| 290 |
do_sample: Optional[bool] = None,
|
| 291 |
top_k: Optional[int] = None,
|
| 292 |
top_p: Optional[float] = None,
|
|
|
|
| 308 |
- Otherwise, fall back to the hard defaults.
|
| 309 |
|
| 310 |
Args:
|
| 311 |
+ non_streaming_mode, do_sample, top_k, top_p, temperature, repetition_penalty,
|
| 312 |
subtalker_dosample, subtalker_top_k, subtalker_top_p, subtalker_temperature, max_new_tokens:
|
| 313 |
Common generation parameters.
|
| 314 |
**kwargs:
|
|
|
|
| 318 |
Dict[str, Any]: Final kwargs to pass into model.generate().
|
| 319 |
"""
|
| 320 |
hard_defaults = dict(
|
| 321 |
+ non_streaming_mode=False,
|
| 322 |
do_sample=True,
|
| 323 |
top_k=50,
|
| 324 |
top_p=1.0,
|
|
|
|
| 340 |
|
| 341 |
merged = dict(kwargs)
|
| 342 |
merged.update(
|
| 343 |
+ non_streaming_mode=pick("non_streaming_mode", non_streaming_mode),
|
| 344 |
do_sample=pick("do_sample", do_sample),
|
| 345 |
top_k=pick("top_k", top_k),
|
| 346 |
top_p=pick("top_p", top_p),
|
|
|
|
| 478 |
ref_text: Optional[Union[str, List[Optional[str]]]] = None,
|
| 479 |
x_vector_only_mode: Union[bool, List[bool]] = False,
|
| 480 |
voice_clone_prompt: Optional[Union[Dict[str, Any], List[VoiceClonePromptItem]]] = None,
|
|
|
|
| 481 |
**kwargs,
|
| 482 |
) -> Tuple[List[np.ndarray], int]:
|
| 483 |
"""
|
|
|
|
| 607 |
ref_ids=ref_ids,
|
| 608 |
voice_clone_prompt=voice_clone_prompt_dict,
|
| 609 |
languages=languages,
|
|
|
|
| 610 |
**gen_kwargs,
|
| 611 |
)
|
| 612 |
|
|
|
|
| 640 |
text: Union[str, List[str]],
|
| 641 |
instruct: Union[str, List[str]],
|
| 642 |
language: Union[str, List[str]] = None,
|
|
|
|
| 643 |
**kwargs,
|
| 644 |
) -> Tuple[List[np.ndarray], int]:
|
| 645 |
"""
|
|
|
|
| 720 |
input_ids=input_ids,
|
| 721 |
instruct_ids=instruct_ids,
|
| 722 |
languages=languages,
|
|
|
|
| 723 |
**gen_kwargs,
|
| 724 |
)
|
| 725 |
|
|
|
|
| 734 |
speaker: Union[str, List[str]],
|
| 735 |
language: Union[str, List[str]] = None,
|
| 736 |
instruct: Optional[Union[str, List[str]]] = None,
|
|
|
|
| 737 |
**kwargs,
|
| 738 |
) -> Tuple[List[np.ndarray], int]:
|
| 739 |
"""
|
|
|
|
| 829 |
instruct_ids=instruct_ids,
|
| 830 |
languages=languages,
|
| 831 |
speakers=speakers,
|
|
|
|
| 832 |
**gen_kwargs,
|
| 833 |
)
|
| 834 |
|
|
|
|
| 871 |
supported = self._supported_languages_set()
|
| 872 |
if supported is None:
|
| 873 |
return None
|
| 874 |
+ return sorted(supported)
|