Spaces:
Configuration error
Configuration error
no message
Browse files
webui.py
CHANGED
|
@@ -72,7 +72,7 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
| 72 |
prompt_wav = None
|
| 73 |
# if instruct mode, please make sure that model is iic/CosyVoice-300M-Instruct and not cross_lingual mode
|
| 74 |
if mode_checkbox_group in ['自然语言控制']:
|
| 75 |
-
if cosyvoice.frontend.instruct is False:
|
| 76 |
gr.Warning('您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型'.format(args.model_dir))
|
| 77 |
yield (target_sr, default_data)
|
| 78 |
if instruct_text == '':
|
|
@@ -82,7 +82,7 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
| 82 |
gr.Info('您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略')
|
| 83 |
# if cross_lingual mode, please make sure that model is iic/CosyVoice-300M and tts_text prompt_text are different language
|
| 84 |
if mode_checkbox_group in ['跨语种复刻']:
|
| 85 |
-
if cosyvoice.frontend.instruct is True:
|
| 86 |
gr.Warning('您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型'.format(args.model_dir))
|
| 87 |
yield (target_sr, default_data)
|
| 88 |
if instruct_text != '':
|
|
@@ -114,24 +114,24 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
| 114 |
if mode_checkbox_group == '预训练音色':
|
| 115 |
logging.info('get sft inference request')
|
| 116 |
set_all_random_seed(seed)
|
| 117 |
-
for i in cosyvoice.inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
|
| 118 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
| 119 |
elif mode_checkbox_group == '3s极速复刻':
|
| 120 |
logging.info('get zero_shot inference request')
|
| 121 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
| 122 |
set_all_random_seed(seed)
|
| 123 |
-
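for i in cosyvoice.inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):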
|
| 124 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
| 125 |
elif mode_checkbox_group == '跨语种复刻':
|
| 126 |
logging.info('get cross_lingual inference request')
|
| 127 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
| 128 |
set_all_random_seed(seed)
|
| 129 |
-
for i in cosyvoice.inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
|
| 130 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
| 131 |
else:
|
| 132 |
logging.info('get instruct inference request')
|
| 133 |
set_all_random_seed(seed)
|
| 134 |
-
for i in cosyvoice.inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
| 135 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
| 136 |
|
| 137 |
# SDK模型下载
|
|
@@ -157,9 +157,10 @@ parser.add_argument('--model_dir',
|
|
| 157 |
args = parser.parse_args()
|
| 158 |
|
| 159 |
cosyvoice_instance = None
|
| 160 |
-
|
| 161 |
@spaces.GPU
|
| 162 |
-
def create_cosyvoice(model_dir):
|
|
|
|
| 163 |
# 在这里加入你需要的处理逻辑
|
| 164 |
if cosyvoice_instance is not None:
|
| 165 |
return cosyvoice_instance
|
|
@@ -168,9 +169,8 @@ def create_cosyvoice(model_dir):
|
|
| 168 |
|
| 169 |
@spaces.GPU
|
| 170 |
def load_sft_options():
|
| 171 |
-
return
|
| 172 |
-
|
| 173 |
-
cosyvoice = create_cosyvoice(args.model_dir)
|
| 174 |
|
| 175 |
prompt_sr, target_sr = 16000, 22050
|
| 176 |
default_data = np.zeros(target_sr)
|
|
|
|
| 72 |
prompt_wav = None
|
| 73 |
# if instruct mode, please make sure that model is iic/CosyVoice-300M-Instruct and not cross_lingual mode
|
| 74 |
if mode_checkbox_group in ['自然语言控制']:
|
| 75 |
+
if get_cosyvoice().frontend.instruct is False:
|
| 76 |
gr.Warning('您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型'.format(args.model_dir))
|
| 77 |
yield (target_sr, default_data)
|
| 78 |
if instruct_text == '':
|
|
|
|
| 82 |
gr.Info('您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略')
|
| 83 |
# if cross_lingual mode, please make sure that model is iic/CosyVoice-300M and tts_text prompt_text are different language
|
| 84 |
if mode_checkbox_group in ['跨语种复刻']:
|
| 85 |
+
if get_cosyvoice().frontend.instruct is True:
|
| 86 |
gr.Warning('您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型'.format(args.model_dir))
|
| 87 |
yield (target_sr, default_data)
|
| 88 |
if instruct_text != '':
|
|
|
|
| 114 |
if mode_checkbox_group == '预训练音色':
|
| 115 |
logging.info('get sft inference request')
|
| 116 |
set_all_random_seed(seed)
|
| 117 |
+
for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
|
| 118 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
| 119 |
elif mode_checkbox_group == '3s极速复刻':
|
| 120 |
logging.info('get zero_shot inference request')
|
| 121 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
| 122 |
set_all_random_seed(seed)
|
| 123 |
+
for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
|
| 124 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
| 125 |
elif mode_checkbox_group == '跨语种复刻':
|
| 126 |
logging.info('get cross_lingual inference request')
|
| 127 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
| 128 |
set_all_random_seed(seed)
|
| 129 |
+
for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
|
| 130 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
| 131 |
else:
|
| 132 |
logging.info('get instruct inference request')
|
| 133 |
set_all_random_seed(seed)
|
| 134 |
+
for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
| 135 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
| 136 |
|
| 137 |
# SDK模型下载
|
|
|
|
| 157 |
args = parser.parse_args()
|
| 158 |
|
| 159 |
cosyvoice_instance = None
|
| 160 |
+
model_dir=args.model_dir
|
| 161 |
@spaces.GPU
|
| 162 |
+
def get_cosyvoice():
|
| 163 |
+
global cosyvoice_instance, model_dir
|
| 164 |
# 在这里加入你需要的处理逻辑
|
| 165 |
if cosyvoice_instance is not None:
|
| 166 |
return cosyvoice_instance
|
|
|
|
| 169 |
|
| 170 |
@spaces.GPU
|
| 171 |
def load_sft_options():
|
| 172 |
+
return get_cosyvoice().list_avaliable_spks()
|
| 173 |
+
|
|
|
|
| 174 |
|
| 175 |
prompt_sr, target_sr = 16000, 22050
|
| 176 |
default_data = np.zeros(target_sr)
|