Chuatury committed on
Commit
15cc387
·
unverified ·
1 Parent(s): b3283e2
Files changed (1) hide show
  1. app_locally.py +39 -49
app_locally.py CHANGED
@@ -53,50 +53,49 @@ def predict(prompt, speaker_wav, transform_wav):
53
  # initialize a empty info
54
  text_hint = ""
55
 
56
- # first detect the input language
57
- language_predicted = langid.classify(prompt)[0].strip()
58
- print(f"Detected language:{language_predicted}")
59
-
60
- if language_predicted not in supported_languages:
61
- text_hint += f"[ERROR] The detected language {language_predicted} for your input text is not in our Supported Languages: {supported_languages}\n"
62
- gr.Warning(
63
- f"The detected language {language_predicted} for your input text is not in our Supported Languages: {supported_languages}"
64
- )
65
 
66
- return (
67
- text_hint,
68
- None,
69
- None,
 
 
 
70
  )
 
 
 
 
 
 
 
 
 
 
71
 
72
- if language_predicted == "zh":
73
- tts_model = zh_base_speaker_tts
74
- source_se = zh_source_se
75
- language = "Chinese"
 
76
 
77
- else:
78
- tts_model = en_base_speaker_tts
79
- source_se = en_source_default_se
80
- language = "English"
81
-
82
- # if len(prompt) < 2:
83
- # text_hint += f"[ERROR] Please give a longer prompt text \n"
84
- # gr.Warning("Please give a longer prompt text")
85
- # return (
86
- # text_hint,
87
- # None,
88
- # None,
89
- # )
90
- # if len(prompt) > 200:
91
- # text_hint += f"[ERROR] Text length limited to 200 characters for this demo, please try shorter text. You can clone our open-source repo and try for your usage \n"
92
- # gr.Warning(
93
- # "Text length limited to 200 characters for this demo, please try shorter text. You can clone our open-source repo for your usage"
94
- # )
95
- # return (
96
- # text_hint,
97
- # None,
98
- # None,
99
- # )
100
 
101
  # note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
102
  try:
@@ -117,15 +116,6 @@ def predict(prompt, speaker_wav, transform_wav):
117
  None,
118
  )
119
 
120
- if transform_wav is not None:
121
- # if transform_wav is provided, use it as the source audio
122
- src_path = transform_wav
123
- text_hint += f"Using transform audio {src_path} as source audio \n"
124
- else:
125
- text_hint += f"Using TTS to generate source audio from the prompt text \n"
126
- src_path = f"{output_dir}/tmp.wav"
127
- tts_model.tts(prompt, src_path, speaker="default", language=language)
128
-
129
  save_path = f"{output_dir}/output.wav"
130
  # Run the tone color converter
131
  encode_message = "@MyShell"
 
53
  # initialize a empty info
54
  text_hint = ""
55
 
56
+ if transform_wav is not None:
57
+ # if transform_wav is provided, use it as the source audio
58
+ src_path = transform_wav
59
+ text_hint += f"Using transform audio {src_path} as source audio \n"
 
 
 
 
 
60
 
61
+ # extract source_se
62
+ source_se, _ = se_extractor.get_se(
63
+ speaker_wav,
64
+ tone_color_converter,
65
+ target_dir="processed",
66
+ max_length=60.0,
67
+ vad=True,
68
  )
69
+ else:
70
+ # first detect the input language
71
+ language_predicted = langid.classify(prompt)[0].strip()
72
+ print(f"Detected language:{language_predicted}")
73
+
74
+ if language_predicted not in supported_languages:
75
+ text_hint += f"[ERROR] The detected language {language_predicted} for your input text is not in our Supported Languages: {supported_languages}\n"
76
+ gr.Warning(
77
+ f"The detected language {language_predicted} for your input text is not in our Supported Languages: {supported_languages}"
78
+ )
79
 
80
+ return (
81
+ text_hint,
82
+ None,
83
+ None,
84
+ )
85
 
86
+ if language_predicted == "zh":
87
+ tts_model = zh_base_speaker_tts
88
+ source_se = zh_source_se
89
+ language = "Chinese"
90
+
91
+ else:
92
+ tts_model = en_base_speaker_tts
93
+ source_se = en_source_default_se
94
+ language = "English"
95
+
96
+ text_hint += f"Using TTS to generate source audio from the prompt text \n"
97
+ src_path = f"{output_dir}/tmp.wav"
98
+ tts_model.tts(prompt, src_path, speaker="default", language=language)
 
 
 
 
 
 
 
 
 
 
99
 
100
  # note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
101
  try:
 
116
  None,
117
  )
118
 
 
 
 
 
 
 
 
 
 
119
  save_path = f"{output_dir}/output.wav"
120
  # Run the tone color converter
121
  encode_message = "@MyShell"