RUI-LONG committed on
Commit
65caffe
·
1 Parent(s): 78068b2
Files changed (2) hide show
  1. app.py +39 -10
  2. requirements.txt +3 -4
app.py CHANGED
@@ -41,7 +41,9 @@ model_loader.load("char2")
41
 
42
 
43
  def tts(
 
44
  speed,
 
45
  tts_text,
46
  tts_voice,
47
  f0_up_key,
@@ -58,6 +60,7 @@ def tts(
58
  print(tts_text)
59
  print(f"tts_voice: {tts_voice}")
60
  print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
 
61
  try:
62
  if limitation and len(tts_text) > 280:
63
  print("Error: Text too long")
@@ -66,26 +69,41 @@ def tts(
66
  None,
67
  None,
68
  )
69
- tgt_sr, net_g, vc, version, index_file, if_f0 = (
70
- model_loader.tgt_sr,
71
- model_loader.net_g,
72
- model_loader.vc,
73
- model_loader.version,
74
- model_loader.index_file,
75
- model_loader.if_f0,
76
- )
77
  t0 = time.time()
78
  if speed >= 0:
79
  speed_str = f"+{speed}%"
80
  else:
81
  speed_str = f"{speed}%"
 
 
 
 
82
  asyncio.run(
83
  edge_tts.Communicate(
84
- tts_text, "-".join(tts_voice.split("-")[:-1]), rate=speed_str
85
  ).save(edge_output_filename)
86
  )
87
  t1 = time.time()
88
  edge_time = t1 - t0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  audio, sr = librosa.load(edge_output_filename, sr=16000, mono=True)
90
  duration = len(audio) / sr
91
  print(f"Audio duration: {duration}s")
@@ -176,10 +194,19 @@ with app:
176
  step=10,
177
  interactive=True,
178
  )
 
 
 
 
 
 
 
 
179
  tts_text = gr.Textbox(
180
  label="Input Text",
181
- value="Nova says: Happy New Year, yaaaaaaaaa",
182
  )
 
183
  with gr.Column():
184
  but0 = gr.Button("Convert", variant="primary")
185
  info_text = gr.Textbox(label="Output info")
@@ -188,7 +215,9 @@ with app:
188
  but0.click(
189
  tts,
190
  [
 
191
  speed,
 
192
  tts_text,
193
  tts_voice,
194
  f0_key_up,
 
41
 
42
 
43
  def tts(
44
+ rvc,
45
  speed,
46
+ pitch,
47
  tts_text,
48
  tts_voice,
49
  f0_up_key,
 
60
  print(tts_text)
61
  print(f"tts_voice: {tts_voice}")
62
  print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
63
+
64
  try:
65
  if limitation and len(tts_text) > 280:
66
  print("Error: Text too long")
 
69
  None,
70
  None,
71
  )
72
+
 
 
 
 
 
 
 
73
  t0 = time.time()
74
  if speed >= 0:
75
  speed_str = f"+{speed}%"
76
  else:
77
  speed_str = f"{speed}%"
78
+ if pitch >= 0:
79
+ pitch = f'+{pitch}Hz'
80
+ else:
81
+ pitch = f'{pitch}Hz'
82
  asyncio.run(
83
  edge_tts.Communicate(
84
+ tts_text, "-".join(tts_voice.split("-")[:-1]), rate=speed_str, pitch=pitch
85
  ).save(edge_output_filename)
86
  )
87
  t1 = time.time()
88
  edge_time = t1 - t0
89
+ # with open(edge_output_filename, "rb") as f:
90
+ # audio_opt = f.read()
91
+ if not rvc:
92
+ info = f"Success. Time: edge-tts: {edge_time}s"
93
+ print(info)
94
+ return (
95
+ info,
96
+ edge_output_filename,
97
+ )
98
+
99
+ tgt_sr, net_g, vc, version, index_file, if_f0 = (
100
+ model_loader.tgt_sr,
101
+ model_loader.net_g,
102
+ model_loader.vc,
103
+ model_loader.version,
104
+ model_loader.index_file,
105
+ model_loader.if_f0,
106
+ )
107
  audio, sr = librosa.load(edge_output_filename, sr=16000, mono=True)
108
  duration = len(audio) / sr
109
  print(f"Audio duration: {duration}s")
 
194
  step=10,
195
  interactive=True,
196
  )
197
+ pitch = gr.Slider(
198
+ minimum=-100,
199
+ maximum=100,
200
+ label="Speech pitch",
201
+ value=0,
202
+ step=5,
203
+ interactive=True,
204
+ )
205
  tts_text = gr.Textbox(
206
  label="Input Text",
207
+ value="I'm never gonna let you down I'm always gonna build you up",
208
  )
209
+ rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=True)
210
  with gr.Column():
211
  but0 = gr.Button("Convert", variant="primary")
212
  info_text = gr.Textbox(label="Output info")
 
215
  but0.click(
216
  tts,
217
  [
218
+ rvc,
219
  speed,
220
+ pitch,
221
  tts_text,
222
  tts_voice,
223
  f0_key_up,
requirements.txt CHANGED
@@ -1,12 +1,11 @@
1
- edge_tts==6.1.7
2
  fairseq==0.12.2
3
  faiss_cpu==1.7.4
4
- gradio==3.38.0
5
  librosa==0.9.1
6
  numpy==1.22.4
7
  praat-parselmouth==0.4.3
8
  pyworld==0.3.4
9
  torchcrepe==0.0.21
10
  scikit-learn==1.3.0
11
- gradio==3.38.0
12
- gradio_client==0.8.1
 
1
+ edge_tts==6.1.12
2
  fairseq==0.12.2
3
  faiss_cpu==1.7.4
 
4
  librosa==0.9.1
5
  numpy==1.22.4
6
  praat-parselmouth==0.4.3
7
  pyworld==0.3.4
8
  torchcrepe==0.0.21
9
  scikit-learn==1.3.0
10
+ gradio
11
+ gradio_client