Spaces:

RUI-LONG
/

test-rvc

Sleeping

App Files Community

RUI-LONG committed on Jun 18, 2024

Commit

65caffe

1 Parent(s): 78068b2

update

Browse files

Files changed (2) hide show

app.py +39 -10
requirements.txt +3 -4

app.py CHANGED Viewed

@@ -41,7 +41,9 @@ model_loader.load("char2")
 def tts(
     speed,
     tts_text,
     tts_voice,
     f0_up_key,
@@ -58,6 +60,7 @@ def tts(
     print(tts_text)
     print(f"tts_voice: {tts_voice}")
     print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
     try:
         if limitation and len(tts_text) > 280:
             print("Error: Text too long")
@@ -66,26 +69,41 @@ def tts(
                 None,
                 None,
             )
-        tgt_sr, net_g, vc, version, index_file, if_f0 = (
-            model_loader.tgt_sr,
-            model_loader.net_g,
-            model_loader.vc,
-            model_loader.version,
-            model_loader.index_file,
-            model_loader.if_f0,
-        )
         t0 = time.time()
         if speed >= 0:
             speed_str = f"+{speed}%"
         else:
             speed_str = f"{speed}%"
         asyncio.run(
             edge_tts.Communicate(
-                tts_text, "-".join(tts_voice.split("-")[:-1]), rate=speed_str
             ).save(edge_output_filename)
         )
         t1 = time.time()
         edge_time = t1 - t0
         audio, sr = librosa.load(edge_output_filename, sr=16000, mono=True)
         duration = len(audio) / sr
         print(f"Audio duration: {duration}s")
@@ -176,10 +194,19 @@ with app:
                 step=10,
                 interactive=True,
             )
             tts_text = gr.Textbox(
                 label="Input Text",
-                value="Nova says: Happy New Year, yaaaaaaaaa",
             )
         with gr.Column():
             but0 = gr.Button("Convert", variant="primary")
             info_text = gr.Textbox(label="Output info")
@@ -188,7 +215,9 @@ with app:
         but0.click(
             tts,
             [
                 speed,
                 tts_text,
                 tts_voice,
                 f0_key_up,

 def tts(
+    rvc,
     speed,
+    pitch,
     tts_text,
     tts_voice,
     f0_up_key,
     print(tts_text)
     print(f"tts_voice: {tts_voice}")
     print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
     try:
         if limitation and len(tts_text) > 280:
             print("Error: Text too long")
                 None,
                 None,
             )
         t0 = time.time()
         if speed >= 0:
             speed_str = f"+{speed}%"
         else:
             speed_str = f"{speed}%"
+        if pitch >= 0:
+            pitch = f'+{pitch}Hz'
+        else:
+            pitch = f'{pitch}Hz'
         asyncio.run(
             edge_tts.Communicate(
+                tts_text, "-".join(tts_voice.split("-")[:-1]), rate=speed_str, pitch=pitch
             ).save(edge_output_filename)
         )
         t1 = time.time()
         edge_time = t1 - t0
+        # with open(edge_output_filename, "rb") as f:
+        #     audio_opt = f.read()
+        if not rvc:
+            info = f"Success. Time: edge-tts: {edge_time}s"
+            print(info)
+            return (
+                info,
+                edge_output_filename,
+            )
+        tgt_sr, net_g, vc, version, index_file, if_f0 = (
+            model_loader.tgt_sr,
+            model_loader.net_g,
+            model_loader.vc,
+            model_loader.version,
+            model_loader.index_file,
+            model_loader.if_f0,
+        )
         audio, sr = librosa.load(edge_output_filename, sr=16000, mono=True)
         duration = len(audio) / sr
         print(f"Audio duration: {duration}s")
                 step=10,
                 interactive=True,
             )
+            pitch = gr.Slider(
+                minimum=-100,
+                maximum=100,
+                label="Speech pitch",
+                value=0,
+                step=5,
+                interactive=True,
+            )
             tts_text = gr.Textbox(
                 label="Input Text",
+                value="I'm never gonna let you down I'm always gonna build you up",
             )
+            rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=True)
         with gr.Column():
             but0 = gr.Button("Convert", variant="primary")
             info_text = gr.Textbox(label="Output info")
         but0.click(
             tts,
             [
+                rvc,
                 speed,
+                pitch,
                 tts_text,
                 tts_voice,
                 f0_key_up,

requirements.txt CHANGED Viewed

@@ -1,12 +1,11 @@
-edge_tts==6.1.7
 fairseq==0.12.2
 faiss_cpu==1.7.4
-gradio==3.38.0
 librosa==0.9.1
 numpy==1.22.4
 praat-parselmouth==0.4.3
 pyworld==0.3.4
 torchcrepe==0.0.21
 scikit-learn==1.3.0
-gradio==3.38.0
-gradio_client==0.8.1

+edge_tts==6.1.12
 fairseq==0.12.2
 faiss_cpu==1.7.4
 librosa==0.9.1
 numpy==1.22.4
 praat-parselmouth==0.4.3
 pyworld==0.3.4
 torchcrepe==0.0.21
 scikit-learn==1.3.0
+gradio
+gradio_client