RomanianRVC

Runtime error

App Files Files Community

Spark808 committed on Dec 4, 2023

Commit

ccca5f2

1 Parent(s): 871f452

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -30

app.py CHANGED Viewed

@@ -6,10 +6,11 @@ import logging
 from datetime import datetime
 import gradio as gr
-import torch
-from fairseq import checkpoint_utils
 import librosa
 from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 from vc_infer_pipeline import VC
 from config import is_half, device
@@ -18,10 +19,10 @@ logging.getLogger("numba").setLevel(logging.WARNING)
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
-    def vc_fn(vc_audio_file, vc_transpose, vc_f0method, vc_index_ratio):
         try:
-            # Load the audio file uploaded via Gradio
-            audio, sr = librosa.load(vc_audio_file.name, sr=None, mono=True)
             # Your existing processing logic for audio
             times = [0, 0, 0]
@@ -44,7 +45,7 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                 f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
             )
             return "Success", (tgt_sr, audio_opt)
-        except Exception as e:
             info = traceback.format_exc()
             print(info)
             return info, (None, None)
@@ -92,7 +93,7 @@ if __name__ == '__main__':
         else:
             net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
         del net_g.enc_q
-        net_g.load_state_dict(cpt["weight"], strict=False)
         net_g.eval().to(device)
         if is_half:
             net_g = net_g.half()
@@ -101,44 +102,44 @@ if __name__ == '__main__':
         vc = VC(tgt_sr, device, is_half)
         models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
-    with gr.Blocks() as app:
-        gr.Markdown(
             "# <center> RVC generator\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
             "[![buymeacoffee](https://badgen.net/badge/icon/buymeacoffee?icon=buymeacoffee&label)](https://www.buymeacoffee.com/spark808)\n\n"
         )
-        with gr.Tabs():
             for (name, title, cover, vc_fn) in models:
-                with gr.TabItem(name):
-                    with gr.Row():
-                        gr.Markdown(
                             '<div align="center">'
                             f'<div>{title}</div>\n' +
                             (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "") +
                             '</div>'
                         )
-                    with gr.Row():
-                        with gr.Column():
-                            # Use file upload instead of microphone
-                            vc_audio_file = gr.File(label="Upload your audio file")
-                            vc_transpose = gr.Number(label="Transpose", value=0)
-                            vc_f0method = gr.Radio(
                                 label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
                                 choices=["pm", "harvest"],
-                                value="harvest",
                                 interactive=True,
                             )
-                            vc_index_ratio = gr.Slider(
-                                minimum=0,
-                                maximum=1,
                                 label="Retrieval feature ratio",
-                                value=0.6,
                                 interactive=True,
                             )
-                            vc_submit = gr.Button("Generate", variant="primary")
-                        with gr.Column():
-                            vc_output1 = gr.Textbox(label="Output Message")
-                            vc_output2 = gr.Audio(label="Output Audio")
-                vc_submit.click(vc_fn, [vc_audio_file, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
-        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)

 from datetime import datetime
 import gradio as gr
+import numpy as np
 import librosa
+import torch
+from fairseq import checkpoint_utils
 from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 from vc_infer_pipeline import VC
 from config import is_half, device
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
+    def vc_fn(vc_transpose, vc_f0method, vc_index_ratio):
         try:
+            # Get the recorded audio from the microphone
+            audio, sr = vc_microphone.record(num_frames=16000)  # Adjust the sample rate if needed
             # Your existing processing logic for audio
             times = [0, 0, 0]
                 f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
             )
             return "Success", (tgt_sr, audio_opt)
+        except:
             info = traceback.format_exc()
             print(info)
             return info, (None, None)
         else:
             net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
         del net_g.enc_q
+        print(net_g.load_state_dict(cpt["weight"], strict=False))
         net_g.eval().to(device)
         if is_half:
             net_g = net_g.half()
         vc = VC(tgt_sr, device, is_half)
         models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
+    with gr.Interface() as app:
+        gr.markdown(
             "# <center> RVC generator\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
             "[![buymeacoffee](https://badgen.net/badge/icon/buymeacoffee?icon=buymeacoffee&label)](https://www.buymeacoffee.com/spark808)\n\n"
         )
+        with gr.tabs():
             for (name, title, cover, vc_fn) in models:
+                with gr.tab(name):
+                    with gr.row():
+                        gr.markdown(
                             '<div align="center">'
                             f'<div>{title}</div>\n' +
                             (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "") +
                             '</div>'
                         )
+                    with gr.row():
+                        with gr.column():
+                            # Use microphone instead of file upload
+                            vc_microphone = gr.microphone(label="Record your voice")
+                            vc_transpose = gr.number(label="Transpose", default=0)
+                            vc_f0method = gr.radio(
                                 label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
                                 choices=["pm", "harvest"],
+                                default="harvest",
                                 interactive=True,
                             )
+                            vc_index_ratio = gr.slider(
+                                min_value=0,
+                                max_value=1,
                                 label="Retrieval feature ratio",
+                                default=0.6,
                                 interactive=True,
                             )
+                            vc_submit = gr.button("Generate", type="primary")
+                        with gr.column():
+                            vc_output1 = gr.textbox(label="Output Message")
+                            vc_output2 = gr.audio(label="Output Audio")
+                vc_submit.click(vc_fn, [vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
+        app.run(share=args.share)