Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,10 +6,11 @@ import logging
|
|
| 6 |
from datetime import datetime
|
| 7 |
|
| 8 |
import gradio as gr
|
| 9 |
-
import
|
| 10 |
-
from fairseq import checkpoint_utils
|
| 11 |
import librosa
|
|
|
|
| 12 |
|
|
|
|
| 13 |
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
|
| 14 |
from vc_infer_pipeline import VC
|
| 15 |
from config import is_half, device
|
|
@@ -18,10 +19,10 @@ logging.getLogger("numba").setLevel(logging.WARNING)
|
|
| 18 |
|
| 19 |
|
| 20 |
def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
|
| 21 |
-
def vc_fn(
|
| 22 |
try:
|
| 23 |
-
#
|
| 24 |
-
audio, sr =
|
| 25 |
|
| 26 |
# Your existing processing logic for audio
|
| 27 |
times = [0, 0, 0]
|
|
@@ -44,7 +45,7 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
|
|
| 44 |
f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
|
| 45 |
)
|
| 46 |
return "Success", (tgt_sr, audio_opt)
|
| 47 |
-
except
|
| 48 |
info = traceback.format_exc()
|
| 49 |
print(info)
|
| 50 |
return info, (None, None)
|
|
@@ -92,7 +93,7 @@ if __name__ == '__main__':
|
|
| 92 |
else:
|
| 93 |
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
| 94 |
del net_g.enc_q
|
| 95 |
-
net_g.load_state_dict(cpt["weight"], strict=False)
|
| 96 |
net_g.eval().to(device)
|
| 97 |
if is_half:
|
| 98 |
net_g = net_g.half()
|
|
@@ -101,44 +102,44 @@ if __name__ == '__main__':
|
|
| 101 |
vc = VC(tgt_sr, device, is_half)
|
| 102 |
models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
|
| 103 |
|
| 104 |
-
with gr.
|
| 105 |
-
gr.
|
| 106 |
"# <center> RVC generator\n"
|
| 107 |
"## <center> The input audio should be clean and pure voice without background music.\n"
|
| 108 |
"[](https://www.buymeacoffee.com/spark808)\n\n"
|
| 109 |
)
|
| 110 |
-
with gr.
|
| 111 |
for (name, title, cover, vc_fn) in models:
|
| 112 |
-
with gr.
|
| 113 |
-
with gr.
|
| 114 |
-
gr.
|
| 115 |
'<div align="center">'
|
| 116 |
f'<div>{title}</div>\n' +
|
| 117 |
(f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "") +
|
| 118 |
'</div>'
|
| 119 |
)
|
| 120 |
-
with gr.
|
| 121 |
-
with gr.
|
| 122 |
-
# Use
|
| 123 |
-
|
| 124 |
-
vc_transpose = gr.
|
| 125 |
-
vc_f0method = gr.
|
| 126 |
label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
|
| 127 |
choices=["pm", "harvest"],
|
| 128 |
-
|
| 129 |
interactive=True,
|
| 130 |
)
|
| 131 |
-
vc_index_ratio = gr.
|
| 132 |
-
|
| 133 |
-
|
| 134 |
label="Retrieval feature ratio",
|
| 135 |
-
|
| 136 |
interactive=True,
|
| 137 |
)
|
| 138 |
-
vc_submit = gr.
|
| 139 |
-
with gr.
|
| 140 |
-
vc_output1 = gr.
|
| 141 |
-
vc_output2 = gr.
|
| 142 |
|
| 143 |
-
vc_submit.click(vc_fn, [
|
| 144 |
-
app.
|
|
|
|
| 6 |
from datetime import datetime
|
| 7 |
|
| 8 |
import gradio as gr
|
| 9 |
+
import numpy as np
|
|
|
|
| 10 |
import librosa
|
| 11 |
+
import torch
|
| 12 |
|
| 13 |
+
from fairseq import checkpoint_utils
|
| 14 |
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
|
| 15 |
from vc_infer_pipeline import VC
|
| 16 |
from config import is_half, device
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
|
| 22 |
+
def vc_fn(vc_transpose, vc_f0method, vc_index_ratio):
|
| 23 |
try:
|
| 24 |
+
# Get the recorded audio from the microphone
|
| 25 |
+
audio, sr = vc_microphone.record(num_frames=16000) # Adjust the sample rate if needed
|
| 26 |
|
| 27 |
# Your existing processing logic for audio
|
| 28 |
times = [0, 0, 0]
|
|
|
|
| 45 |
f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
|
| 46 |
)
|
| 47 |
return "Success", (tgt_sr, audio_opt)
|
| 48 |
+
except:
|
| 49 |
info = traceback.format_exc()
|
| 50 |
print(info)
|
| 51 |
return info, (None, None)
|
|
|
|
| 93 |
else:
|
| 94 |
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
| 95 |
del net_g.enc_q
|
| 96 |
+
print(net_g.load_state_dict(cpt["weight"], strict=False))
|
| 97 |
net_g.eval().to(device)
|
| 98 |
if is_half:
|
| 99 |
net_g = net_g.half()
|
|
|
|
| 102 |
vc = VC(tgt_sr, device, is_half)
|
| 103 |
models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
|
| 104 |
|
| 105 |
+
with gr.Interface() as app:
|
| 106 |
+
gr.markdown(
|
| 107 |
"# <center> RVC generator\n"
|
| 108 |
"## <center> The input audio should be clean and pure voice without background music.\n"
|
| 109 |
"[](https://www.buymeacoffee.com/spark808)\n\n"
|
| 110 |
)
|
| 111 |
+
with gr.tabs():
|
| 112 |
for (name, title, cover, vc_fn) in models:
|
| 113 |
+
with gr.tab(name):
|
| 114 |
+
with gr.row():
|
| 115 |
+
gr.markdown(
|
| 116 |
'<div align="center">'
|
| 117 |
f'<div>{title}</div>\n' +
|
| 118 |
(f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "") +
|
| 119 |
'</div>'
|
| 120 |
)
|
| 121 |
+
with gr.row():
|
| 122 |
+
with gr.column():
|
| 123 |
+
# Use microphone instead of file upload
|
| 124 |
+
vc_microphone = gr.microphone(label="Record your voice")
|
| 125 |
+
vc_transpose = gr.number(label="Transpose", default=0)
|
| 126 |
+
vc_f0method = gr.radio(
|
| 127 |
label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
|
| 128 |
choices=["pm", "harvest"],
|
| 129 |
+
default="harvest",
|
| 130 |
interactive=True,
|
| 131 |
)
|
| 132 |
+
vc_index_ratio = gr.slider(
|
| 133 |
+
min_value=0,
|
| 134 |
+
max_value=1,
|
| 135 |
label="Retrieval feature ratio",
|
| 136 |
+
default=0.6,
|
| 137 |
interactive=True,
|
| 138 |
)
|
| 139 |
+
vc_submit = gr.button("Generate", type="primary")
|
| 140 |
+
with gr.column():
|
| 141 |
+
vc_output1 = gr.textbox(label="Output Message")
|
| 142 |
+
vc_output2 = gr.audio(label="Output Audio")
|
| 143 |
|
| 144 |
+
vc_submit.click(vc_fn, [vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
|
| 145 |
+
app.run(share=args.share)
|