Spark808 committed on
Commit
ccca5f2
·
1 Parent(s): 871f452

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -30
app.py CHANGED
@@ -6,10 +6,11 @@ import logging
6
  from datetime import datetime
7
 
8
  import gradio as gr
9
- import torch
10
- from fairseq import checkpoint_utils
11
  import librosa
 
12
 
 
13
  from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
14
  from vc_infer_pipeline import VC
15
  from config import is_half, device
@@ -18,10 +19,10 @@ logging.getLogger("numba").setLevel(logging.WARNING)
18
 
19
 
20
  def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
21
- def vc_fn(vc_audio_file, vc_transpose, vc_f0method, vc_index_ratio):
22
  try:
23
- # Load the audio file uploaded via Gradio
24
- audio, sr = librosa.load(vc_audio_file.name, sr=None, mono=True)
25
 
26
  # Your existing processing logic for audio
27
  times = [0, 0, 0]
@@ -44,7 +45,7 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
44
  f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
45
  )
46
  return "Success", (tgt_sr, audio_opt)
47
- except Exception as e:
48
  info = traceback.format_exc()
49
  print(info)
50
  return info, (None, None)
@@ -92,7 +93,7 @@ if __name__ == '__main__':
92
  else:
93
  net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
94
  del net_g.enc_q
95
- net_g.load_state_dict(cpt["weight"], strict=False)
96
  net_g.eval().to(device)
97
  if is_half:
98
  net_g = net_g.half()
@@ -101,44 +102,44 @@ if __name__ == '__main__':
101
  vc = VC(tgt_sr, device, is_half)
102
  models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
103
 
104
- with gr.Blocks() as app:
105
- gr.Markdown(
106
  "# <center> RVC generator\n"
107
  "## <center> The input audio should be clean and pure voice without background music.\n"
108
  "[![buymeacoffee](https://badgen.net/badge/icon/buymeacoffee?icon=buymeacoffee&label)](https://www.buymeacoffee.com/spark808)\n\n"
109
  )
110
- with gr.Tabs():
111
  for (name, title, cover, vc_fn) in models:
112
- with gr.TabItem(name):
113
- with gr.Row():
114
- gr.Markdown(
115
  '<div align="center">'
116
  f'<div>{title}</div>\n' +
117
  (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "") +
118
  '</div>'
119
  )
120
- with gr.Row():
121
- with gr.Column():
122
- # Use file upload instead of microphone
123
- vc_audio_file = gr.File(label="Upload your audio file")
124
- vc_transpose = gr.Number(label="Transpose", value=0)
125
- vc_f0method = gr.Radio(
126
  label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
127
  choices=["pm", "harvest"],
128
- value="harvest",
129
  interactive=True,
130
  )
131
- vc_index_ratio = gr.Slider(
132
- minimum=0,
133
- maximum=1,
134
  label="Retrieval feature ratio",
135
- value=0.6,
136
  interactive=True,
137
  )
138
- vc_submit = gr.Button("Generate", variant="primary")
139
- with gr.Column():
140
- vc_output1 = gr.Textbox(label="Output Message")
141
- vc_output2 = gr.Audio(label="Output Audio")
142
 
143
- vc_submit.click(vc_fn, [vc_audio_file, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
144
- app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
 
6
  from datetime import datetime
7
 
8
  import gradio as gr
9
+ import numpy as np
 
10
  import librosa
11
+ import torch
12
 
13
+ from fairseq import checkpoint_utils
14
  from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
15
  from vc_infer_pipeline import VC
16
  from config import is_half, device
 
19
 
20
 
21
  def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
22
+ def vc_fn(vc_transpose, vc_f0method, vc_index_ratio):
23
  try:
24
+ # Get the recorded audio from the microphone
25
+ audio, sr = vc_microphone.record(num_frames=16000) # Adjust the sample rate if needed
26
 
27
  # Your existing processing logic for audio
28
  times = [0, 0, 0]
 
45
  f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
46
  )
47
  return "Success", (tgt_sr, audio_opt)
48
+ except:
49
  info = traceback.format_exc()
50
  print(info)
51
  return info, (None, None)
 
93
  else:
94
  net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
95
  del net_g.enc_q
96
+ print(net_g.load_state_dict(cpt["weight"], strict=False))
97
  net_g.eval().to(device)
98
  if is_half:
99
  net_g = net_g.half()
 
102
  vc = VC(tgt_sr, device, is_half)
103
  models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
104
 
105
+ with gr.Interface() as app:
106
+ gr.markdown(
107
  "# <center> RVC generator\n"
108
  "## <center> The input audio should be clean and pure voice without background music.\n"
109
  "[![buymeacoffee](https://badgen.net/badge/icon/buymeacoffee?icon=buymeacoffee&label)](https://www.buymeacoffee.com/spark808)\n\n"
110
  )
111
+ with gr.tabs():
112
  for (name, title, cover, vc_fn) in models:
113
+ with gr.tab(name):
114
+ with gr.row():
115
+ gr.markdown(
116
  '<div align="center">'
117
  f'<div>{title}</div>\n' +
118
  (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "") +
119
  '</div>'
120
  )
121
+ with gr.row():
122
+ with gr.column():
123
+ # Use microphone instead of file upload
124
+ vc_microphone = gr.microphone(label="Record your voice")
125
+ vc_transpose = gr.number(label="Transpose", default=0)
126
+ vc_f0method = gr.radio(
127
  label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
128
  choices=["pm", "harvest"],
129
+ default="harvest",
130
  interactive=True,
131
  )
132
+ vc_index_ratio = gr.slider(
133
+ min_value=0,
134
+ max_value=1,
135
  label="Retrieval feature ratio",
136
+ default=0.6,
137
  interactive=True,
138
  )
139
+ vc_submit = gr.button("Generate", type="primary")
140
+ with gr.column():
141
+ vc_output1 = gr.textbox(label="Output Message")
142
+ vc_output2 = gr.audio(label="Output Audio")
143
 
144
+ vc_submit.click(vc_fn, [vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
145
+ app.run(share=args.share)