swc2 committed on
Commit
d72375d
·
1 Parent(s): 679330d

app revise test

Browse files
Files changed (1) hide show
  1. app.py +26 -85
app.py CHANGED
@@ -1,87 +1,28 @@
1
  import gradio as gr
2
- import torch as th
3
- import numpy as np
4
- from nnet.spex_plus import SpEx_Plus
5
- from utils.logger import get_logger
6
- from utils.audio import WaveReader, write_wav
7
-
8
- logger = get_logger(__name__)
9
-
10
class NnetComputer(object):
    """Run a trained SpEx+ network for target-speaker extraction.

    Loads a checkpoint from a directory and exposes :meth:`compute`, which
    extracts the target speaker from a mixture waveform given an auxiliary
    (reference) utterance of that speaker.
    """

    def __init__(self, cpt_dir, gpuid, nnet_conf, cpt_name="59.pt.tar"):
        """
        Args:
            cpt_dir: Directory containing the checkpoint file.
            gpuid: CUDA device index; a negative value selects the CPU.
            nnet_conf: Keyword configuration forwarded to ``SpEx_Plus``.
            cpt_name: Checkpoint filename inside ``cpt_dir``. Previously
                hard-coded to ``"59.pt.tar"``; now a backward-compatible
                parameter with the same default.
        """
        self.device = th.device("cuda:{}".format(gpuid)) if gpuid >= 0 else th.device("cpu")
        nnet = self._load_nnet(cpt_dir, nnet_conf, cpt_name)
        self.nnet = nnet.to(self.device) if gpuid >= 0 else nnet
        # Inference only: put the network in eval mode (disables dropout /
        # batch-norm statistics updates).
        self.nnet.eval()

    def _load_nnet(self, cpt_dir, nnet_conf, cpt_name):
        """Build the network and restore its weights from the checkpoint."""
        import os  # local import: `os` is not imported at module level

        nnet = SpEx_Plus(**nnet_conf)
        cpt_fname = os.path.join(cpt_dir, cpt_name)
        cpt = th.load(cpt_fname, map_location="cpu")
        nnet.load_state_dict(cpt["model_state_dict"])
        logger.info("Load checkpoint from {}, epoch {:d}".format(
            cpt_fname, cpt["epoch"]))
        return nnet

    def compute(self, samps, aux_samps, aux_samps_len):
        """Extract the target speaker from a mixture.

        Args:
            samps: Mixture waveform samples (1-D float sequence).
            aux_samps: Reference-utterance samples of the target speaker.
            aux_samps_len: Length of the reference utterance in samples.

        Returns:
            ``numpy.ndarray`` containing the first (short-window) extracted
            waveform estimate.
        """
        with th.no_grad():
            raw = th.tensor(samps, dtype=th.float32, device=self.device)
            aux = th.tensor(aux_samps, dtype=th.float32, device=self.device)
            # NOTE(review): the length is encoded as float32 here; if the
            # network expects an integer length tensor, confirm upstream.
            aux_len = th.tensor(aux_samps_len, dtype=th.float32, device=self.device)
            # Add a batch dimension to the reference utterance.
            aux = aux.unsqueeze(0)
            # The network returns three multi-scale estimates plus a speaker
            # prediction; only the first estimate is used downstream.
            sps, sps2, sps3, spk_pred = self.nnet(raw, aux, aux_len)
            return np.squeeze(sps.detach().cpu().numpy())
36
-
37
def compute_output(input_audio, use_gpu, checkpoint, output_dir):
    """Run speaker extraction over all utterances and write WAV outputs.

    Args:
        input_audio: Gradio audio input (currently unused; the mix/aux
            dictionaries below are placeholders to be wired to it).
        use_gpu: If True, run on CUDA device 0; otherwise run on the CPU.
        checkpoint: Directory holding the trained SpEx+ checkpoint.
        output_dir: Directory where the separated WAV files are written.
    """
    import os  # local import: `os` is not imported at module level

    # BUG FIX: the original referenced an undefined `args.sample_rate`.
    # 16 kHz matches the frame sizes computed in nnet_conf below.
    sample_rate = 16000

    # Prepare mix_input and aux_input based on the input_audio
    mix_input = {}  # TODO: populate key -> mixture samples from input_audio
    aux_input = {}  # TODO: populate key -> reference samples per utterance

    # Set GPU index based on the user's choice
    gpu_index = 0 if use_gpu else -1

    # SpEx+ hyper-parameters (L1/L2/L3 are window sizes in samples @16 kHz).
    nnet_conf = {
        "L1": int(0.0025 * 16000),
        "L2": int(0.01 * 16000),
        "L3": int(0.02 * 16000),
        "N": 256,
        "B": 8,
        "O": 256,
        "P": 512,
        "Q": 3,
        "num_spks": 395,
        "spk_embed_dim": 256,
        "causal": False
    }
    computer = NnetComputer(checkpoint, gpu_index, nnet_conf)
    # BUG FIX: iterating a dict yields keys only; .items() yields the
    # (key, samples) pairs this loop unpacks.
    for key, mix_samps in mix_input.items():
        aux_samps = aux_input[key]
        logger.info("Compute on utterance {}...".format(key))
        samps = computer.compute(mix_samps, aux_samps, len(aux_samps))
        # Peak level of the mixture, used to rescale the separated output.
        norm = np.linalg.norm(mix_samps, np.inf)
        # Trim to the mixture length (assumes mix_samps is a numpy array —
        # `.size` is an ndarray attribute; confirm against the reader).
        samps = samps[:mix_samps.size]
        # Normalize the output to the mixture's peak amplitude.
        samps = samps * norm / np.max(np.abs(samps))
        # Write output to the specified directory.
        write_wav(os.path.join(output_dir, "{}.wav".format(key)), samps,
                  sample_rate=sample_rate)
    logger.info("Compute over {:d} utterances".format(len(mix_input)))
71
-
72
# Define the Gradio interface.
inputs = [
    # BUG FIX: Gradio components take `label`, not `name`, and the text
    # component is `gr.Textbox` — `gr.TextInput` does not exist.
    gr.Audio(label="Input Audio"),
    gr.Checkbox(label="Use GPU"),
    gr.Textbox(label="Checkpoint Directory"),
    gr.Textbox(label="Output Directory"),
]
output = gr.Interface(
    fn=compute_output,
    inputs=inputs,
    outputs=None,
    title="Audio Processing with Neural Network",
    description="Process audio input using a neural network model.",
    theme="compact"
)
output.launch()
 
 
 
1
  import gradio as gr
2
+ from inference import InferencePipeline
3
+
4
# Module-level pipeline instance: the model is loaded once at import time
# so every request reuses the same loaded weights.
i = InferencePipeline()

def gradio_voice_conversion(audio_file_path):
    """Run voice conversion on a recorded or uploaded audio file.

    Args:
        audio_file_path: Path to the input audio file. Because the interface
            declares ``gr.Audio(type="filepath")``, Gradio passes the temp
            file path directly — NOT the ``(path, sample_rate)`` tuple used
            by other audio input types (the original docstring claimed the
            tuple form, which contradicted the actual input configuration).

    Returns:
        The result of ``InferencePipeline.voice_conversion`` — presumably a
        file path or waveform Gradio can render; confirm against the
        pipeline implementation.
    """
    print(f"Here is the audio_file_path: {audio_file_path}")
    return i.voice_conversion(audio_file_path)
16
+
17
# Gradio interface definition: a single audio-in / audio-out demo that
# routes the uploaded file through the voice-conversion wrapper.
_voice_input = gr.Audio(label="Record or upload your voice", type="filepath")
_voice_output = gr.Audio(label="Converted Voice")

demo = gr.Interface(
    fn=gradio_voice_conversion,
    inputs=_voice_input,
    outputs=_voice_output,
    title="Voice Conversion Demo",
    description="Voice Conversion: Transform the input voice to a target voice.",
    allow_flagging="never",
)

if __name__ == "__main__":
    # Launch the web app only when run as a script, not when imported.
    demo.launch()