Spaces:
Running
Running
app revise test
Browse files
app.py
CHANGED
|
@@ -1,87 +1,28 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
return nnet
|
| 26 |
-
|
| 27 |
-
def compute(self, samps, aux_samps, aux_samps_len):
    """Run the separation network on a single utterance.

    Moves the mixture, the auxiliary (reference-speaker) samples and the
    auxiliary length onto self.device, evaluates self.nnet without
    gradients, and returns the first estimated source as a numpy array.
    """
    with th.no_grad():
        mix = th.tensor(samps, dtype=th.float32, device=self.device)
        ref = th.tensor(aux_samps, dtype=th.float32, device=self.device)
        ref_len = th.tensor(aux_samps_len, dtype=th.float32, device=self.device)
        # The network expects a leading batch dimension on the reference.
        batched_ref = ref.unsqueeze(0)
        # nnet yields four outputs; only the first estimate is used here.
        est, _est2, _est3, _spk_pred = self.nnet(mix, batched_ref, ref_len)
        return np.squeeze(est.detach().cpu().numpy())
|
| 36 |
-
|
| 37 |
-
def compute_output(input_audio, use_gpu, checkpoint, output_dir):
    """Run speaker-extraction inference over every utterance and write wavs.

    Args:
        input_audio: audio coming from the UI. Currently unused — the
            mix/aux dicts below are placeholders to be filled in.
        use_gpu: when True run on GPU index 0, otherwise on CPU (-1).
        checkpoint: checkpoint directory handed to NnetComputer.
        output_dir: directory that receives one "<key>.wav" per utterance.
    """
    # Prepare mix_input and aux_input based on the input_audio
    mix_input = {}  # Modify this to include your mix_input
    aux_input = {}  # Modify this to include your aux_input

    # Set GPU index based on the user's choice (-1 selects CPU)
    gpu_index = 0 if use_gpu else -1

    # Network hyper-parameters; L1/L2/L3 are filter lengths in samples
    # at a 16 kHz sampling rate.
    nnet_conf = {
        "L1": int(0.0025 * 16000),
        "L2": int(0.01 * 16000),
        "L3": int(0.02 * 16000),
        "N": 256,
        "B": 8,
        "O": 256,
        "P": 512,
        "Q": 3,
        "num_spks": 395,
        "spk_embed_dim": 256,
        "causal": False
    }
    computer = NnetComputer(checkpoint, gpu_index, nnet_conf)
    # BUG FIX: iterating a dict yields keys only, so the original
    # "for key, mix_samps in mix_input:" raised ValueError on any
    # non-empty dict. Iterate items() to get (key, samples) pairs.
    for key, mix_samps in mix_input.items():
        aux_samps = aux_input[key]
        logger.info("Compute on utterance {}...".format(key))
        samps = computer.compute(mix_samps, aux_samps, len(aux_samps))
        # ord=inf gives the peak (max absolute) amplitude of the mixture.
        norm = np.linalg.norm(mix_samps, np.inf)
        samps = samps[:mix_samps.size]
        # Normalize the output to the mixture's peak level; guard against
        # an all-zero estimate to avoid a division-by-zero warning/NaNs.
        peak = np.max(np.abs(samps))
        if peak > 0:
            samps = samps * norm / peak
        # Write output to the specified directory.
        # NOTE(review): `args` is not defined in this function — confirm a
        # module-level `args` exists or pass sample_rate in explicitly.
        write_wav(os.path.join(output_dir, "{}.wav".format(key)), samps, sample_rate=args.sample_rate)
    logger.info("Compute over {:d} utterances".format(len(mix_input)))
|
| 71 |
-
|
| 72 |
-
# Define the Gradio interface.
# BUG FIX: Gradio has no gr.TextInput component (the text component is
# gr.Textbox), and input components take no "name" keyword argument —
# identify components by "label" only.
inputs = [
    gr.Audio(label="Input Audio"),
    gr.Checkbox(label="Use GPU"),
    gr.Textbox(label="Checkpoint Directory"),
    gr.Textbox(label="Output Directory")
]
# NOTE(review): compute_output returns nothing, hence outputs=None, but
# modern Gradio expects an outputs spec on gr.Interface — confirm against
# the installed Gradio version.
output = gr.Interface(
    fn=compute_output,
    inputs=inputs,
    outputs=None,
    title="Audio Processing with Neural Network",
    description="Process audio input using a neural network model.",
    theme="compact"
)
|
| 87 |
-
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
from inference import InferencePipeline

# Single shared pipeline instance, built once at import time so the model
# is loaded before the first request arrives.
# NOTE(review): assumes InferencePipeline() is safe/cheap enough to
# construct at module import — confirm it does not need lazy init.
i = InferencePipeline()
|
| 5 |
+
|
| 6 |
+
def gradio_voice_conversion(audio_file_path):
    """Gradio wrapper around the voice-conversion pipeline.

    The input component is declared with type="filepath", so Gradio passes
    the recorded/uploaded audio as a temp-file path (a str), which is
    forwarded directly to the pipeline. (The original docstring claimed a
    (path, sample_rate) tuple — that only applies to type="numpy".)

    Args:
        audio_file_path: path to the input audio file supplied by Gradio.

    Returns:
        The result of i.voice_conversion(audio_file_path); expected to be
        something the output gr.Audio component can render.
    """
    # Temporary debug trace of the incoming path; remove once verified.
    print(f"Here is the audio_file_path: {audio_file_path}")
    return i.voice_conversion(audio_file_path)
|
| 16 |
+
|
| 17 |
+
# Define your Gradio interface
demo = gr.Interface(
    fn=gradio_voice_conversion,  # Use the wrapper function for voice conversion
    inputs=gr.Audio(label="Record or upload your voice", type="filepath"),  # Specify that you want the filepath
    outputs=gr.Audio(label="Converted Voice"),
    title="Voice Conversion Demo",
    description="Voice Conversion: Transform the input voice to a target voice.",
    # NOTE(review): allow_flagging is deprecated in newer Gradio releases
    # (replaced by flagging_mode) — confirm against the pinned version.
    allow_flagging="never"
)
|
| 26 |
+
|
| 27 |
+
# Launch the web app only when executed as a script, not on import
# (e.g. when the Space runner or tests import this module).
if __name__ == "__main__":
    demo.launch()
|