Spaces:

swc2
/

Target-speaker-extraction

Running

swc2 committed on Apr 3, 2025

Commit

956c248

1 Parent(s): eada018

change to v3.0

Files changed (2) hide show

datahandler.py CHANGED Viewed

@@ -18,8 +18,8 @@ class AudioMixer(object):
     def __init__(
         self,
         sample_rate=16000,
-        mean_snr=-4,
-        var_snr=10,
         mean_loudness=-24,
         var_loudness=10
     ):

     def __init__(
         self,
         sample_rate=16000,
+        mean_snr=-3,
+        var_snr=8,
         mean_loudness=-24,
         var_loudness=10
     ):

decode.py CHANGED Viewed

@@ -43,19 +43,17 @@ class InferencePipeline:
         self.computer_ = NnetComputer(config.test.checkpoint,config.test.gpu, model_inst)
-    def run_inference(self, input_audio_path: str, enroll_audio_path: str) -> str:
         mix_samps, sr = sf.read(input_audio_path)
         aux_samps, sr2 = sf.read(enroll_audio_path)
-        aux_samps[10:]
         samps = self.computer_.compute(mix_samps, aux_samps, len(aux_samps))
         norm = np.linalg.norm(mix_samps, np.inf)
         samps = samps[:mix_samps.size]
         samps = samps * norm / np.max(np.abs(samps))
-        out_wav = "temp_extracted.wav"
         sf.write(out_wav, samps, sr)
         return out_wav
@@ -65,7 +63,8 @@ if __name__ == "__main__":
     mix_path = "test_output_mixture.wav"
     enroll_path = "test_mix.wav"
-    out_wav = pipeline.run_inference(mix_path, enroll_path)
     print("Done:", out_wav)

         self.computer_ = NnetComputer(config.test.checkpoint,config.test.gpu, model_inst)
+    def run_inference(self, input_audio_path: str, enroll_audio_path: str, out_path: str) -> str:
         mix_samps, sr = sf.read(input_audio_path)
         aux_samps, sr2 = sf.read(enroll_audio_path)
         samps = self.computer_.compute(mix_samps, aux_samps, len(aux_samps))
         norm = np.linalg.norm(mix_samps, np.inf)
         samps = samps[:mix_samps.size]
         samps = samps * norm / np.max(np.abs(samps))
+        out_wav = out_path
         sf.write(out_wav, samps, sr)
         return out_wav
     mix_path = "test_output_mixture.wav"
     enroll_path = "test_mix.wav"
+    out_path = "temp_output.wav"
+    out_wav = pipeline.run_inference(mix_path, enroll_path, out_path)
     print("Done:", out_wav)