remove watermark
- OpenVoice/api.py +1 -51
- app_locally.py +0 -2
- requirements.txt +0 -1
OpenVoice/api.py
CHANGED
@@ -103,14 +103,6 @@ class ToneColorConverter(OpenVoiceBaseClass):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-        if kwargs.get('enable_watermark', True):
-            import wavmark
-            self.watermark_model = wavmark.load_model().to(self.device)
-        else:
-            self.watermark_model = None
-
-
-
     def extract_se(self, ref_wav_list, se_save_path=None):
         if isinstance(ref_wav_list, str):
             ref_wav_list = [ref_wav_list]
@@ -138,7 +130,7 @@ class ToneColorConverter(OpenVoiceBaseClass):
 
         return gs
 
-    def convert(self, audio_src_path, src_se, tgt_se, output_path=None, tau=0.3, message="default"):
+    def convert(self, audio_src_path, src_se, tgt_se, output_path=None, tau=0.3):
         hps = self.hps
         # load audio
         audio, sample_rate = librosa.load(audio_src_path, sr=hps.data.sampling_rate)
@@ -153,50 +145,8 @@ class ToneColorConverter(OpenVoiceBaseClass):
             spec_lengths = torch.LongTensor([spec.size(-1)]).to(self.device)
             audio = self.model.voice_conversion(spec, spec_lengths, sid_src=src_se, sid_tgt=tgt_se, tau=tau)[0][
                     0, 0].data.cpu().float().numpy()
-            audio = self.add_watermark(audio, message)
             if output_path is None:
                 return audio
             else:
                 soundfile.write(output_path, audio, hps.data.sampling_rate)
 
-    def add_watermark(self, audio, message):
-        if self.watermark_model is None:
-            return audio
-        device = self.device
-        bits = utils.string_to_bits(message).reshape(-1)
-        n_repeat = len(bits) // 32
-
-        K = 16000
-        coeff = 2
-        for n in range(n_repeat):
-            trunck = audio[(coeff * n) * K: (coeff * n + 1) * K]
-            if len(trunck) != K:
-                print('Audio too short, fail to add watermark')
-                break
-            message_npy = bits[n * 32: (n + 1) * 32]
-
-            with torch.no_grad():
-                signal = torch.FloatTensor(trunck).to(device)[None]
-                message_tensor = torch.FloatTensor(message_npy).to(device)[None]
-                signal_wmd_tensor = self.watermark_model.encode(signal, message_tensor)
-                signal_wmd_npy = signal_wmd_tensor.detach().cpu().squeeze()
-            audio[(coeff * n) * K: (coeff * n + 1) * K] = signal_wmd_npy
-        return audio
-
-    def detect_watermark(self, audio, n_repeat):
-        bits = []
-        K = 16000
-        coeff = 2
-        for n in range(n_repeat):
-            trunck = audio[(coeff * n) * K: (coeff * n + 1) * K]
-            if len(trunck) != K:
-                print('Audio too short, fail to detect watermark')
-                return 'Fail'
-            with torch.no_grad():
-                signal = torch.FloatTensor(trunck).to(self.device).unsqueeze(0)
-                message_decoded_npy = (self.watermark_model.decode(signal) >= 0.5).int().detach().cpu().numpy().squeeze()
-            bits.append(message_decoded_npy)
-        bits = np.stack(bits).reshape(-1, 8)
-        message = utils.bits_to_string(bits)
-        return message
-
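Note: after this change, ToneColorConverter.convert no longer accepts a message argument and never embeds a watermark, so callers that still pass message= will raise a TypeError. A minimal sketch of the updated call, assuming a converter instance and speaker embeddings obtained elsewhere (the file paths below are placeholders, not part of this commit):

    # source_se / target_se are tone-color embeddings, e.g. from tone_color_converter.extract_se(...)
    # 'tmp.wav' and 'output.wav' are placeholder paths
    tone_color_converter.convert(
        audio_src_path='tmp.wav',
        src_se=source_se,
        tgt_se=target_se,
        output_path='output.wav',  # omit to get the converted numpy array returned instead
        tau=0.3,
    )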
app_locally.py
CHANGED
@@ -118,13 +118,11 @@ def predict(prompt, speaker_wav, transform_wav):
 
     save_path = f"{output_dir}/output.wav"
     # Run the tone color converter
-    encode_message = "@MyShell"
     tone_color_converter.convert(
         audio_src_path=src_path,
         src_se=source_se,
         tgt_se=target_se,
         output_path=save_path,
-        message=encode_message,
     )
 
     text_hint += f"""Get response successfully \n"""
requirements.txt
CHANGED
@@ -2,7 +2,6 @@ langid
 librosa==0.9.1
 faster-whisper==0.9.0
 pydub==0.25.1
-wavmark==0.0.2
 numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect==7.0.0
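With wavmark dropped from requirements.txt, a fresh environment should be able to construct the converter without the watermark dependency installed. A quick sanity check, assuming OpenVoice is importable from this repo layout and the usual checkpoint paths (both are assumptions, not part of this commit):

    # Import path and checkpoint locations below are assumed; adjust to the local layout.
    from OpenVoice.api import ToneColorConverter

    converter = ToneColorConverter('checkpoints/converter/config.json', device='cpu')
    converter.load_ckpt('checkpoints/converter/checkpoint.pth')  # should load with no wavmark import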