Chuatury committed on
Commit
e26ae6d
·
unverified ·
1 Parent(s): 15cc387

remove watermark

Browse files
Files changed (3) hide show
  1. OpenVoice/api.py +1 -51
  2. app_locally.py +0 -2
  3. requirements.txt +0 -1
OpenVoice/api.py CHANGED
@@ -103,14 +103,6 @@ class ToneColorConverter(OpenVoiceBaseClass):
103
  def __init__(self, *args, **kwargs):
104
  super().__init__(*args, **kwargs)
105
 
106
- if kwargs.get('enable_watermark', True):
107
- import wavmark
108
- self.watermark_model = wavmark.load_model().to(self.device)
109
- else:
110
- self.watermark_model = None
111
-
112
-
113
-
114
  def extract_se(self, ref_wav_list, se_save_path=None):
115
  if isinstance(ref_wav_list, str):
116
  ref_wav_list = [ref_wav_list]
@@ -138,7 +130,7 @@ class ToneColorConverter(OpenVoiceBaseClass):
138
 
139
  return gs
140
 
141
- def convert(self, audio_src_path, src_se, tgt_se, output_path=None, tau=0.3, message="default"):
142
  hps = self.hps
143
  # load audio
144
  audio, sample_rate = librosa.load(audio_src_path, sr=hps.data.sampling_rate)
@@ -153,50 +145,8 @@ class ToneColorConverter(OpenVoiceBaseClass):
153
  spec_lengths = torch.LongTensor([spec.size(-1)]).to(self.device)
154
  audio = self.model.voice_conversion(spec, spec_lengths, sid_src=src_se, sid_tgt=tgt_se, tau=tau)[0][
155
  0, 0].data.cpu().float().numpy()
156
- audio = self.add_watermark(audio, message)
157
  if output_path is None:
158
  return audio
159
  else:
160
  soundfile.write(output_path, audio, hps.data.sampling_rate)
161
 
162
- def add_watermark(self, audio, message):
163
- if self.watermark_model is None:
164
- return audio
165
- device = self.device
166
- bits = utils.string_to_bits(message).reshape(-1)
167
- n_repeat = len(bits) // 32
168
-
169
- K = 16000
170
- coeff = 2
171
- for n in range(n_repeat):
172
- trunck = audio[(coeff * n) * K: (coeff * n + 1) * K]
173
- if len(trunck) != K:
174
- print('Audio too short, fail to add watermark')
175
- break
176
- message_npy = bits[n * 32: (n + 1) * 32]
177
-
178
- with torch.no_grad():
179
- signal = torch.FloatTensor(trunck).to(device)[None]
180
- message_tensor = torch.FloatTensor(message_npy).to(device)[None]
181
- signal_wmd_tensor = self.watermark_model.encode(signal, message_tensor)
182
- signal_wmd_npy = signal_wmd_tensor.detach().cpu().squeeze()
183
- audio[(coeff * n) * K: (coeff * n + 1) * K] = signal_wmd_npy
184
- return audio
185
-
186
- def detect_watermark(self, audio, n_repeat):
187
- bits = []
188
- K = 16000
189
- coeff = 2
190
- for n in range(n_repeat):
191
- trunck = audio[(coeff * n) * K: (coeff * n + 1) * K]
192
- if len(trunck) != K:
193
- print('Audio too short, fail to detect watermark')
194
- return 'Fail'
195
- with torch.no_grad():
196
- signal = torch.FloatTensor(trunck).to(self.device).unsqueeze(0)
197
- message_decoded_npy = (self.watermark_model.decode(signal) >= 0.5).int().detach().cpu().numpy().squeeze()
198
- bits.append(message_decoded_npy)
199
- bits = np.stack(bits).reshape(-1, 8)
200
- message = utils.bits_to_string(bits)
201
- return message
202
-
 
103
  def __init__(self, *args, **kwargs):
104
  super().__init__(*args, **kwargs)
105
 
 
 
 
 
 
 
 
 
106
  def extract_se(self, ref_wav_list, se_save_path=None):
107
  if isinstance(ref_wav_list, str):
108
  ref_wav_list = [ref_wav_list]
 
130
 
131
  return gs
132
 
133
+ def convert(self, audio_src_path, src_se, tgt_se, output_path=None, tau=0.3):
134
  hps = self.hps
135
  # load audio
136
  audio, sample_rate = librosa.load(audio_src_path, sr=hps.data.sampling_rate)
 
145
  spec_lengths = torch.LongTensor([spec.size(-1)]).to(self.device)
146
  audio = self.model.voice_conversion(spec, spec_lengths, sid_src=src_se, sid_tgt=tgt_se, tau=tau)[0][
147
  0, 0].data.cpu().float().numpy()
 
148
  if output_path is None:
149
  return audio
150
  else:
151
  soundfile.write(output_path, audio, hps.data.sampling_rate)
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app_locally.py CHANGED
@@ -118,13 +118,11 @@ def predict(prompt, speaker_wav, transform_wav):
118
 
119
  save_path = f"{output_dir}/output.wav"
120
  # Run the tone color converter
121
- encode_message = "@MyShell"
122
  tone_color_converter.convert(
123
  audio_src_path=src_path,
124
  src_se=source_se,
125
  tgt_se=target_se,
126
  output_path=save_path,
127
- message=encode_message,
128
  )
129
 
130
  text_hint += f"""Get response successfully \n"""
 
118
 
119
  save_path = f"{output_dir}/output.wav"
120
  # Run the tone color converter
 
121
  tone_color_converter.convert(
122
  audio_src_path=src_path,
123
  src_se=source_se,
124
  tgt_se=target_se,
125
  output_path=save_path,
 
126
  )
127
 
128
  text_hint += f"""Get response successfully \n"""
requirements.txt CHANGED
@@ -2,7 +2,6 @@ langid
2
  librosa==0.9.1
3
  faster-whisper==0.9.0
4
  pydub==0.25.1
5
- wavmark==0.0.2
6
  numpy==1.22.0
7
  eng_to_ipa==0.0.2
8
  inflect==7.0.0
 
2
  librosa==0.9.1
3
  faster-whisper==0.9.0
4
  pydub==0.25.1
 
5
  numpy==1.22.0
6
  eng_to_ipa==0.0.2
7
  inflect==7.0.0