ellagranger committed on
Commit
33158ae
·
1 Parent(s): e62c14e

Bug fix, segment

Browse files
Files changed (2) hide show
  1. app.py +47 -7
  2. requirements.txt +2 -1
app.py CHANGED
@@ -4,6 +4,8 @@ import uuid
4
  from pathlib import Path
5
  from contextlib import contextmanager
6
 
 
 
7
  import numpy as np
8
  import torch
9
  import gradio as gr
@@ -11,6 +13,7 @@ import librosa
11
 
12
  from pyharp.core import ModelCard, build_endpoint
13
  from pyharp.media.audio import save_audio
 
14
  from audiotools import AudioSignal
15
  from audioseal import AudioSeal
16
 
@@ -83,7 +86,7 @@ def detect(signal: AudioSignal, detector: torch.nn.Module):
83
  return result[0, 1, :].detach().cpu().numpy()
84
 
85
  def process_fn(inp_audio, option_text):
86
- audio_np, sr = torch.from_numpy(load_audio(inp_audio))
87
 
88
  print(f"sr: {sr}, audio shape: {audio_np.shape}")
89
  if audio_np.ndim == 1:
@@ -93,18 +96,53 @@ def process_fn(inp_audio, option_text):
93
 
94
  print(f"formatted audio: {audio_np.shape}")
95
 
96
- sig = AudioSignal(torch.from_numpy(audio_np).float(), sample_rate=sr)
97
- orig_loud = sig.loudness()
98
- sig = sig.to_mono().resample(SAMPLE_RATE).normalize(LOUDNESS_DB).ensure_max_of_audio()
 
 
99
 
100
  if option_text == "Generate Watermark":
101
  with torch.no_grad():
102
  wm_sig = embed(sig.clone(), generator).normalize(orig_loud).ensure_max_of_audio()
103
- return save_audio(wm_sig)
104
  else:
105
- scores = detect(sig, detector)
 
106
  N = len(scores)
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  with gr.Blocks() as app:
110
  gr.Markdown("## Meta AudioSeal Watermarking")
@@ -128,6 +166,7 @@ with gr.Blocks() as app:
128
  type="filepath",
129
  label="Watermarked Speech"
130
  )
 
131
 
132
  _ = build_endpoint(
133
  model_card=model_card,
@@ -136,7 +175,8 @@ with gr.Blocks() as app:
136
  option_dropdown
137
  ],
138
  output_components=[
139
- output_wav
 
140
  ],
141
  process_fn=process_fn
142
  )
 
4
  from pathlib import Path
5
  from contextlib import contextmanager
6
 
7
+ import ruptures as rpt
8
+
9
  import numpy as np
10
  import torch
11
  import gradio as gr
 
13
 
14
  from pyharp.core import ModelCard, build_endpoint
15
  from pyharp.media.audio import save_audio
16
+ from pyharp import LabelList, AudioLabel, OutputLabel
17
  from audiotools import AudioSignal
18
  from audioseal import AudioSeal
19
 
 
86
  return result[0, 1, :].detach().cpu().numpy()
87
 
88
  def process_fn(inp_audio, option_text):
89
+ audio_np, sr = load_audio(inp_audio)
90
 
91
  print(f"sr: {sr}, audio shape: {audio_np.shape}")
92
  if audio_np.ndim == 1:
 
96
 
97
  print(f"formatted audio: {audio_np.shape}")
98
 
99
+ ori_sig = AudioSignal(torch.from_numpy(audio_np).float(), sample_rate=sr)
100
+ orig_loud = ori_sig.loudness()
101
+ sig = ori_sig.to_mono().resample(SAMPLE_RATE).normalize(LOUDNESS_DB).ensure_max_of_audio()
102
+
103
+ output_labels = LabelList()
104
 
105
  if option_text == "Generate Watermark":
106
  with torch.no_grad():
107
  wm_sig = embed(sig.clone(), generator).normalize(orig_loud).ensure_max_of_audio()
108
+ return save_audio(wm_sig), output_labels
109
  else:
110
+ with torch.no_grad():
111
+ scores = detect(sig, detector) # AUDIOSEAL_SAMPLE_RATE
112
  N = len(scores)
113
 
114
+ hop = int(0.01 * AUDIOSEAL_SAMPLE_RATE)
115
+ avg_curve = []
116
+ for i in range(0, N, hop):
117
+ seg = scores[i:i+hop]
118
+ value = np.mean(seg)
119
+ avg_curve.append(value)
120
+
121
+ avg_curve = np.array(avg_curve)
122
+ print(avg_curve.shape)
123
+
124
+ min_size = max(2, int(0.25 * AUDIOSEAL_SAMPLE_RATE))
125
+ bkps = rpt.Pelt(model="l2", min_size=1).fit_predict(avg_curve, 1.0)
126
+
127
+ t0 = 0
128
+ for t1 in bkps:
129
+ print(t0, t1)
130
+ seg = avg_curve[t0:t1]
131
+ value = seg.mean()
132
+ output_labels.labels.append(
133
+ AudioLabel(
134
+ t = (t0 / 100),
135
+ label = f"watermark: {value:.2f}",
136
+ duration = (t1 - t0) / 100,
137
+ description = f"watermark confidence: {value:.2f}, start: {(t0 / 100):.2f}s, end: {(t1 / 100):.2f}s",
138
+ color = OutputLabel.rgb_color_to_int(int(value * 255), int((1 - value) * 255), 0),
139
+ amplitude = value * 2 - 1
140
+ )
141
+ )
142
+ t0 = t1
143
+
144
+ return save_audio(ori_sig), output_labels
145
+
146
 
147
  with gr.Blocks() as app:
148
  gr.Markdown("## Meta AudioSeal Watermarking")
 
166
  type="filepath",
167
  label="Watermarked Speech"
168
  )
169
+ output_label = gr.JSON(label="Watermark Confidence")
170
 
171
  _ = build_endpoint(
172
  model_card=model_card,
 
175
  option_dropdown
176
  ],
177
  output_components=[
178
+ output_wav,
179
+ output_label
180
  ],
181
  process_fn=process_fn
182
  )
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  git+https://github.com/TEAMuP-dev/pyharp.git@v0.3.0
2
  audioseal
3
  librosa
4
- pydantic==2.10.6
 
 
1
  git+https://github.com/TEAMuP-dev/pyharp.git@v0.3.0
2
  audioseal
3
  librosa
4
+ pydantic==2.10.6
5
+ ruptures