Spaces:
Running
Running
Commit
·
33158ae
1
Parent(s):
e62c14e
Bug fix, segment
Browse files
- app.py +47 -7
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -4,6 +4,8 @@ import uuid
|
|
| 4 |
from pathlib import Path
|
| 5 |
from contextlib import contextmanager
|
| 6 |
|
|
|
|
|
|
|
| 7 |
import numpy as np
|
| 8 |
import torch
|
| 9 |
import gradio as gr
|
|
@@ -11,6 +13,7 @@ import librosa
|
|
| 11 |
|
| 12 |
from pyharp.core import ModelCard, build_endpoint
|
| 13 |
from pyharp.media.audio import save_audio
|
|
|
|
| 14 |
from audiotools import AudioSignal
|
| 15 |
from audioseal import AudioSeal
|
| 16 |
|
|
@@ -83,7 +86,7 @@ def detect(signal: AudioSignal, detector: torch.nn.Module):
|
|
| 83 |
return result[0, 1, :].detach().cpu().numpy()
|
| 84 |
|
| 85 |
def process_fn(inp_audio, option_text):
|
| 86 |
-
audio_np, sr =
|
| 87 |
|
| 88 |
print(f"sr: {sr}, audio shape: {audio_np.shape}")
|
| 89 |
if audio_np.ndim == 1:
|
|
@@ -93,18 +96,53 @@ def process_fn(inp_audio, option_text):
|
|
| 93 |
|
| 94 |
print(f"formatted audio: {audio_np.shape}")
|
| 95 |
|
| 96 |
-
|
| 97 |
-
orig_loud =
|
| 98 |
-
sig =
|
|
|
|
|
|
|
| 99 |
|
| 100 |
if option_text == "Generate Watermark":
|
| 101 |
with torch.no_grad():
|
| 102 |
wm_sig = embed(sig.clone(), generator).normalize(orig_loud).ensure_max_of_audio()
|
| 103 |
-
return save_audio(wm_sig)
|
| 104 |
else:
|
| 105 |
-
|
|
|
|
| 106 |
N = len(scores)
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
with gr.Blocks() as app:
|
| 110 |
gr.Markdown("## Meta AudioSeal Watermarking")
|
|
@@ -128,6 +166,7 @@ with gr.Blocks() as app:
|
|
| 128 |
type="filepath",
|
| 129 |
label="Watermarked Speech"
|
| 130 |
)
|
|
|
|
| 131 |
|
| 132 |
_ = build_endpoint(
|
| 133 |
model_card=model_card,
|
|
@@ -136,7 +175,8 @@ with gr.Blocks() as app:
|
|
| 136 |
option_dropdown
|
| 137 |
],
|
| 138 |
output_components=[
|
| 139 |
-
output_wav
|
|
|
|
| 140 |
],
|
| 141 |
process_fn=process_fn
|
| 142 |
)
|
|
|
|
| 4 |
from pathlib import Path
|
| 5 |
from contextlib import contextmanager
|
| 6 |
|
| 7 |
+
import ruptures as rpt
|
| 8 |
+
|
| 9 |
import numpy as np
|
| 10 |
import torch
|
| 11 |
import gradio as gr
|
|
|
|
| 13 |
|
| 14 |
from pyharp.core import ModelCard, build_endpoint
|
| 15 |
from pyharp.media.audio import save_audio
|
| 16 |
+
from pyharp import LabelList, AudioLabel, OutputLabel
|
| 17 |
from audiotools import AudioSignal
|
| 18 |
from audioseal import AudioSeal
|
| 19 |
|
|
|
|
| 86 |
return result[0, 1, :].detach().cpu().numpy()
|
| 87 |
|
| 88 |
def process_fn(inp_audio, option_text):
|
| 89 |
+
audio_np, sr = load_audio(inp_audio)
|
| 90 |
|
| 91 |
print(f"sr: {sr}, audio shape: {audio_np.shape}")
|
| 92 |
if audio_np.ndim == 1:
|
|
|
|
| 96 |
|
| 97 |
print(f"formatted audio: {audio_np.shape}")
|
| 98 |
|
| 99 |
+
ori_sig = AudioSignal(torch.from_numpy(audio_np).float(), sample_rate=sr)
|
| 100 |
+
orig_loud = ori_sig.loudness()
|
| 101 |
+
sig = ori_sig.to_mono().resample(SAMPLE_RATE).normalize(LOUDNESS_DB).ensure_max_of_audio()
|
| 102 |
+
|
| 103 |
+
output_labels = LabelList()
|
| 104 |
|
| 105 |
if option_text == "Generate Watermark":
|
| 106 |
with torch.no_grad():
|
| 107 |
wm_sig = embed(sig.clone(), generator).normalize(orig_loud).ensure_max_of_audio()
|
| 108 |
+
return save_audio(wm_sig), output_labels
|
| 109 |
else:
|
| 110 |
+
with torch.no_grad():
|
| 111 |
+
scores = detect(sig, detector) # AUDIOSEAL_SAMPLE_RATE
|
| 112 |
N = len(scores)
|
| 113 |
|
| 114 |
+
hop = int(0.01 * AUDIOSEAL_SAMPLE_RATE)
|
| 115 |
+
avg_curve = []
|
| 116 |
+
for i in range(0, N, hop):
|
| 117 |
+
seg = scores[i:i+hop]
|
| 118 |
+
value = np.mean(seg)
|
| 119 |
+
avg_curve.append(value)
|
| 120 |
+
|
| 121 |
+
avg_curve = np.array(avg_curve)
|
| 122 |
+
print(avg_curve.shape)
|
| 123 |
+
|
| 124 |
+
min_size = max(2, int(0.25 * AUDIOSEAL_SAMPLE_RATE))
|
| 125 |
+
bkps = rpt.Pelt(model="l2", min_size=1).fit_predict(avg_curve, 1.0)
|
| 126 |
+
|
| 127 |
+
t0 = 0
|
| 128 |
+
for t1 in bkps:
|
| 129 |
+
print(t0, t1)
|
| 130 |
+
seg = avg_curve[t0:t1]
|
| 131 |
+
value = seg.mean()
|
| 132 |
+
output_labels.labels.append(
|
| 133 |
+
AudioLabel(
|
| 134 |
+
t = (t0 / 100),
|
| 135 |
+
label = f"watermark: {value:.2f}",
|
| 136 |
+
duration = (t1 - t0) / 100,
|
| 137 |
+
description = f"watermark confidence: {value:.2f}, start: {(t0 / 100):.2f}s, end: {(t1 / 100):.2f}s",
|
| 138 |
+
color = OutputLabel.rgb_color_to_int(int(value * 255), int((1 - value) * 255), 0),
|
| 139 |
+
amplitude = value * 2 - 1
|
| 140 |
+
)
|
| 141 |
+
)
|
| 142 |
+
t0 = t1
|
| 143 |
+
|
| 144 |
+
return save_audio(ori_sig), output_labels
|
| 145 |
+
|
| 146 |
|
| 147 |
with gr.Blocks() as app:
|
| 148 |
gr.Markdown("## Meta AudioSeal Watermarking")
|
|
|
|
| 166 |
type="filepath",
|
| 167 |
label="Watermarked Speech"
|
| 168 |
)
|
| 169 |
+
output_label = gr.JSON(label="Watermark Confidence")
|
| 170 |
|
| 171 |
_ = build_endpoint(
|
| 172 |
model_card=model_card,
|
|
|
|
| 175 |
option_dropdown
|
| 176 |
],
|
| 177 |
output_components=[
|
| 178 |
+
output_wav,
|
| 179 |
+
output_label
|
| 180 |
],
|
| 181 |
process_fn=process_fn
|
| 182 |
)
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
git+https://github.com/TEAMuP-dev/pyharp.git@v0.3.0
|
| 2 |
audioseal
|
| 3 |
librosa
|
| 4 |
-
pydantic==2.10.6
|
|
|
|
|
|
| 1 |
git+https://github.com/TEAMuP-dev/pyharp.git@v0.3.0
|
| 2 |
audioseal
|
| 3 |
librosa
|
| 4 |
+
pydantic==2.10.6
|
| 5 |
+
ruptures
|