Spaces:
Running
Running
Commit
·
33158ae
1
Parent(s):
e62c14e
Bug fix, segment
Browse files
- app.py +47 -7
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -4,6 +4,8 @@ import uuid
|
|
| 4 |
from pathlib import Path
|
| 5 |
from contextlib import contextmanager
|
| 6 |
|
|
|
|
|
|
|
| 7 |
import numpy as np
|
| 8 |
import torch
|
| 9 |
import gradio as gr
|
|
@@ -11,6 +13,7 @@ import librosa
|
|
| 11 |
|
| 12 |
from pyharp.core import ModelCard, build_endpoint
|
| 13 |
from pyharp.media.audio import save_audio
|
|
|
|
| 14 |
from audiotools import AudioSignal
|
| 15 |
from audioseal import AudioSeal
|
| 16 |
|
|
@@ -83,7 +86,7 @@ def detect(signal: AudioSignal, detector: torch.nn.Module):
|
|
| 83 |
return result[0, 1, :].detach().cpu().numpy()
|
| 84 |
|
| 85 |
def process_fn(inp_audio, option_text):
|
| 86 |
-
audio_np, sr =
|
| 87 |
|
| 88 |
print(f"sr: {sr}, audio shape: {audio_np.shape}")
|
| 89 |
if audio_np.ndim == 1:
|
|
@@ -93,18 +96,53 @@ def process_fn(inp_audio, option_text):
|
|
| 93 |
|
| 94 |
print(f"formatted audio: {audio_np.shape}")
|
| 95 |
|
| 96 |
-
|
| 97 |
-
orig_loud =
|
| 98 |
-
sig =
|
|
|
|
|
|
|
| 99 |
|
| 100 |
if option_text == "Generate Watermark":
|
| 101 |
with torch.no_grad():
|
| 102 |
wm_sig = embed(sig.clone(), generator).normalize(orig_loud).ensure_max_of_audio()
|
| 103 |
-
return save_audio(wm_sig)
|
| 104 |
else:
|
| 105 |
-
|
|
|
|
| 106 |
N = len(scores)
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
with gr.Blocks() as app:
|
| 110 |
gr.Markdown("## Meta AudioSeal Watermarking")
|
|
@@ -128,6 +166,7 @@ with gr.Blocks() as app:
|
|
| 128 |
type="filepath",
|
| 129 |
label="Watermarked Speech"
|
| 130 |
)
|
|
|
|
| 131 |
|
| 132 |
_ = build_endpoint(
|
| 133 |
model_card=model_card,
|
|
@@ -136,7 +175,8 @@ with gr.Blocks() as app:
|
|
| 136 |
option_dropdown
|
| 137 |
],
|
| 138 |
output_components=[
|
| 139 |
-
output_wav
|
|
|
|
| 140 |
],
|
| 141 |
process_fn=process_fn
|
| 142 |
)
|
|
|
|
| 4 |
from pathlib import Path
|
| 5 |
from contextlib import contextmanager
|
| 6 |
|
| 7 |
+
import ruptures as rpt
|
| 8 |
+
|
| 9 |
import numpy as np
|
| 10 |
import torch
|
| 11 |
import gradio as gr
|
|
|
|
| 13 |
|
| 14 |
from pyharp.core import ModelCard, build_endpoint
|
| 15 |
from pyharp.media.audio import save_audio
|
| 16 |
+
from pyharp import LabelList, AudioLabel, OutputLabel
|
| 17 |
from audiotools import AudioSignal
|
| 18 |
from audioseal import AudioSeal
|
| 19 |
|
|
|
|
| 86 |
return result[0, 1, :].detach().cpu().numpy()
|
| 87 |
|
| 88 |
def process_fn(inp_audio, option_text):
|
| 89 |
+
audio_np, sr = load_audio(inp_audio)
|
| 90 |
|
| 91 |
print(f"sr: {sr}, audio shape: {audio_np.shape}")
|
| 92 |
if audio_np.ndim == 1:
|
|
|
|
| 96 |
|
| 97 |
print(f"formatted audio: {audio_np.shape}")
|
| 98 |
|
| 99 |
+
ori_sig = AudioSignal(torch.from_numpy(audio_np).float(), sample_rate=sr)
|
| 100 |
+
orig_loud = ori_sig.loudness()
|
| 101 |
+
sig = ori_sig.to_mono().resample(SAMPLE_RATE).normalize(LOUDNESS_DB).ensure_max_of_audio()
|
| 102 |
+
|
| 103 |
+
output_labels = LabelList()
|
| 104 |
|
| 105 |
if option_text == "Generate Watermark":
|
| 106 |
with torch.no_grad():
|
| 107 |
wm_sig = embed(sig.clone(), generator).normalize(orig_loud).ensure_max_of_audio()
|
| 108 |
+
return save_audio(wm_sig), output_labels
|
| 109 |
else:
|
| 110 |
+
with torch.no_grad():
|
| 111 |
+
scores = detect(sig, detector) # AUDIOSEAL_SAMPLE_RATE
|
| 112 |
N = len(scores)
|
| 113 |
|
| 114 |
+
hop = int(0.01 * AUDIOSEAL_SAMPLE_RATE)
|
| 115 |
+
avg_curve = []
|
| 116 |
+
for i in range(0, N, hop):
|
| 117 |
+
seg = scores[i:i+hop]
|
| 118 |
+
value = np.mean(seg)
|
| 119 |
+
avg_curve.append(value)
|
| 120 |
+
|
| 121 |
+
avg_curve = np.array(avg_curve)
|
| 122 |
+
print(avg_curve.shape)
|
| 123 |
+
|
| 124 |
+
min_size = max(2, int(0.25 * AUDIOSEAL_SAMPLE_RATE))
|
| 125 |
+
bkps = rpt.Pelt(model="l2", min_size=1).fit_predict(avg_curve, 1.0)
|
| 126 |
+
|
| 127 |
+
t0 = 0
|
| 128 |
+
for t1 in bkps:
|
| 129 |
+
print(t0, t1)
|
| 130 |
+
seg = avg_curve[t0:t1]
|
| 131 |
+
value = seg.mean()
|
| 132 |
+
output_labels.labels.append(
|
| 133 |
+
AudioLabel(
|
| 134 |
+
t = (t0 / 100),
|
| 135 |
+
label = f"watermark: {value:.2f}",
|
| 136 |
+
duration = (t1 - t0) / 100,
|
| 137 |
+
description = f"watermark confidence: {value:.2f}, start: {(t0 / 100):.2f}s, end: {(t1 / 100):.2f}s",
|
| 138 |
+
color = OutputLabel.rgb_color_to_int(int(value * 255), int((1 - value) * 255), 0),
|
| 139 |
+
amplitude = value * 2 - 1
|
| 140 |
+
)
|
| 141 |
+
)
|
| 142 |
+
t0 = t1
|
| 143 |
+
|
| 144 |
+
return save_audio(ori_sig), output_labels
|
| 145 |
+
|
| 146 |
|
| 147 |
with gr.Blocks() as app:
|
| 148 |
gr.Markdown("## Meta AudioSeal Watermarking")
|
|
|
|
| 166 |
type="filepath",
|
| 167 |
label="Watermarked Speech"
|
| 168 |
)
|
| 169 |
+
output_label = gr.JSON(label="Watermark Confidence")
|
| 170 |
|
| 171 |
_ = build_endpoint(
|
| 172 |
model_card=model_card,
|
|
|
|
| 175 |
option_dropdown
|
| 176 |
],
|
| 177 |
output_components=[
|
| 178 |
+
output_wav,
|
| 179 |
+
output_label
|
| 180 |
],
|
| 181 |
process_fn=process_fn
|
| 182 |
)
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
git+https://github.com/TEAMuP-dev/pyharp.git@v0.3.0
|
| 2 |
audioseal
|
| 3 |
librosa
|
| 4 |
-
pydantic==2.10.6
|
|
|
|
|
|
| 1 |
git+https://github.com/TEAMuP-dev/pyharp.git@v0.3.0
|
| 2 |
audioseal
|
| 3 |
librosa
|
| 4 |
+
pydantic==2.10.6
|
| 5 |
+
ruptures
|