Spaces:

LocalAI-io
/

LocalVQE-demo

Running

App Files Files Community

richiejp committed on 23 days ago

Commit

eee8304

verified ·

1 Parent(s): b180d02

Initial upload: LocalVQE demo Space

Browse files

Files changed (12) hide show

.gitattributes +10 -0
app.py +40 -18
examples/dt_mic.wav +3 -0
examples/dt_ref.wav +3 -0
examples/fe_st2_mic.wav +3 -0
examples/fe_st2_ref.wav +3 -0
examples/fe_st_mic.wav +3 -0
examples/fe_st_ref.wav +3 -0
examples/ne_st_clean_mic.wav +3 -0
examples/ne_st_clean_ref.wav +3 -0
examples/ne_st_noisy_mic.wav +3 -0
examples/ne_st_noisy_ref.wav +3 -0

.gitattributes CHANGED Viewed

@@ -43,3 +43,13 @@ examples/fe_st2_mic.flac filter=lfs diff=lfs merge=lfs -text
 examples/fe_st2_ref.flac filter=lfs diff=lfs merge=lfs -text
 examples/ne_st_clean_mic.flac filter=lfs diff=lfs merge=lfs -text
 examples/ne_st_clean_ref.flac filter=lfs diff=lfs merge=lfs -text

 examples/fe_st2_ref.flac filter=lfs diff=lfs merge=lfs -text
 examples/ne_st_clean_mic.flac filter=lfs diff=lfs merge=lfs -text
 examples/ne_st_clean_ref.flac filter=lfs diff=lfs merge=lfs -text
+examples/dt_mic.wav filter=lfs diff=lfs merge=lfs -text
+examples/dt_ref.wav filter=lfs diff=lfs merge=lfs -text
+examples/fe_st2_mic.wav filter=lfs diff=lfs merge=lfs -text
+examples/fe_st2_ref.wav filter=lfs diff=lfs merge=lfs -text
+examples/fe_st_mic.wav filter=lfs diff=lfs merge=lfs -text
+examples/fe_st_ref.wav filter=lfs diff=lfs merge=lfs -text
+examples/ne_st_clean_mic.wav filter=lfs diff=lfs merge=lfs -text
+examples/ne_st_clean_ref.wav filter=lfs diff=lfs merge=lfs -text
+examples/ne_st_noisy_mic.wav filter=lfs diff=lfs merge=lfs -text
+examples/ne_st_noisy_ref.wav filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ huggingface.co/LocalAI-io/LocalVQE. Set the env var
 `LOCALVQE_LOCAL_CKPT=/path/to/checkpoint.pt` to load a local file
 instead — useful for auditioning new training runs.
 """
 import os
 from pathlib import Path
@@ -22,14 +23,25 @@ CKPT_FILE = "localvqe-v1-1.3M.pt"
 EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
-def _build_model() -> LocalVQE:
     local_override = os.environ.get("LOCALVQE_LOCAL_CKPT")
     if local_override:
         ckpt_path = local_override
         print(f"Loading local checkpoint: {ckpt_path}")
     else:
         from huggingface_hub import hf_hub_download
         ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=CKPT_FILE)
     cfg = Config()
     peek = torch.load(ckpt_path, map_location="cpu", weights_only=False)
     apply_ckpt_model_config(peek, cfg)
@@ -43,11 +55,12 @@ def _build_model() -> LocalVQE:
     model.align.fold_temperature()
     model.eval()
     n_params = sum(p.numel() for p in model.parameters())
-    print(f"LocalVQE loaded: {n_params:,} params from {ckpt_path}")
-    return model
-MODEL = _build_model()
 def _load_mono_16k(path: str) -> np.ndarray:
@@ -93,24 +106,24 @@ def enhance(mic_path: str, ref_path: str) -> tuple[int, np.ndarray]:
 EXAMPLES = [
     [
-        str(EXAMPLES_DIR / "ne_st_noisy_mic.flac"),
-        str(EXAMPLES_DIR / "ne_st_noisy_ref.flac"),
     ],
     [
-        str(EXAMPLES_DIR / "ne_st_clean_mic.flac"),
-        str(EXAMPLES_DIR / "ne_st_clean_ref.flac"),
     ],
     [
-        str(EXAMPLES_DIR / "fe_st_mic.flac"),
-        str(EXAMPLES_DIR / "fe_st_ref.flac"),
     ],
     [
-        str(EXAMPLES_DIR / "fe_st2_mic.flac"),
-        str(EXAMPLES_DIR / "fe_st2_ref.flac"),
     ],
     [
-        str(EXAMPLES_DIR / "dt_mic.flac"),
-        str(EXAMPLES_DIR / "dt_ref.flac"),
     ],
 ]
@@ -130,8 +143,10 @@ Provide two inputs:
 Try the bundled examples first — they cover heavy and light
 near-end noise (NE-ST mixed with DNS5 background at 5 dB and 20 dB
-SNR), two far-end single-talk clips, and a double-talk clip from
-the ICASSP 2022 AEC Challenge blind set.
 Weights: [LocalAI-io/LocalVQE](https://huggingface.co/LocalAI-io/LocalVQE) ·
 Code: [github.com/richiejp/LocalVQE](https://github.com/richiejp/LocalVQE)
@@ -152,12 +167,19 @@ with gr.Blocks(title="LocalVQE Demo") as demo:
         label=(
             "Examples — top to bottom: near-end + heavy noise (5 dB SNR, "
             "pure NS), near-end + light noise (20 dB SNR, NS preserving "
-            "clean speech), two far-end single-talk clips (pure AEC), "
-            "double-talk (AEC while near-end is also talking)."
         ),
     )
     btn.click(enhance, inputs=[mic_in, ref_in], outputs=out)
 if __name__ == "__main__":
     demo.launch()

 `LOCALVQE_LOCAL_CKPT=/path/to/checkpoint.pt` to load a local file
 instead — useful for auditioning new training runs.
 """
+import hashlib
 import os
 from pathlib import Path
 EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
+def _sha256(path: str) -> str:
+    h = hashlib.sha256()
+    with open(path, "rb") as f:
+        for chunk in iter(lambda: f.read(1 << 20), b""):
+            h.update(chunk)
+    return h.hexdigest()
+def _build_model() -> tuple[LocalVQE, dict]:
     local_override = os.environ.get("LOCALVQE_LOCAL_CKPT")
     if local_override:
         ckpt_path = local_override
+        source = f"local:{ckpt_path}"
         print(f"Loading local checkpoint: {ckpt_path}")
     else:
         from huggingface_hub import hf_hub_download
         ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=CKPT_FILE)
+        source = f"hf:{REPO_ID}/{CKPT_FILE}"
+    sha = _sha256(ckpt_path)
     cfg = Config()
     peek = torch.load(ckpt_path, map_location="cpu", weights_only=False)
     apply_ckpt_model_config(peek, cfg)
     model.align.fold_temperature()
     model.eval()
     n_params = sum(p.numel() for p in model.parameters())
+    info = {"source": source, "sha256": sha, "n_params": n_params}
+    print(f"LocalVQE loaded: {n_params:,} params  sha256={sha}  from {source}")
+    return model, info
+MODEL, MODEL_INFO = _build_model()
 def _load_mono_16k(path: str) -> np.ndarray:
 EXAMPLES = [
     [
+        str(EXAMPLES_DIR / "ne_st_noisy_mic.wav"),
+        str(EXAMPLES_DIR / "ne_st_noisy_ref.wav"),
     ],
     [
+        str(EXAMPLES_DIR / "ne_st_clean_mic.wav"),
+        str(EXAMPLES_DIR / "ne_st_clean_ref.wav"),
     ],
     [
+        str(EXAMPLES_DIR / "fe_st_mic.wav"),
+        str(EXAMPLES_DIR / "fe_st_ref.wav"),
     ],
     [
+        str(EXAMPLES_DIR / "fe_st2_mic.wav"),
+        str(EXAMPLES_DIR / "fe_st2_ref.wav"),
     ],
     [
+        str(EXAMPLES_DIR / "dt_mic.wav"),
+        str(EXAMPLES_DIR / "dt_ref.wav"),
     ],
 ]
 Try the bundled examples first — they cover heavy and light
 near-end noise (NE-ST mixed with DNS5 background at 5 dB and 20 dB
+SNR), a clean far-end single-talk clip, a far-end clip with some
+near-end overlap (mislabelled in the source corpus, but a useful
+test of AEC + near-end preservation together), and a double-talk
+clip — all from the ICASSP 2022 AEC Challenge blind set.
 Weights: [LocalAI-io/LocalVQE](https://huggingface.co/LocalAI-io/LocalVQE) ·
 Code: [github.com/richiejp/LocalVQE](https://github.com/richiejp/LocalVQE)
         label=(
             "Examples — top to bottom: near-end + heavy noise (5 dB SNR, "
             "pure NS), near-end + light noise (20 dB SNR, NS preserving "
+            "clean speech), far-end single-talk (pure AEC), far-end with "
+            "brief near-end overlap (AEC while preserving NE), double-talk "
+            "(AEC while near-end is also talking)."
         ),
     )
     btn.click(enhance, inputs=[mic_in, ref_in], outputs=out)
+    gr.Markdown(
+        f"<sub>Loaded: <code>{MODEL_INFO['source']}</code> · "
+        f"sha256 <code>{MODEL_INFO['sha256'][:16]}…</code> · "
+        f"{MODEL_INFO['n_params']:,} params</sub>"
+    )
 if __name__ == "__main__":
     demo.launch()

examples/dt_mic.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d0c15e56a4c7f387847451952123e8f23161520e81c1ba8878c922f11592a62
+size 320044

examples/dt_ref.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e931d49e2802c1d7b15750caff1455495cd1dc40d27cb2fe51a8c58cbebe60fe
+size 320044

examples/fe_st2_mic.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66507ac731a84abd30d4c2cbc7701d16464d705067faef7f1c926049230483c6
+size 320044

examples/fe_st2_ref.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1cef5940243d47e2ac450d7781de0a3f0a805fd8843b77830fb7084e662b71
+size 320044

examples/fe_st_mic.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:564b26602ebfb64aa55a7646a2e3ec4c76801e8e602cfb94e07e7f0beed60f0c
+size 320044

examples/fe_st_ref.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:390d63e7ea4741e958f4af4eeb1d729c4b58ccf4acce26dc82273f3c12ead0e5
+size 320044

examples/ne_st_clean_mic.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8392be1a0baa7b8f308dab157209c7cf52f22fefe9de4c0e8a07771b5acefad
+size 320044

examples/ne_st_clean_ref.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16295fb4145e19a50076b0e24ef051186a01a28276532c549416fff9259888f8
+size 320044

examples/ne_st_noisy_mic.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d3fb1b800eddd790a1e033a5d553044173cd42e295a8a033a8d97de4e077445
+size 320044

examples/ne_st_noisy_ref.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b1907eaf1a27fdc96774ac001fe11766142aadddbe36a95e563be78b708e59d
+size 320044