richiejp committed on
Commit
eee8304
·
verified ·
1 Parent(s): b180d02

Initial upload: LocalVQE demo Space

Browse files
.gitattributes CHANGED
@@ -43,3 +43,13 @@ examples/fe_st2_mic.flac filter=lfs diff=lfs merge=lfs -text
43
  examples/fe_st2_ref.flac filter=lfs diff=lfs merge=lfs -text
44
  examples/ne_st_clean_mic.flac filter=lfs diff=lfs merge=lfs -text
45
  examples/ne_st_clean_ref.flac filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
43
  examples/fe_st2_ref.flac filter=lfs diff=lfs merge=lfs -text
44
  examples/ne_st_clean_mic.flac filter=lfs diff=lfs merge=lfs -text
45
  examples/ne_st_clean_ref.flac filter=lfs diff=lfs merge=lfs -text
46
+ examples/dt_mic.wav filter=lfs diff=lfs merge=lfs -text
47
+ examples/dt_ref.wav filter=lfs diff=lfs merge=lfs -text
48
+ examples/fe_st2_mic.wav filter=lfs diff=lfs merge=lfs -text
49
+ examples/fe_st2_ref.wav filter=lfs diff=lfs merge=lfs -text
50
+ examples/fe_st_mic.wav filter=lfs diff=lfs merge=lfs -text
51
+ examples/fe_st_ref.wav filter=lfs diff=lfs merge=lfs -text
52
+ examples/ne_st_clean_mic.wav filter=lfs diff=lfs merge=lfs -text
53
+ examples/ne_st_clean_ref.wav filter=lfs diff=lfs merge=lfs -text
54
+ examples/ne_st_noisy_mic.wav filter=lfs diff=lfs merge=lfs -text
55
+ examples/ne_st_noisy_ref.wav filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -5,6 +5,7 @@ huggingface.co/LocalAI-io/LocalVQE. Set the env var
5
  `LOCALVQE_LOCAL_CKPT=/path/to/checkpoint.pt` to load a local file
6
  instead — useful for auditioning new training runs.
7
  """
 
8
  import os
9
  from pathlib import Path
10
 
@@ -22,14 +23,25 @@ CKPT_FILE = "localvqe-v1-1.3M.pt"
22
  EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
23
 
24
 
25
- def _build_model() -> LocalVQE:
 
 
 
 
 
 
 
 
26
  local_override = os.environ.get("LOCALVQE_LOCAL_CKPT")
27
  if local_override:
28
  ckpt_path = local_override
 
29
  print(f"Loading local checkpoint: {ckpt_path}")
30
  else:
31
  from huggingface_hub import hf_hub_download
32
  ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=CKPT_FILE)
 
 
33
  cfg = Config()
34
  peek = torch.load(ckpt_path, map_location="cpu", weights_only=False)
35
  apply_ckpt_model_config(peek, cfg)
@@ -43,11 +55,12 @@ def _build_model() -> LocalVQE:
43
  model.align.fold_temperature()
44
  model.eval()
45
  n_params = sum(p.numel() for p in model.parameters())
46
- print(f"LocalVQE loaded: {n_params:,} params from {ckpt_path}")
47
- return model
 
48
 
49
 
50
- MODEL = _build_model()
51
 
52
 
53
  def _load_mono_16k(path: str) -> np.ndarray:
@@ -93,24 +106,24 @@ def enhance(mic_path: str, ref_path: str) -> tuple[int, np.ndarray]:
93
 
94
  EXAMPLES = [
95
  [
96
- str(EXAMPLES_DIR / "ne_st_noisy_mic.flac"),
97
- str(EXAMPLES_DIR / "ne_st_noisy_ref.flac"),
98
  ],
99
  [
100
- str(EXAMPLES_DIR / "ne_st_clean_mic.flac"),
101
- str(EXAMPLES_DIR / "ne_st_clean_ref.flac"),
102
  ],
103
  [
104
- str(EXAMPLES_DIR / "fe_st_mic.flac"),
105
- str(EXAMPLES_DIR / "fe_st_ref.flac"),
106
  ],
107
  [
108
- str(EXAMPLES_DIR / "fe_st2_mic.flac"),
109
- str(EXAMPLES_DIR / "fe_st2_ref.flac"),
110
  ],
111
  [
112
- str(EXAMPLES_DIR / "dt_mic.flac"),
113
- str(EXAMPLES_DIR / "dt_ref.flac"),
114
  ],
115
  ]
116
 
@@ -130,8 +143,10 @@ Provide two inputs:
130
 
131
  Try the bundled examples first — they cover heavy and light
132
  near-end noise (NE-ST mixed with DNS5 background at 5 dB and 20 dB
133
- SNR), two far-end single-talk clips, and a double-talk clip from
134
- the ICASSP 2022 AEC Challenge blind set.
 
 
135
 
136
  Weights: [LocalAI-io/LocalVQE](https://huggingface.co/LocalAI-io/LocalVQE) ·
137
  Code: [github.com/richiejp/LocalVQE](https://github.com/richiejp/LocalVQE)
@@ -152,12 +167,19 @@ with gr.Blocks(title="LocalVQE Demo") as demo:
152
  label=(
153
  "Examples — top to bottom: near-end + heavy noise (5 dB SNR, "
154
  "pure NS), near-end + light noise (20 dB SNR, NS preserving "
155
- "clean speech), two far-end single-talk clips (pure AEC), "
156
- "double-talk (AEC while near-end is also talking)."
 
157
  ),
158
  )
159
 
160
  btn.click(enhance, inputs=[mic_in, ref_in], outputs=out)
161
 
 
 
 
 
 
 
162
  if __name__ == "__main__":
163
  demo.launch()
 
5
  `LOCALVQE_LOCAL_CKPT=/path/to/checkpoint.pt` to load a local file
6
  instead — useful for auditioning new training runs.
7
  """
8
+ import hashlib
9
  import os
10
  from pathlib import Path
11
 
 
23
  EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
24
 
25
 
26
+ def _sha256(path: str) -> str:
27
+ h = hashlib.sha256()
28
+ with open(path, "rb") as f:
29
+ for chunk in iter(lambda: f.read(1 << 20), b""):
30
+ h.update(chunk)
31
+ return h.hexdigest()
32
+
33
+
34
+ def _build_model() -> tuple[LocalVQE, dict]:
35
  local_override = os.environ.get("LOCALVQE_LOCAL_CKPT")
36
  if local_override:
37
  ckpt_path = local_override
38
+ source = f"local:{ckpt_path}"
39
  print(f"Loading local checkpoint: {ckpt_path}")
40
  else:
41
  from huggingface_hub import hf_hub_download
42
  ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=CKPT_FILE)
43
+ source = f"hf:{REPO_ID}/{CKPT_FILE}"
44
+ sha = _sha256(ckpt_path)
45
  cfg = Config()
46
  peek = torch.load(ckpt_path, map_location="cpu", weights_only=False)
47
  apply_ckpt_model_config(peek, cfg)
 
55
  model.align.fold_temperature()
56
  model.eval()
57
  n_params = sum(p.numel() for p in model.parameters())
58
+ info = {"source": source, "sha256": sha, "n_params": n_params}
59
+ print(f"LocalVQE loaded: {n_params:,} params sha256={sha} from {source}")
60
+ return model, info
61
 
62
 
63
+ MODEL, MODEL_INFO = _build_model()
64
 
65
 
66
  def _load_mono_16k(path: str) -> np.ndarray:
 
106
 
107
  EXAMPLES = [
108
  [
109
+ str(EXAMPLES_DIR / "ne_st_noisy_mic.wav"),
110
+ str(EXAMPLES_DIR / "ne_st_noisy_ref.wav"),
111
  ],
112
  [
113
+ str(EXAMPLES_DIR / "ne_st_clean_mic.wav"),
114
+ str(EXAMPLES_DIR / "ne_st_clean_ref.wav"),
115
  ],
116
  [
117
+ str(EXAMPLES_DIR / "fe_st_mic.wav"),
118
+ str(EXAMPLES_DIR / "fe_st_ref.wav"),
119
  ],
120
  [
121
+ str(EXAMPLES_DIR / "fe_st2_mic.wav"),
122
+ str(EXAMPLES_DIR / "fe_st2_ref.wav"),
123
  ],
124
  [
125
+ str(EXAMPLES_DIR / "dt_mic.wav"),
126
+ str(EXAMPLES_DIR / "dt_ref.wav"),
127
  ],
128
  ]
129
 
 
143
 
144
  Try the bundled examples first — they cover heavy and light
145
  near-end noise (NE-ST mixed with DNS5 background at 5 dB and 20 dB
146
+ SNR), a clean far-end single-talk clip, a far-end clip with some
147
+ near-end overlap (mislabelled in the source corpus, but a useful
148
+ test of AEC + near-end preservation together), and a double-talk
149
+ clip — all from the ICASSP 2022 AEC Challenge blind set.
150
 
151
  Weights: [LocalAI-io/LocalVQE](https://huggingface.co/LocalAI-io/LocalVQE) ·
152
  Code: [github.com/richiejp/LocalVQE](https://github.com/richiejp/LocalVQE)
 
167
  label=(
168
  "Examples — top to bottom: near-end + heavy noise (5 dB SNR, "
169
  "pure NS), near-end + light noise (20 dB SNR, NS preserving "
170
+ "clean speech), far-end single-talk (pure AEC), far-end with "
171
+ "brief near-end overlap (AEC while preserving NE), double-talk "
172
+ "(AEC while near-end is also talking)."
173
  ),
174
  )
175
 
176
  btn.click(enhance, inputs=[mic_in, ref_in], outputs=out)
177
 
178
+ gr.Markdown(
179
+ f"<sub>Loaded: <code>{MODEL_INFO['source']}</code> · "
180
+ f"sha256 <code>{MODEL_INFO['sha256'][:16]}…</code> · "
181
+ f"{MODEL_INFO['n_params']:,} params</sub>"
182
+ )
183
+
184
  if __name__ == "__main__":
185
  demo.launch()
examples/dt_mic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d0c15e56a4c7f387847451952123e8f23161520e81c1ba8878c922f11592a62
3
+ size 320044
examples/dt_ref.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e931d49e2802c1d7b15750caff1455495cd1dc40d27cb2fe51a8c58cbebe60fe
3
+ size 320044
examples/fe_st2_mic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66507ac731a84abd30d4c2cbc7701d16464d705067faef7f1c926049230483c6
3
+ size 320044
examples/fe_st2_ref.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e1cef5940243d47e2ac450d7781de0a3f0a805fd8843b77830fb7084e662b71
3
+ size 320044
examples/fe_st_mic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:564b26602ebfb64aa55a7646a2e3ec4c76801e8e602cfb94e07e7f0beed60f0c
3
+ size 320044
examples/fe_st_ref.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:390d63e7ea4741e958f4af4eeb1d729c4b58ccf4acce26dc82273f3c12ead0e5
3
+ size 320044
examples/ne_st_clean_mic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8392be1a0baa7b8f308dab157209c7cf52f22fefe9de4c0e8a07771b5acefad
3
+ size 320044
examples/ne_st_clean_ref.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16295fb4145e19a50076b0e24ef051186a01a28276532c549416fff9259888f8
3
+ size 320044
examples/ne_st_noisy_mic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d3fb1b800eddd790a1e033a5d553044173cd42e295a8a033a8d97de4e077445
3
+ size 320044
examples/ne_st_noisy_ref.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b1907eaf1a27fdc96774ac001fe11766142aadddbe36a95e563be78b708e59d
3
+ size 320044