ambujm22 committed on
Commit
8071baa
·
verified ·
1 Parent(s): f101c4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -79
app.py CHANGED
@@ -1,67 +1,81 @@
1
  import os
 
 
 
 
 
 
2
  os.environ.setdefault("GRADIO_USE_CDN", "true")
3
 
 
4
try:
    import spaces  # HF Spaces SDK
except Exception:
    # The SDK could not be imported: install a stand-in so the
    # ``@spaces.GPU`` decorators used in this file still work.
    class _DummySpaces:
        """No-op replacement exposing the ``GPU`` decorator."""

        def GPU(self, *args, **kwargs):
            """Return the decorated function unchanged.

            Supports both ``@spaces.GPU`` (bare) and
            ``@spaces.GPU(duration=...)`` (called with options).
            """
            # Bare usage: the function itself is the only positional argument.
            if len(args) == 1 and not kwargs and callable(args[0]):
                return args[0]

            def deco(fn):
                return fn

            return deco

    spaces = _DummySpaces()
12
 
13
-
14
@spaces.GPU(duration=10)
def gpu_probe(a: int = 1, b: int = 1):
    """Return ``a + b``; a minimal function wrapped by ``spaces.GPU``."""
    total = a + b
    return total
17
-
18
@spaces.GPU(duration=10)
def gpu_echo(x: str = "ok"):
    """Return ``x`` unchanged; a minimal function wrapped by ``spaces.GPU``."""
    echoed = x
    return echoed
21
-
22
- # ================= Standard imports =================
23
- import sys
24
- import subprocess
25
- from pathlib import Path
26
- from typing import Tuple, Optional, List, Any
27
-
28
  import gradio as gr
29
  import numpy as np
30
  import soundfile as sf
31
  from huggingface_hub import hf_hub_download
32
 
33
- # Runtime hints (safe on CPU)
34
  USE_ZEROGPU = os.getenv("SPACE_RUNTIME", "").lower() == "zerogpu"
35
 
36
- SPACE_ROOT = Path(__file__).parent.resolve()
37
- REPO_DIR = SPACE_ROOT / "SonicMasterRepo"
38
- REPO_URL = "https://github.com/AMAAI-Lab/SonicMaster"
39
  WEIGHTS_REPO = "amaai-lab/SonicMaster"
40
  WEIGHTS_FILE = "model.safetensors"
41
- CACHE_DIR = SPACE_ROOT / "weights"
42
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
43
 
44
- # ================ Repo clone AT STARTUP (so examples show immediately) ================
45
def ensure_repo() -> Path:
    """
    Make sure the SonicMaster repo is cloned into REPO_DIR and on sys.path.

    Returns:
        REPO_DIR, the local checkout directory.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero.
    """
    import shutil

    repo_str = REPO_DIR.as_posix()
    if not REPO_DIR.exists():
        try:
            subprocess.run(
                ["git", "clone", "--depth", "1", REPO_URL, repo_str],
                check=True,
            )
        except BaseException:
            # REPO_DIR did not exist before this call, so anything there now
            # is a partial checkout. Remove it so the next call retries
            # instead of treating a broken directory as a valid repo.
            shutil.rmtree(REPO_DIR, ignore_errors=True)
            raise
    if repo_str not in sys.path:
        sys.path.append(repo_str)
    return REPO_DIR
54
 
55
- # Clone now so examples are available immediately
56
- ensure_repo()
57
 
58
- # ================ Weights: still lazy (download at first run) ================
59
  _weights_path: Optional[Path] = None
 
60
  def get_weights_path(progress: Optional[gr.Progress] = None) -> Path:
61
- """Download/resolve weights lazily (keeps startup fast)."""
 
 
62
  global _weights_path
63
  if _weights_path is None:
64
- if progress: progress(0.10, desc="Downloading model weights (first run)")
 
65
  wp = hf_hub_download(
66
  repo_id=WEIGHTS_REPO,
67
  filename=WEIGHTS_FILE,
@@ -73,7 +87,7 @@ def get_weights_path(progress: Optional[gr.Progress] = None) -> Path:
73
  _weights_path = Path(wp)
74
  return _weights_path
75
 
76
- # ================== Helpers ==================
77
  def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
78
  # Ensure shape (samples, channels)
79
  if wav.ndim == 2 and wav.shape[0] < wav.shape[1]:
@@ -84,18 +98,31 @@ def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
84
 
85
def read_audio(path: str) -> Tuple[np.ndarray, int]:
    """Read an audio file via soundfile and return ``(samples, sample_rate)``.

    Samples read as float64 are converted to float32; any other dtype is
    returned as-is.
    """
    samples, sample_rate = sf.read(path, always_2d=False)
    needs_downcast = samples.dtype == np.float64
    return (samples.astype(np.float32) if needs_downcast else samples), sample_rate
90
 
91
- def _candidate_commands(py: str, script: Path, ckpt: Path, inp: Path, prompt: str, out: Path) -> List[List[str]]:
 
 
 
92
  """
93
  Only support infer_single.py variants.
94
- Expected primary flags: --ckpt --input --prompt --output
95
  """
96
  return [
97
- [py, script.as_posix(), "--ckpt", ckpt.as_posix(), "--input", inp.as_posix(), "--prompt", prompt, "--output", out.as_posix()],
98
-
 
 
 
 
 
 
 
 
 
 
99
  ]
100
 
101
  def run_sonicmaster_cli(
@@ -104,11 +131,18 @@ def run_sonicmaster_cli(
104
  out_path: Path,
105
  progress: Optional[gr.Progress] = None,
106
  ) -> Tuple[bool, str]:
107
- """Run inference via subprocess; returns (ok, message). Uses ONLY infer_single.py."""
108
- # 🔧 Ensure a non-empty prompt for the CLI
 
 
 
 
 
 
109
  prompt = (prompt or "").strip() or "Enhance the input audio"
110
 
111
- if progress: progress(0.14, desc="Preparing inference")
 
112
  ckpt = get_weights_path(progress=progress)
113
 
114
  script = REPO_DIR / "infer_single.py"
@@ -119,13 +153,16 @@ def run_sonicmaster_cli(
119
  env = os.environ.copy()
120
 
121
  last_err = ""
122
- for cidx, cmd in enumerate(_candidate_commands(py, script, ckpt, input_wav_path, prompt, out_path), 1):
 
 
123
  try:
124
  if progress:
125
- progress(min(0.25 + 0.10 * cidx, 0.70), desc=f"Running infer_single.py (try {cidx})")
126
  res = subprocess.run(cmd, capture_output=True, text=True, check=True, env=env)
127
  if out_path.exists() and out_path.stat().st_size > 0:
128
- if progress: progress(0.88, desc="Post-processing output")
 
129
  return True, (res.stdout or "Inference completed.").strip()
130
  last_err = "infer_single.py finished but produced no output file."
131
  except subprocess.CalledProcessError as e:
@@ -133,16 +170,13 @@ def run_sonicmaster_cli(
133
  last_err = snippet if snippet else f"infer_single.py failed with return code {e.returncode}."
134
  except Exception as e:
135
  import traceback
136
- last_err = f"Unexpected error with infer_single.py: {e}\n{traceback.format_exc()}"
137
- return False, last_err or "All candidate commands failed."
 
138
 
139
  # ============ GPU path (ZeroGPU) ============
140
@spaces.GPU(duration=60)  # safe cap for ZeroGPU tiers
def enhance_on_gpu(input_path: str, prompt: str, output_path: str) -> Tuple[bool, str]:
    """Run the CLI inference from inside a ``spaces.GPU``-decorated call.

    Paths arrive as strings and are converted to ``Path`` objects before
    being handed to ``run_sonicmaster_cli``; its result is returned as-is.
    """
    # Best-effort import; a failure here is deliberately ignored.
    try:
        import torch  # noqa: F401
    except Exception:
        pass
    from pathlib import Path as _P

    src_wav = _P(input_path)
    dst_wav = _P(output_path)
    return run_sonicmaster_cli(src_wav, prompt, dst_wav, progress=None)
148
 
@@ -153,7 +187,7 @@ def _has_cuda() -> bool:
153
  except Exception:
154
  return False
155
 
156
- # ================== Examples @ STARTUP ==================
157
  PROMPTS_10 = [
158
  "Increase the clarity of this song by emphasizing treble frequencies.",
159
  "Make this song sound more boomy by amplifying the low end bass frequencies.",
@@ -167,9 +201,14 @@ PROMPTS_10 = [
167
  "Please, dereverb this audio.",
168
  ]
169
 
170
- def build_startup_examples() -> List[List[Any]]:
171
- """Build 10 (audio_path, prompt) pairs from repo at import time."""
 
 
 
172
  wav_dir = REPO_DIR / "samples" / "inputs"
 
 
173
  wav_paths = sorted(p for p in wav_dir.glob("*.wav") if p.is_file())
174
  ex = []
175
  for i, p in enumerate(wav_paths[:10]):
@@ -177,8 +216,6 @@ def build_startup_examples() -> List[List[Any]]:
177
  ex.append([p.as_posix(), pr])
178
  return ex
179
 
180
- STARTUP_EXAMPLES = build_startup_examples()
181
-
182
  # ================== Main callback ==================
183
  def enhance_audio_ui(
184
  audio_path: str,
@@ -189,26 +226,28 @@ def enhance_audio_ui(
189
  Returns (audio, message). On failure, audio=None and message=error text.
190
  """
191
  try:
192
- # 🔧 normalize/fallback so --prompt is always passed
193
- prompt = (prompt or "").strip()
194
- if not prompt:
195
- prompt = "Enhance the input audio"
196
 
197
  if not audio_path:
198
  raise gr.Error("Please upload or select an input audio file.")
199
 
 
 
200
  wav, sr = read_audio(audio_path)
201
- tmp_in = SPACE_ROOT / "tmp_in.wav"
 
202
  tmp_out = SPACE_ROOT / "tmp_out.wav"
203
  if tmp_out.exists():
204
- try: tmp_out.unlink()
205
- except Exception: pass
 
 
206
 
207
- if progress: progress(0.06, desc="Preparing audio")
208
  save_temp_wav(wav, sr, tmp_in)
209
 
210
  use_gpu_call = USE_ZEROGPU or _has_cuda()
211
- if progress: progress(0.12, desc="Starting inference")
 
212
 
213
  if use_gpu_call:
214
  ok, msg = enhance_on_gpu(tmp_in.as_posix(), prompt, tmp_out.as_posix())
@@ -231,45 +270,44 @@ def enhance_audio_ui(
231
with gr.Blocks(title="SonicMaster – Text-Guided Restoration & Mastering", fill_height=True) as _demo:
    # Intro text rendered at the top of the page.
    _intro_parts = (
        "## 🎧 SonicMaster\n",
        "Upload audio or pick an example, write a prompt (or leave blank), then click **Enhance**.\n",
        "If left blank, we'll use a generic prompt: _Enhance the input audio_.\n",
        "- The enhanced audio may take a few seconds to appear after processing. Please wait until the output loads.\n",
        "- Please note that if it is the first run, HF will need to download model weights which takes a while.\n",
        "\n",
        "If you enjoy this model, please cite [our paper](https://huggingface.co/papers/2508.03448). ",
    )
    gr.Markdown("".join(_intro_parts))

    with gr.Row():
        # Left column: inputs, trigger button and the startup examples.
        with gr.Column(scale=1):
            in_audio = gr.Audio(label="Input Audio", type="filepath")
            prompt = gr.Textbox(
                label="Text Prompt",
                placeholder="e.g., Reduce reverb and brighten vocals. (Optional)",
            )
            run_btn = gr.Button("🚀 Enhance", variant="primary")

            # Examples were collected at import time; show them when any exist.
            if not STARTUP_EXAMPLES:
                gr.Markdown("> ⚠️ No sample .wav files found in `samples/inputs/`.")
            else:
                gr.Examples(
                    examples=STARTUP_EXAMPLES,
                    inputs=[in_audio, prompt],
                    label="Sample Inputs (10)",
                )

        # Right column: result audio and status text.
        with gr.Column(scale=1):
            out_audio = gr.Audio(label="Enhanced Audio (output)")
            status = gr.Textbox(label="Status / Messages", interactive=False, lines=8)

    run_btn.click(
        fn=enhance_audio_ui,
        inputs=[in_audio, prompt],
        outputs=[out_audio, status],
        concurrency_limit=1,
    )

# The same queued app is published under several module-level names.
demo = _demo.queue(max_size=16)
iface = demo
app = demo

# Direct execution (local debugging).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
2
+ import sys
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import Tuple, Optional, List, Any
6
+
7
+ # Make Gradio assets reliable on Spaces
8
  os.environ.setdefault("GRADIO_USE_CDN", "true")
9
 
10
+ # --- HF Spaces SDK (optional) ---
11
try:
    import spaces  # HF Spaces SDK
except Exception:
    # The SDK could not be imported: install a stand-in so the
    # ``@spaces.GPU`` decorators used in this file still work.
    class _DummySpaces:
        """No-op replacement exposing the ``GPU`` decorator."""

        def GPU(self, *args, **kwargs):
            """Return the decorated function unchanged.

            Supports both ``@spaces.GPU`` (bare) and
            ``@spaces.GPU(duration=...)`` (called with options).
            """
            # Bare usage: the function itself is the only positional argument.
            if len(args) == 1 and not kwargs and callable(args[0]):
                return args[0]

            def deco(fn):
                return fn

            return deco

    spaces = _DummySpaces()
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  import gradio as gr
22
  import numpy as np
23
  import soundfile as sf
24
  from huggingface_hub import hf_hub_download
25
 
26
+ # ================= Runtime hints (safe on CPU) =================
27
  USE_ZEROGPU = os.getenv("SPACE_RUNTIME", "").lower() == "zerogpu"
28
 
29
+ SPACE_ROOT = Path(__file__).parent.resolve()
30
+ REPO_DIR = SPACE_ROOT / "SonicMasterRepo"
31
+ REPO_URL = "https://github.com/AMAAI-Lab/SonicMaster"
32
  WEIGHTS_REPO = "amaai-lab/SonicMaster"
33
  WEIGHTS_FILE = "model.safetensors"
34
+ CACHE_DIR = SPACE_ROOT / "weights"
35
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
36
 
37
+ # ================== SAFE repo handling (NO network at import) ==================
38
+ _repo_ready: bool = False
39
+
40
def ensure_repo(progress: Optional[gr.Progress] = None) -> Path:
    """
    Ensure the SonicMaster repo is cloned into REPO_DIR and on sys.path.

    IMPORTANT: Called lazily (on user action), not at import time.

    Args:
        progress: optional Gradio progress callback, notified before cloning.

    Returns:
        REPO_DIR, the local checkout directory.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero.
    """
    global _repo_ready
    import shutil

    repo_str = REPO_DIR.as_posix()

    if not (_repo_ready and REPO_DIR.exists()):
        if not REPO_DIR.exists():
            if progress:
                progress(0.02, desc="Cloning SonicMaster repo (first run)")
            try:
                # Shallow clone to keep it fast
                subprocess.run(
                    ["git", "clone", "--depth", "1", REPO_URL, repo_str],
                    check=True,
                    capture_output=True,
                    text=True,
                )
            except BaseException:
                # REPO_DIR did not exist before this call, so anything there
                # now is a partial checkout. Remove it so the next call
                # retries instead of treating a broken directory as valid.
                shutil.rmtree(REPO_DIR, ignore_errors=True)
                raise
        _repo_ready = True

    if repo_str not in sys.path:
        sys.path.append(repo_str)
    return REPO_DIR
67
 
68
+ # ================ Weights: lazy download (first click) ================
69
  _weights_path: Optional[Path] = None
70
+
71
  def get_weights_path(progress: Optional[gr.Progress] = None) -> Path:
72
+ """
73
+ Download/resolve weights lazily (keeps startup fast).
74
+ """
75
  global _weights_path
76
  if _weights_path is None:
77
+ if progress:
78
+ progress(0.10, desc="Downloading model weights (first run)")
79
  wp = hf_hub_download(
80
  repo_id=WEIGHTS_REPO,
81
  filename=WEIGHTS_FILE,
 
87
  _weights_path = Path(wp)
88
  return _weights_path
89
 
90
+ # ================== Audio helpers ==================
91
  def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
92
  # Ensure shape (samples, channels)
93
  if wav.ndim == 2 and wav.shape[0] < wav.shape[1]:
 
98
 
99
def read_audio(path: str) -> Tuple[np.ndarray, int]:
    """Read an audio file via soundfile and return ``(samples, sample_rate)``.

    When the samples come back as a float64 ndarray they are converted to
    float32; anything else is returned as-is.
    """
    samples, sample_rate = sf.read(path, always_2d=False)
    is_double = isinstance(samples, np.ndarray) and samples.dtype == np.float64
    return (samples.astype(np.float32) if is_double else samples), sample_rate
104
 
105
+ # ================== CLI runner ==================
106
+ def _candidate_commands(
107
+ py: str, script: Path, ckpt: Path, inp: Path, prompt: str, out: Path
108
+ ) -> List[List[str]]:
109
  """
110
  Only support infer_single.py variants.
111
+ Expected flags: --ckpt --input --prompt --output
112
  """
113
  return [
114
+ [
115
+ py,
116
+ script.as_posix(),
117
+ "--ckpt",
118
+ ckpt.as_posix(),
119
+ "--input",
120
+ inp.as_posix(),
121
+ "--prompt",
122
+ prompt,
123
+ "--output",
124
+ out.as_posix(),
125
+ ],
126
  ]
127
 
128
  def run_sonicmaster_cli(
 
131
  out_path: Path,
132
  progress: Optional[gr.Progress] = None,
133
  ) -> Tuple[bool, str]:
134
+ """
135
+ Run inference via subprocess; returns (ok, message).
136
+ Uses ONLY infer_single.py.
137
+ """
138
+ # Ensure repo is present when needed (NOT at startup)
139
+ ensure_repo(progress=progress)
140
+
141
+ # Ensure a non-empty prompt for the CLI
142
  prompt = (prompt or "").strip() or "Enhance the input audio"
143
 
144
+ if progress:
145
+ progress(0.14, desc="Preparing inference")
146
  ckpt = get_weights_path(progress=progress)
147
 
148
  script = REPO_DIR / "infer_single.py"
 
153
  env = os.environ.copy()
154
 
155
  last_err = ""
156
+ for cidx, cmd in enumerate(
157
+ _candidate_commands(py, script, ckpt, input_wav_path, prompt, out_path), 1
158
+ ):
159
  try:
160
  if progress:
161
+ progress(min(0.25 + 0.10 * cidx, 0.70), desc=f"Running inference (try {cidx})")
162
  res = subprocess.run(cmd, capture_output=True, text=True, check=True, env=env)
163
  if out_path.exists() and out_path.stat().st_size > 0:
164
+ if progress:
165
+ progress(0.88, desc="Post-processing output")
166
  return True, (res.stdout or "Inference completed.").strip()
167
  last_err = "infer_single.py finished but produced no output file."
168
  except subprocess.CalledProcessError as e:
 
170
  last_err = snippet if snippet else f"infer_single.py failed with return code {e.returncode}."
171
  except Exception as e:
172
  import traceback
173
+ last_err = f"Unexpected error: {e}\n{traceback.format_exc()}"
174
+
175
+ return False, last_err or "Inference failed."
176
 
177
  # ============ GPU path (ZeroGPU) ============
178
@spaces.GPU(duration=60)  # safe cap for ZeroGPU tiers
def enhance_on_gpu(input_path: str, prompt: str, output_path: str) -> Tuple[bool, str]:
    """Run the CLI inference from inside a ``spaces.GPU``-decorated call.

    Paths arrive as strings and are converted to ``Path`` objects before
    being handed to ``run_sonicmaster_cli``; its result is returned as-is.
    """
    from pathlib import Path as _P

    src_wav = _P(input_path)
    dst_wav = _P(output_path)
    return run_sonicmaster_cli(src_wav, prompt, dst_wav, progress=None)
182
 
 
187
  except Exception:
188
  return False
189
 
190
+ # ================== Optional Examples (NO CLONE AT STARTUP) ==================
191
  PROMPTS_10 = [
192
  "Increase the clarity of this song by emphasizing treble frequencies.",
193
  "Make this song sound more boomy by amplifying the low end bass frequencies.",
 
201
  "Please, dereverb this audio.",
202
  ]
203
 
204
+ def build_examples_if_repo_present() -> List[List[Any]]:
205
+ """
206
+ Build examples WITHOUT cloning. If repo isn't present yet, return [].
207
+ This avoids slow startup + network calls.
208
+ """
209
  wav_dir = REPO_DIR / "samples" / "inputs"
210
+ if not wav_dir.exists():
211
+ return []
212
  wav_paths = sorted(p for p in wav_dir.glob("*.wav") if p.is_file())
213
  ex = []
214
  for i, p in enumerate(wav_paths[:10]):
 
216
  ex.append([p.as_posix(), pr])
217
  return ex
218
 
 
 
219
  # ================== Main callback ==================
220
  def enhance_audio_ui(
221
  audio_path: str,
 
226
  Returns (audio, message). On failure, audio=None and message=error text.
227
  """
228
  try:
229
+ prompt = (prompt or "").strip() or "Enhance the input audio"
 
 
 
230
 
231
  if not audio_path:
232
  raise gr.Error("Please upload or select an input audio file.")
233
 
234
+ if progress:
235
+ progress(0.03, desc="Preparing audio")
236
  wav, sr = read_audio(audio_path)
237
+
238
+ tmp_in = SPACE_ROOT / "tmp_in.wav"
239
  tmp_out = SPACE_ROOT / "tmp_out.wav"
240
  if tmp_out.exists():
241
+ try:
242
+ tmp_out.unlink()
243
+ except Exception:
244
+ pass
245
 
 
246
  save_temp_wav(wav, sr, tmp_in)
247
 
248
  use_gpu_call = USE_ZEROGPU or _has_cuda()
249
+ if progress:
250
+ progress(0.12, desc="Starting inference")
251
 
252
  if use_gpu_call:
253
  ok, msg = enhance_on_gpu(tmp_in.as_posix(), prompt, tmp_out.as_posix())
 
270
# Build the Gradio page: intro text, an input column, an output column,
# and the click handler that runs enhance_audio_ui.
with gr.Blocks(title="SonicMaster – Text-Guided Restoration & Mastering", fill_height=True) as _demo:
    gr.Markdown(
        "## 🎧 SonicMaster\n"
        "Upload audio, write a prompt (or leave blank), then click **Enhance**.\n"
        "If left blank, we use: _Enhance the input audio_.\n\n"
        "- First run will clone the repo + download weights (may take a bit).\n"
        "- Subsequent runs are much faster.\n"
        # Blank line ends the bullet list; without it Markdown treats the
        # citation sentence as a continuation of the last bullet.
        "\n"
        "If you enjoy this model, please cite the paper."
    )

    with gr.Row():
        with gr.Column(scale=1):
            in_audio = gr.Audio(label="Input Audio", type="filepath")
            prompt_box = gr.Textbox(label="Text Prompt", placeholder="e.g., Reduce reverb and brighten vocals. (Optional)")
            run_btn = gr.Button("🚀 Enhance", variant="primary")

            # Examples only if already present locally (no startup clone).
            # This is evaluated once, while the UI is being built.
            examples = build_examples_if_repo_present()
            if examples:
                gr.Examples(
                    examples=examples,
                    inputs=[in_audio, prompt_box],
                    label="Sample Inputs (10)",
                )
            else:
                gr.Markdown("> ℹ️ Samples will appear after the repo is cloned (first run).")

        with gr.Column(scale=1):
            out_audio = gr.Audio(label="Enhanced Audio (output)")
            status = gr.Textbox(label="Status / Messages", interactive=False, lines=8)

    run_btn.click(
        fn=enhance_audio_ui,
        inputs=[in_audio, prompt_box],
        outputs=[out_audio, status],
        concurrency_limit=1,
    )

# The same queued app is published under several module-level names.
demo = _demo.queue(max_size=16)
iface = demo
app = demo

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)