SonicMaster

Running

App Files Files Community

ambujm22 committed 13 days ago

Commit

8071baa

verified ·

1 Parent(s): f101c4e

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -79

app.py CHANGED Viewed

@@ -1,67 +1,81 @@
 import os
 os.environ.setdefault("GRADIO_USE_CDN", "true")
 try:
     import spaces  # HF Spaces SDK
 except Exception:
     class _DummySpaces:
         def GPU(self, *_, **__):
-            def deco(fn): return fn
             return deco
     spaces = _DummySpaces()
-@spaces.GPU(duration=10)
-def gpu_probe(a: int = 1, b: int = 1):
-    return a + b
-@spaces.GPU(duration=10)
-def gpu_echo(x: str = "ok"):
-    return x
-# ================= Standard imports =================
-import sys
-import subprocess
-from pathlib import Path
-from typing import Tuple, Optional, List, Any
 import gradio as gr
 import numpy as np
 import soundfile as sf
 from huggingface_hub import hf_hub_download
-# Runtime hints (safe on CPU)
 USE_ZEROGPU = os.getenv("SPACE_RUNTIME", "").lower() == "zerogpu"
-SPACE_ROOT   = Path(__file__).parent.resolve()
-REPO_DIR     = SPACE_ROOT / "SonicMasterRepo"
-REPO_URL     = "https://github.com/AMAAI-Lab/SonicMaster"
 WEIGHTS_REPO = "amaai-lab/SonicMaster"
 WEIGHTS_FILE = "model.safetensors"
-CACHE_DIR    = SPACE_ROOT / "weights"
 CACHE_DIR.mkdir(parents=True, exist_ok=True)
-# ================ Repo clone AT STARTUP (so examples show immediately) ================
-def ensure_repo() -> Path:
     if not REPO_DIR.exists():
         subprocess.run(
             ["git", "clone", "--depth", "1", REPO_URL, REPO_DIR.as_posix()],
             check=True,
         )
     if REPO_DIR.as_posix() not in sys.path:
         sys.path.append(REPO_DIR.as_posix())
-    return REPO_DIR
-# Clone now so examples are available immediately
-ensure_repo()
-# ================ Weights: still lazy (download at first run) ================
 _weights_path: Optional[Path] = None
 def get_weights_path(progress: Optional[gr.Progress] = None) -> Path:
-    """Download/resolve weights lazily (keeps startup fast)."""
     global _weights_path
     if _weights_path is None:
-        if progress: progress(0.10, desc="Downloading model weights (first run)")
         wp = hf_hub_download(
             repo_id=WEIGHTS_REPO,
             filename=WEIGHTS_FILE,
@@ -73,7 +87,7 @@ def get_weights_path(progress: Optional[gr.Progress] = None) -> Path:
         _weights_path = Path(wp)
     return _weights_path
-# ================== Helpers ==================
 def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
     # Ensure shape (samples, channels)
     if wav.ndim == 2 and wav.shape[0] < wav.shape[1]:
@@ -84,18 +98,31 @@ def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
 def read_audio(path: str) -> Tuple[np.ndarray, int]:
     wav, sr = sf.read(path, always_2d=False)
-    if wav.dtype == np.float64:
         wav = wav.astype(np.float32)
     return wav, sr
-def _candidate_commands(py: str, script: Path, ckpt: Path, inp: Path, prompt: str, out: Path) -> List[List[str]]:
     """
     Only support infer_single.py variants.
-    Expected primary flags: --ckpt --input --prompt --output
     """
     return [
-        [py, script.as_posix(), "--ckpt", ckpt.as_posix(), "--input", inp.as_posix(), "--prompt", prompt, "--output", out.as_posix()],
     ]
 def run_sonicmaster_cli(
@@ -104,11 +131,18 @@ def run_sonicmaster_cli(
     out_path: Path,
     progress: Optional[gr.Progress] = None,
 ) -> Tuple[bool, str]:
-    """Run inference via subprocess; returns (ok, message). Uses ONLY infer_single.py."""
-    # 🔧 Ensure a non-empty prompt for the CLI
     prompt = (prompt or "").strip() or "Enhance the input audio"
-    if progress: progress(0.14, desc="Preparing inference")
     ckpt = get_weights_path(progress=progress)
     script = REPO_DIR / "infer_single.py"
@@ -119,13 +153,16 @@ def run_sonicmaster_cli(
     env = os.environ.copy()
     last_err = ""
-    for cidx, cmd in enumerate(_candidate_commands(py, script, ckpt, input_wav_path, prompt, out_path), 1):
         try:
             if progress:
-                progress(min(0.25 + 0.10 * cidx, 0.70), desc=f"Running infer_single.py (try {cidx})")
             res = subprocess.run(cmd, capture_output=True, text=True, check=True, env=env)
             if out_path.exists() and out_path.stat().st_size > 0:
-                if progress: progress(0.88, desc="Post-processing output")
                 return True, (res.stdout or "Inference completed.").strip()
             last_err = "infer_single.py finished but produced no output file."
         except subprocess.CalledProcessError as e:
@@ -133,16 +170,13 @@ def run_sonicmaster_cli(
             last_err = snippet if snippet else f"infer_single.py failed with return code {e.returncode}."
         except Exception as e:
             import traceback
-            last_err = f"Unexpected error with infer_single.py: {e}\n{traceback.format_exc()}"
-    return False, last_err or "All candidate commands failed."
 # ============ GPU path (ZeroGPU) ============
 @spaces.GPU(duration=60)  # safe cap for ZeroGPU tiers
 def enhance_on_gpu(input_path: str, prompt: str, output_path: str) -> Tuple[bool, str]:
-    try:
-        import torch  # noqa: F401
-    except Exception:
-        pass
     from pathlib import Path as _P
     return run_sonicmaster_cli(_P(input_path), prompt, _P(output_path), progress=None)
@@ -153,7 +187,7 @@ def _has_cuda() -> bool:
     except Exception:
         return False
-# ================== Examples @ STARTUP ==================
 PROMPTS_10 = [
     "Increase the clarity of this song by emphasizing treble frequencies.",
     "Make this song sound more boomy by amplifying the low end bass frequencies.",
@@ -167,9 +201,14 @@ PROMPTS_10 = [
     "Please, dereverb this audio.",
 ]
-def build_startup_examples() -> List[List[Any]]:
-    """Build 10 (audio_path, prompt) pairs from repo at import time."""
     wav_dir = REPO_DIR / "samples" / "inputs"
     wav_paths = sorted(p for p in wav_dir.glob("*.wav") if p.is_file())
     ex = []
     for i, p in enumerate(wav_paths[:10]):
@@ -177,8 +216,6 @@ def build_startup_examples() -> List[List[Any]]:
         ex.append([p.as_posix(), pr])
     return ex
-STARTUP_EXAMPLES = build_startup_examples()
 # ================== Main callback ==================
 def enhance_audio_ui(
     audio_path: str,
@@ -189,26 +226,28 @@ def enhance_audio_ui(
     Returns (audio, message). On failure, audio=None and message=error text.
     """
     try:
-        # 🔧 normalize/fallback so --prompt is always passed
-        prompt = (prompt or "").strip()
-        if not prompt:
-            prompt = "Enhance the input audio"
         if not audio_path:
             raise gr.Error("Please upload or select an input audio file.")
         wav, sr = read_audio(audio_path)
-        tmp_in  = SPACE_ROOT / "tmp_in.wav"
         tmp_out = SPACE_ROOT / "tmp_out.wav"
         if tmp_out.exists():
-            try: tmp_out.unlink()
-            except Exception: pass
-        if progress: progress(0.06, desc="Preparing audio")
         save_temp_wav(wav, sr, tmp_in)
         use_gpu_call = USE_ZEROGPU or _has_cuda()
-        if progress: progress(0.12, desc="Starting inference")
         if use_gpu_call:
             ok, msg = enhance_on_gpu(tmp_in.as_posix(), prompt, tmp_out.as_posix())
@@ -231,45 +270,44 @@ def enhance_audio_ui(
 with gr.Blocks(title="SonicMaster – Text-Guided Restoration & Mastering", fill_height=True) as _demo:
     gr.Markdown(
         "## 🎧 SonicMaster\n"
-        "Upload audio or pick an example, write a prompt (or leave blank), then click **Enhance**.\n"
-        "If left blank, we'll use a generic prompt: _Enhance the input audio_.\n"
-        "- The enhanced audio may take a few seconds to appear after processing. Please wait until the output loads.\n"
-        "- Please note that if it is the first run, HF will need to download model weights which takes a while.\n"
-        "\n"
-        "If you enjoy this model, please cite [our paper](https://huggingface.co/papers/2508.03448). "
     )
     with gr.Row():
         with gr.Column(scale=1):
             in_audio = gr.Audio(label="Input Audio", type="filepath")
-            prompt   = gr.Textbox(label="Text Prompt", placeholder="e.g., Reduce reverb and brighten vocals. (Optional)")
-            run_btn  = gr.Button("🚀 Enhance", variant="primary")
-            # Show 10 audio+prompt examples immediately at startup
-            if STARTUP_EXAMPLES:
                 gr.Examples(
-                    examples=STARTUP_EXAMPLES,
-                    inputs=[in_audio, prompt],
                     label="Sample Inputs (10)",
                 )
             else:
-                gr.Markdown("> ⚠️ No sample .wav files found in `samples/inputs/`.")
         with gr.Column(scale=1):
             out_audio = gr.Audio(label="Enhanced Audio (output)")
-            status    = gr.Textbox(label="Status / Messages", interactive=False, lines=8)
     run_btn.click(
         fn=enhance_audio_ui,
-        inputs=[in_audio, prompt],
         outputs=[out_audio, status],
         concurrency_limit=1,
     )
-# Expose all common names the supervisor might look for
 demo = _demo.queue(max_size=16)
 iface = demo
 app = demo
-# Local debugging only
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
+import sys
+import subprocess
+from pathlib import Path
+from typing import Tuple, Optional, List, Any
+# Make Gradio assets reliable on Spaces
 os.environ.setdefault("GRADIO_USE_CDN", "true")
+# --- HF Spaces SDK (optional) ---
 try:
     import spaces  # HF Spaces SDK
 except Exception:
     class _DummySpaces:
         def GPU(self, *_, **__):
+            def deco(fn):
+                return fn
             return deco
     spaces = _DummySpaces()
 import gradio as gr
 import numpy as np
 import soundfile as sf
 from huggingface_hub import hf_hub_download
+# ================= Runtime hints (safe on CPU) =================
 USE_ZEROGPU = os.getenv("SPACE_RUNTIME", "").lower() == "zerogpu"
+SPACE_ROOT = Path(__file__).parent.resolve()
+REPO_DIR = SPACE_ROOT / "SonicMasterRepo"
+REPO_URL = "https://github.com/AMAAI-Lab/SonicMaster"
 WEIGHTS_REPO = "amaai-lab/SonicMaster"
 WEIGHTS_FILE = "model.safetensors"
+CACHE_DIR = SPACE_ROOT / "weights"
 CACHE_DIR.mkdir(parents=True, exist_ok=True)
+# ================== SAFE repo handling (NO network at import) ==================
+_repo_ready: bool = False
+def ensure_repo(progress: Optional[gr.Progress] = None) -> Path:
+    """
+    Ensure SonicMaster repo is available.
+    IMPORTANT: Called lazily (on user action), not at import time.
+    """
+    global _repo_ready
+    if _repo_ready and REPO_DIR.exists():
+        if REPO_DIR.as_posix() not in sys.path:
+            sys.path.append(REPO_DIR.as_posix())
+        return REPO_DIR
     if not REPO_DIR.exists():
+        if progress:
+            progress(0.02, desc="Cloning SonicMaster repo (first run)")
+        # Shallow clone to keep it fast
         subprocess.run(
             ["git", "clone", "--depth", "1", REPO_URL, REPO_DIR.as_posix()],
             check=True,
+            capture_output=True,
+            text=True,
         )
     if REPO_DIR.as_posix() not in sys.path:
         sys.path.append(REPO_DIR.as_posix())
+    _repo_ready = True
+    return REPO_DIR
+# ================ Weights: lazy download (first click) ================
 _weights_path: Optional[Path] = None
 def get_weights_path(progress: Optional[gr.Progress] = None) -> Path:
+    """
+    Download/resolve weights lazily (keeps startup fast).
+    """
     global _weights_path
     if _weights_path is None:
+        if progress:
+            progress(0.10, desc="Downloading model weights (first run)")
         wp = hf_hub_download(
             repo_id=WEIGHTS_REPO,
             filename=WEIGHTS_FILE,
         _weights_path = Path(wp)
     return _weights_path
+# ================== Audio helpers ==================
 def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
     # Ensure shape (samples, channels)
     if wav.ndim == 2 and wav.shape[0] < wav.shape[1]:
 def read_audio(path: str) -> Tuple[np.ndarray, int]:
     wav, sr = sf.read(path, always_2d=False)
+    if isinstance(wav, np.ndarray) and wav.dtype == np.float64:
         wav = wav.astype(np.float32)
     return wav, sr
+# ================== CLI runner ==================
+def _candidate_commands(
+    py: str, script: Path, ckpt: Path, inp: Path, prompt: str, out: Path
+) -> List[List[str]]:
     """
     Only support infer_single.py variants.
+    Expected flags: --ckpt --input --prompt --output
     """
     return [
+        [
+            py,
+            script.as_posix(),
+            "--ckpt",
+            ckpt.as_posix(),
+            "--input",
+            inp.as_posix(),
+            "--prompt",
+            prompt,
+            "--output",
+            out.as_posix(),
+        ],
     ]
 def run_sonicmaster_cli(
     out_path: Path,
     progress: Optional[gr.Progress] = None,
 ) -> Tuple[bool, str]:
+    """
+    Run inference via subprocess; returns (ok, message).
+    Uses ONLY infer_single.py.
+    """
+    # Ensure repo is present when needed (NOT at startup)
+    ensure_repo(progress=progress)
+    # Ensure a non-empty prompt for the CLI
     prompt = (prompt or "").strip() or "Enhance the input audio"
+    if progress:
+        progress(0.14, desc="Preparing inference")
     ckpt = get_weights_path(progress=progress)
     script = REPO_DIR / "infer_single.py"
     env = os.environ.copy()
     last_err = ""
+    for cidx, cmd in enumerate(
+        _candidate_commands(py, script, ckpt, input_wav_path, prompt, out_path), 1
+    ):
         try:
             if progress:
+                progress(min(0.25 + 0.10 * cidx, 0.70), desc=f"Running inference (try {cidx})")
             res = subprocess.run(cmd, capture_output=True, text=True, check=True, env=env)
             if out_path.exists() and out_path.stat().st_size > 0:
+                if progress:
+                    progress(0.88, desc="Post-processing output")
                 return True, (res.stdout or "Inference completed.").strip()
             last_err = "infer_single.py finished but produced no output file."
         except subprocess.CalledProcessError as e:
             last_err = snippet if snippet else f"infer_single.py failed with return code {e.returncode}."
         except Exception as e:
             import traceback
+            last_err = f"Unexpected error: {e}\n{traceback.format_exc()}"
+    return False, last_err or "Inference failed."
 # ============ GPU path (ZeroGPU) ============
 @spaces.GPU(duration=60)  # safe cap for ZeroGPU tiers
 def enhance_on_gpu(input_path: str, prompt: str, output_path: str) -> Tuple[bool, str]:
     from pathlib import Path as _P
     return run_sonicmaster_cli(_P(input_path), prompt, _P(output_path), progress=None)
     except Exception:
         return False
+# ================== Optional Examples (NO CLONE AT STARTUP) ==================
 PROMPTS_10 = [
     "Increase the clarity of this song by emphasizing treble frequencies.",
     "Make this song sound more boomy by amplifying the low end bass frequencies.",
     "Please, dereverb this audio.",
 ]
+def build_examples_if_repo_present() -> List[List[Any]]:
+    """
+    Build examples WITHOUT cloning. If repo isn't present yet, return [].
+    This avoids slow startup + network calls.
+    """
     wav_dir = REPO_DIR / "samples" / "inputs"
+    if not wav_dir.exists():
+        return []
     wav_paths = sorted(p for p in wav_dir.glob("*.wav") if p.is_file())
     ex = []
     for i, p in enumerate(wav_paths[:10]):
         ex.append([p.as_posix(), pr])
     return ex
 # ================== Main callback ==================
 def enhance_audio_ui(
     audio_path: str,
     Returns (audio, message). On failure, audio=None and message=error text.
     """
     try:
+        prompt = (prompt or "").strip() or "Enhance the input audio"
         if not audio_path:
             raise gr.Error("Please upload or select an input audio file.")
+        if progress:
+            progress(0.03, desc="Preparing audio")
         wav, sr = read_audio(audio_path)
+        tmp_in = SPACE_ROOT / "tmp_in.wav"
         tmp_out = SPACE_ROOT / "tmp_out.wav"
         if tmp_out.exists():
+            try:
+                tmp_out.unlink()
+            except Exception:
+                pass
         save_temp_wav(wav, sr, tmp_in)
         use_gpu_call = USE_ZEROGPU or _has_cuda()
+        if progress:
+            progress(0.12, desc="Starting inference")
         if use_gpu_call:
             ok, msg = enhance_on_gpu(tmp_in.as_posix(), prompt, tmp_out.as_posix())
 with gr.Blocks(title="SonicMaster – Text-Guided Restoration & Mastering", fill_height=True) as _demo:
     gr.Markdown(
         "## 🎧 SonicMaster\n"
+        "Upload audio, write a prompt (or leave blank), then click **Enhance**.\n"
+        "If left blank, we use: _Enhance the input audio_.\n\n"
+        "- First run will clone the repo + download weights (may take a bit).\n"
+        "- Subsequent runs are much faster.\n"
+        "If you enjoy this model, please cite the paper."
     )
     with gr.Row():
         with gr.Column(scale=1):
             in_audio = gr.Audio(label="Input Audio", type="filepath")
+            prompt_box = gr.Textbox(label="Text Prompt", placeholder="e.g., Reduce reverb and brighten vocals. (Optional)")
+            run_btn = gr.Button("🚀 Enhance", variant="primary")
+            # Examples only if already present locally (no startup clone)
+            examples = build_examples_if_repo_present()
+            if examples:
                 gr.Examples(
+                    examples=examples,
+                    inputs=[in_audio, prompt_box],
                     label="Sample Inputs (10)",
                 )
             else:
+                gr.Markdown("> ℹ️ Samples will appear after the repo is cloned (first run).")
         with gr.Column(scale=1):
             out_audio = gr.Audio(label="Enhanced Audio (output)")
+            status = gr.Textbox(label="Status / Messages", interactive=False, lines=8)
     run_btn.click(
         fn=enhance_audio_ui,
+        inputs=[in_audio, prompt_box],
         outputs=[out_audio, status],
         concurrency_limit=1,
     )
 demo = _demo.queue(max_size=16)
 iface = demo
 app = demo
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)