Spaces:

solo363614
/

ROOM

Build error

File size: 7,946 Bytes

aed1d05

#!/usr/bin/env python3
"""
ROOM Setup — Download and install all model components.

Pulls:
  1. ACE-Step 1.5   (text → audio)
  2. OpenVoice V2   (voice cloning)
  3. Demucs         (stem separation)
  4. BasicPitch     (audio → MIDI)

On Lambda / Ubuntu with apt ``python3-torch`` (CUDA 12): use a venv with matching
wheels. Otherwise pip may install a newer torchaudio built for CUDA 13, and imports
then fail looking for ``libcudart.so.13``:

  python3 scripts/bootstrap_venv_room.py
  source .venv/bin/activate
  python scripts/setup_room.py

  (shell alternative: bash scripts/bootstrap_venv_room.sh)

Usage:
  python scripts/setup_room.py
  python scripts/setup_room.py --skip-acestep   (if already installed)
"""

from __future__ import annotations

import argparse
import os
import shlex
import subprocess
import sys
from pathlib import Path

# Parent of the directory that contains this file. Presumably the repository
# root, since the usage notes invoke this script as ``scripts/setup_room.py``
# — confirm if the file is ever moved.
ROOT = Path(__file__).resolve().parents[1]


def run(cmd: list[str], cwd=None, check=True):
    """Echo a command, then execute it without a shell.

    Args:
        cmd: Program followed by its arguments. Items are converted with
            ``str()`` first, so ``pathlib.Path`` values are accepted.
        cwd: Working directory for the child process, or ``None`` to inherit.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    argv = [str(part) for part in cmd]
    # shlex.join quotes arguments containing spaces or shell metacharacters
    # (e.g. "numpy>=1.26,<2"), so the echoed line can be pasted into a shell
    # without being split or treated as a redirection.
    print(f"  $ {shlex.join(argv)}")
    return subprocess.run(argv, cwd=cwd, check=check)


def pip(*args):
    """Run ``pip install`` under the current interpreter with the given arguments.

    The command is handed to ``run`` with its default settings, so a failing
    install aborts the setup.
    """
    install_cmd = [sys.executable, "-m", "pip", "install"]
    install_cmd += args
    run(install_cmd)


def pip_try(*args):
    """Install but do not fail setup (optional CUDA / hub extras).

    Args:
        *args: Arguments appended to ``<python> -m pip install``.

    Returns:
        ``True`` when pip exited with status 0, ``False`` otherwise. A failure
        is reported on stdout but never raised, so setup continues.
    """
    print(f"  $ pip install (optional) {' '.join(args)}")
    result = subprocess.run([sys.executable, "-m", "pip", "install", *args], check=False)
    if result.returncode != 0:
        # Best-effort by design: make the failure visible instead of silent.
        print(f"  (optional install failed with exit code {result.returncode}; continuing)")
        return False
    return True


def install_acestep_runtime_dependencies() -> None:
    """
    Install the Python packages ACE-Step needs at import time.

    The ACE-Step checkout is installed with --no-deps so a pinned torch stack is
    not replaced; this function supplies the remaining requirements explicitly.
    Required packages go through ``pip`` in a single call; the optional extras
    go through ``pip_try`` one at a time.
    """
    print("  Installing ACE-Step runtime Python dependencies (no torch upgrade)...")

    # Hard requirements, installed together in one pip invocation.
    required_specs = [
        "loguru>=0.7.3",
        "accelerate>=1.12.0",
        "diffusers>=0.37.0",
        "diskcache",
        "einops>=0.8.1",
        "lightning>=2.0.0",
        "lycoris-lora",
        "matplotlib>=3.7.5",
        "peft>=0.18.0",
        "tensorboard>=2.20.0",
        "toml",
        "typer-slim>=0.21.1",
        "vector-quantize-pytorch>=1.27.15",
        "transformers>=4.51.0,<4.58.0",
        "safetensors",
        "sentencepiece",
    ]
    pip(*required_specs)

    # Linux only: pinned torchvision from the cu124 PyTorch wheel index, best effort.
    on_linux = sys.platform == "linux"
    if on_linux:
        pip_try("torchvision==0.21.0", "--index-url", "https://download.pytorch.org/whl/cu124")

    # Nice-to-have; ACE-Step lists them but they often need extra system libs.
    pip_try("modelscope")
    pip_try("torchao>=0.16.0,<0.17.0")
    pip_try("torchcodec>=0.9.1")

    print("  (Optional) For LM backend vllm, install: pip install nano-vllm  # RoomConfig.lm_backend='vllm'")


def _status(skipped: bool) -> str:
    """Return the summary label for a component given its ``--skip-*`` flag."""
    return "SKIPPED" if skipped else "READY"


def _setup_acestep(models_dir: Path) -> None:
    """Clone ACE-Step 1.5 under ``models_dir`` (if needed) and install it editable."""
    acestep_dir = models_dir / "ace-step"
    pyproject = acestep_dir / "pyproject.toml"
    print("\n[1/4] ACE-Step 1.5")
    if pyproject.exists():
        print("  Already cloned.")
    else:
        print("  Cloning...")
        run(["git", "clone", "https://github.com/ACE-Step/ACE-Step-1.5.git", str(acestep_dir)])

    # Widen the upstream Python version pin so the editable install is accepted
    # on interpreters outside 3.11-3.12. TOML files are UTF-8 by specification,
    # so read and write with an explicit encoding rather than the locale default.
    text = pyproject.read_text(encoding="utf-8")
    if ">=3.11,<3.13" in text:
        pyproject.write_text(text.replace(">=3.11,<3.13", ">=3.10,<3.14"), encoding="utf-8")
        print("  Fixed Python version pin.")

    # --no-deps keeps pip from replacing an already pinned torch stack.
    pip("-e", str(acestep_dir), "--no-deps")
    install_acestep_runtime_dependencies()
    print("  ACE-Step ready. Weights auto-download on first run.")


def _setup_openvoice(models_dir: Path) -> None:
    """Clone OpenVoice under ``models_dir`` (if needed), install it, and fetch V2 checkpoints."""
    openvoice_dir = models_dir / "openvoice"
    print("\n[2/4] OpenVoice V2")
    already_cloned = (openvoice_dir / "setup.py").exists() or (openvoice_dir / "pyproject.toml").exists()
    if already_cloned:
        print("  Already cloned.")
    else:
        print("  Cloning...")
        run(["git", "clone", "https://github.com/myshell-ai/OpenVoice.git", str(openvoice_dir)])

    pip("-e", str(openvoice_dir), "--no-deps")

    # Download checkpoints
    ckpt_dir = openvoice_dir / "checkpoints_v2"
    if ckpt_dir.exists():
        print("  Checkpoints already downloaded.")
    else:
        print("  Downloading OpenVoice V2 checkpoints...")
        pip("huggingface-hub")
        # Pass the target directory through sys.argv instead of interpolating
        # it into the source text: a path containing backslashes (Windows) or
        # quotes would otherwise produce an invalid string literal.
        run([
            sys.executable, "-c",
            "import sys; from huggingface_hub import snapshot_download; "
            "snapshot_download('myshell-ai/OpenVoiceV2', local_dir=sys.argv[1])",
            str(ckpt_dir),
        ])

    # Install OpenVoice deps (avoid OpenVoice's old numpy pin pulling numpy 2.x)
    pip("librosa", "wavmark", "whisper-timestamped", "pydub")
    # Keep NumPy 1.x for matplotlib / torch extensions on mixed installs
    pip("numpy>=1.26,<2")
    print("  OpenVoice ready.")


def _write_env_file(models_dir: Path) -> Path:
    """Write ``.env.room`` in ``ROOT`` with the model locations and return its path."""
    env_path = ROOT / ".env.room"
    with open(env_path, "w", encoding="utf-8") as f:
        f.write(f"ACESTEP_ROOT={models_dir / 'ace-step'}\n")
        f.write(f"OPENVOICE_ROOT={models_dir / 'openvoice'}\n")
        f.write("DEMUCS_MODEL=htdemucs\n")
    return env_path


def main():
    """Install the ROOM model components selected on the command line.

    Parses the ``--skip-*`` flags, sets up each component that was not skipped
    (ACE-Step, OpenVoice, Demucs, BasicPitch), writes ``.env.room``, pins
    ``numpy<2`` and prints a summary. Any required step that fails raises
    ``subprocess.CalledProcessError`` from the underlying ``run`` call.
    """
    ap = argparse.ArgumentParser(description="Set up all ROOM model components")
    ap.add_argument("--skip-acestep", action="store_true")
    ap.add_argument("--skip-openvoice", action="store_true")
    ap.add_argument("--skip-demucs", action="store_true")
    ap.add_argument("--skip-basicpitch", action="store_true")
    args = ap.parse_args()

    models_dir = ROOT / "models"
    models_dir.mkdir(exist_ok=True)

    # ── 1. ACE-Step 1.5 ───────────────────────────────────────────────
    if not args.skip_acestep:
        _setup_acestep(models_dir)
    else:
        print("\n[1/4] ACE-Step — skipped")

    # ── 2. OpenVoice V2 ──────────────────────────────────────────────
    if not args.skip_openvoice:
        _setup_openvoice(models_dir)
    else:
        print("\n[2/4] OpenVoice — skipped")

    # ── 3. Demucs ────────────────────────────────────────────────────
    if not args.skip_demucs:
        print("\n[3/4] Demucs")
        pip("demucs")
        print("  Demucs ready. Model downloads on first use (~80MB).")
    else:
        print("\n[3/4] Demucs — skipped")

    # ── 4. BasicPitch ────────────────────────────────────────────────
    if not args.skip_basicpitch:
        print("\n[4/4] BasicPitch")
        pip("basic-pitch")
        print("  BasicPitch ready.")
    else:
        print("\n[4/4] BasicPitch — skipped")

    # ── Config file ──────────────────────────────────────────────────
    env_path = _write_env_file(models_dir)

    # Whisper/OpenVoice can pull numpy 2.x; ROOM + torch wheels expect 1.26.x
    print("\n[finalize] Pin numpy<2 for stack stability...")
    pip("numpy>=1.26,<2")

    print(f"""
==========================================================
  ROOM Setup Complete
==========================================================

  ACE-Step 1.5 : {_status(args.skip_acestep)}
  OpenVoice V2 : {_status(args.skip_openvoice)}
  Demucs       : {_status(args.skip_demucs)}
  BasicPitch   : {_status(args.skip_basicpitch)}

  Config: {env_path}

  If generation fails with libcudart.so.13: system PyTorch is CUDA 12 but pip
  installed torchaudio for CUDA 13. Fix:

    bash scripts/bootstrap_venv_room.sh
    source .venv/bin/activate
    python scripts/setup_room.py

  Verify imports (catches missing deps before Gradio):
    python scripts/verify_room_env.py

  Quick test:
    python -m modelw.room "piano ballad, E minor, emotional" --stems --midi

  Full pipeline with voice cloning:
    python -m modelw.room "R&B love song, my style" --voice my_voice.wav --stems --midi

==========================================================
""")


# Run the installer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()