ROOM / scripts /setup_room.py
solo363614's picture
Upload folder using huggingface_hub
aed1d05 verified
#!/usr/bin/env python3
"""
ROOM Setup — Download and install all model components.
Pulls:
1. ACE-Step 1.5 (text → audio)
2. OpenVoice V2 (voice cloning)
3. Demucs (stem separation)
4. BasicPitch (audio → MIDI)
On Lambda / Ubuntu with apt ``python3-torch`` (CUDA 12): use a venv + matching wheels
or you get ``libcudart.so.13`` when pip installs a newer torchaudio:
python3 scripts/bootstrap_venv_room.py
source .venv/bin/activate
python scripts/setup_room.py
(shell alternative: bash scripts/bootstrap_venv_room.sh)
Usage:
python scripts/setup_room.py
python scripts/setup_room.py --skip-acestep (if already installed)
"""
from __future__ import annotations
import argparse
import os
import subprocess
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
def run(cmd: list[str], cwd=None, check=True):
    """Echo a command, then execute it without a shell.

    Args:
        cmd: Program and arguments, handed to ``subprocess.run`` as a list.
        cwd: Working directory for the child process; ``None`` inherits the
            current one.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command, so callers
        that pass ``check=False`` can inspect ``returncode``.
    """
    import shlex  # local import: only needed for the echo below

    # shlex.join quotes arguments that contain spaces or shell metacharacters
    # (e.g. a ``-c`` snippet or a path with spaces), so the echoed line is
    # unambiguous and can be pasted back into a POSIX shell.
    print(f" $ {shlex.join(cmd)}")
    return subprocess.run(cmd, cwd=cwd, check=check)
def pip(*args):
    """Run ``pip install`` with *args* under the interpreter executing this script.

    The command goes through ``run`` with its default ``check=True``, so a
    failing install raises and stops the setup.
    """
    install_cmd = [sys.executable, "-m", "pip", "install"]
    install_cmd.extend(args)
    run(install_cmd)
def pip_try(*args):
    """Install but do not fail setup (optional CUDA / hub extras).

    Args:
        *args: Arguments appended to ``python -m pip install``.

    Returns:
        ``True`` when pip exited with status 0, ``False`` otherwise. A failure
        is reported on stdout but never raised, so setup keeps going.
    """
    print(f" $ pip install (optional) {' '.join(args)}")
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", *args],
        check=False,
    )
    if completed.returncode != 0:
        # Best-effort by design: surface the failure instead of hiding it,
        # but do not abort the rest of the setup.
        print(
            f" (optional) pip install exited with code {completed.returncode}; "
            f"continuing without: {' '.join(args)}"
        )
        return False
    return True
def install_acestep_runtime_dependencies() -> None:
    """
    Install the Python packages ACE-Step needs at runtime.

    ACE-Step itself is installed with --no-deps so the pinned torch stack is
    left alone; this function installs the remaining dependencies explicitly.
    Required packages go through ``pip``; torchvision and the nice-to-have
    extras go through ``pip_try``.
    """
    print(" Installing ACE-Step runtime Python dependencies (no torch upgrade)...")

    # Hard requirements, installed together in a single pip invocation.
    required_specs = (
        "loguru>=0.7.3",
        "accelerate>=1.12.0",
        "diffusers>=0.37.0",
        "diskcache",
        "einops>=0.8.1",
        "lightning>=2.0.0",
        "lycoris-lora",
        "matplotlib>=3.7.5",
        "peft>=0.18.0",
        "tensorboard>=2.20.0",
        "toml",
        "typer-slim>=0.21.1",
        "vector-quantize-pytorch>=1.27.15",
        "transformers>=4.51.0,<4.58.0",
        "safetensors",
        "sentencepiece",
    )
    pip(*required_specs)

    # Linux only: best-effort torchvision from the cu124 PyTorch wheel index.
    on_linux = sys.platform == "linux"
    if on_linux:
        torchvision_args = (
            "torchvision==0.21.0",
            "--index-url",
            "https://download.pytorch.org/whl/cu124",
        )
        pip_try(*torchvision_args)

    # Nice-to-have; ACE-Step lists them but they often need extra system libs.
    # Each one is attempted separately so one failure does not block the rest.
    optional_specs = [
        "modelscope",
        "torchao>=0.16.0,<0.17.0",
        "torchcodec>=0.9.1",
    ]
    for optional_spec in optional_specs:
        pip_try(optional_spec)

    print(" (Optional) For LM backend vllm, install: pip install nano-vllm # RoomConfig.lm_backend='vllm'")
def _setup_acestep(models_dir: Path) -> None:
    """Clone ACE-Step 1.5 if missing, relax its Python pin, and install it editable."""
    acestep_dir = models_dir / "ace-step"
    pyproject = acestep_dir / "pyproject.toml"
    if not pyproject.exists():
        print(" Cloning...")
        run(["git", "clone", "https://github.com/ACE-Step/ACE-Step-1.5.git", str(acestep_dir)])
    else:
        print(" Already cloned.")

    # Fix Python version pin. TOML is UTF-8 by specification, so read and
    # write with an explicit encoding instead of the locale default.
    text = pyproject.read_text(encoding="utf-8")
    if ">=3.11,<3.13" in text:
        pyproject.write_text(text.replace(">=3.11,<3.13", ">=3.10,<3.14"), encoding="utf-8")
        print(" Fixed Python version pin.")

    # --no-deps keeps pip from replacing the already-installed torch stack;
    # the remaining dependencies are installed explicitly right after.
    pip("-e", str(acestep_dir), "--no-deps")
    install_acestep_runtime_dependencies()
    print(" ACE-Step ready. Weights auto-download on first run.")


def _setup_openvoice(models_dir: Path) -> None:
    """Clone and install OpenVoice, fetch its V2 checkpoints, and install its deps."""
    openvoice_dir = models_dir / "openvoice"
    already_cloned = (
        (openvoice_dir / "setup.py").exists()
        or (openvoice_dir / "pyproject.toml").exists()
    )
    if not already_cloned:
        print(" Cloning...")
        run(["git", "clone", "https://github.com/myshell-ai/OpenVoice.git", str(openvoice_dir)])
    else:
        print(" Already cloned.")
    pip("-e", str(openvoice_dir), "--no-deps")

    # Download checkpoints.
    # NOTE(review): only the directory's existence is checked, so an
    # interrupted download is presumably treated as complete — delete
    # checkpoints_v2 to force a retry.
    ckpt_dir = openvoice_dir / "checkpoints_v2"
    if not ckpt_dir.exists():
        print(" Downloading OpenVoice V2 checkpoints...")
        pip("huggingface-hub")
        # The target directory is passed as a process argument rather than
        # formatted into the Python source, so quotes or backslashes in the
        # path cannot break (or alter) the snippet.
        download_snippet = (
            "import sys; "
            "from huggingface_hub import snapshot_download; "
            "snapshot_download('myshell-ai/OpenVoiceV2', local_dir=sys.argv[1])"
        )
        run([sys.executable, "-c", download_snippet, str(ckpt_dir)])
    else:
        print(" Checkpoints already downloaded.")

    # Install OpenVoice deps (avoid OpenVoice's old numpy pin pulling numpy 2.x)
    pip("librosa", "wavmark", "whisper-timestamped", "pydub")
    # Keep NumPy 1.x for matplotlib / torch extensions on mixed installs
    pip("numpy>=1.26,<2")
    print(" OpenVoice ready.")


def _write_env_file(models_dir: Path) -> Path:
    """Write ``.env.room`` in the project root and return its path."""
    env_path = ROOT / ".env.room"
    with open(env_path, "w", encoding="utf-8") as f:
        f.write(f"ACESTEP_ROOT={models_dir / 'ace-step'}\n")
        f.write(f"OPENVOICE_ROOT={models_dir / 'openvoice'}\n")
        f.write("DEMUCS_MODEL=htdemucs\n")
    return env_path


def main(argv=None):
    """Install every ROOM model component and write the ``.env.room`` config.

    Each component can be skipped with its ``--skip-*`` flag. Required installs
    go through ``pip``/``run``, which raise on failure, so reaching the final
    banner means every non-skipped step completed.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser(description="Set up all ROOM model components")
    ap.add_argument("--skip-acestep", action="store_true")
    ap.add_argument("--skip-openvoice", action="store_true")
    ap.add_argument("--skip-demucs", action="store_true")
    ap.add_argument("--skip-basicpitch", action="store_true")
    args = ap.parse_args(argv)

    models_dir = ROOT / "models"
    models_dir.mkdir(exist_ok=True)

    # ── 1. ACE-Step 1.5 ───────────────────────────────────────────────
    if not args.skip_acestep:
        print("\n[1/4] ACE-Step 1.5")
        _setup_acestep(models_dir)
    else:
        print("\n[1/4] ACE-Step — skipped")

    # ── 2. OpenVoice V2 ──────────────────────────────────────────────
    if not args.skip_openvoice:
        print("\n[2/4] OpenVoice V2")
        _setup_openvoice(models_dir)
    else:
        print("\n[2/4] OpenVoice — skipped")

    # ── 3. Demucs ────────────────────────────────────────────────────
    if not args.skip_demucs:
        print("\n[3/4] Demucs")
        pip("demucs")
        print(" Demucs ready. Model downloads on first use (~80MB).")
    else:
        print("\n[3/4] Demucs — skipped")

    # ── 4. BasicPitch ────────────────────────────────────────────────
    if not args.skip_basicpitch:
        print("\n[4/4] BasicPitch")
        pip("basic-pitch")
        print(" BasicPitch ready.")
    else:
        print("\n[4/4] BasicPitch — skipped")

    # ── Config file ──────────────────────────────────────────────────
    env_path = _write_env_file(models_dir)

    # Whisper/OpenVoice can pull numpy 2.x; ROOM + torch wheels expect 1.26.x.
    # Re-pin last so the constraint holds whichever steps ran above.
    print("\n[finalize] Pin numpy<2 for stack stability...")
    pip("numpy>=1.26,<2")

    def status(skipped):
        return "SKIPPED" if skipped else "READY"

    print(f"""
==========================================================
ROOM Setup Complete
==========================================================
ACE-Step 1.5 : {status(args.skip_acestep)}
OpenVoice V2 : {status(args.skip_openvoice)}
Demucs : {status(args.skip_demucs)}
BasicPitch : {status(args.skip_basicpitch)}
Config: {env_path}
If generation fails with libcudart.so.13: system PyTorch is CUDA 12 but pip
installed torchaudio for CUDA 13. Fix:
bash scripts/bootstrap_venv_room.sh
source .venv/bin/activate
python scripts/setup_room.py
Verify imports (catches missing deps before Gradio):
python scripts/verify_room_env.py
Quick test:
python -m modelw.room "piano ballad, E minor, emotional" --stems --midi
Full pipeline with voice cloning:
python -m modelw.room "R&B love song, my style" --voice my_voice.wav --stems --midi
==========================================================
""")
# Run the installer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()