Spaces:
Build error
Build error
File size: 7,946 Bytes
aed1d05 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | #!/usr/bin/env python3
"""
ROOM Setup — Download and install all model components.

Pulls:
  1. ACE-Step 1.5 (text → audio)
  2. OpenVoice V2 (voice cloning)
  3. Demucs (stem separation)
  4. BasicPitch (audio → MIDI)

On Lambda / Ubuntu with apt ``python3-torch`` (CUDA 12): use a venv + matching wheels,
otherwise you get a ``libcudart.so.13`` error when pip installs a newer torchaudio:

  python3 scripts/bootstrap_venv_room.py
  source .venv/bin/activate
  python scripts/setup_room.py

(shell alternative: bash scripts/bootstrap_venv_room.sh)

Usage:
  python scripts/setup_room.py
  python scripts/setup_room.py --skip-acestep   (if already installed)
"""
from __future__ import annotations
import argparse
import os
import subprocess
import sys
from pathlib import Path
# Project root: the parent of the directory that contains this script.
# The models/ directory and the .env.room file are created underneath it.
ROOT = Path(__file__).resolve().parents[1]
def run(cmd: list[str], cwd=None, check=True):
    """Echo a command line, then execute it.

    The command is printed with its arguments joined by single spaces and is
    then handed to ``subprocess.run``. ``cwd`` and ``check`` are forwarded
    unchanged, so with the default ``check=True`` a non-zero exit status
    raises ``subprocess.CalledProcessError``. Nothing is returned.
    """
    rendered = " ".join(cmd)
    print(f" $ {rendered}")
    subprocess.run(cmd, check=check, cwd=cwd)
def pip(*args):
    """Run ``pip install`` with the given arguments for the current interpreter.

    The command ``<sys.executable> -m pip install <args...>`` is executed
    through ``run``, which echoes it first and, with its default
    ``check=True``, raises if pip exits with a non-zero status.
    """
    command = [sys.executable, "-m", "pip", "install"]
    command.extend(args)
    run(command)
def pip_try(*args):
    """Best-effort ``pip install``: a failing install never aborts setup.

    Intended for optional extras (CUDA builds, hub add-ons). The command is
    announced on stdout and then run with ``check=False``, so pip's exit
    status is ignored.
    """
    joined = " ".join(args)
    print(f" $ pip install (optional) {joined}")
    command = [sys.executable, "-m", "pip", "install", *args]
    subprocess.run(command, check=False)
def install_acestep_runtime_dependencies() -> None:
    """
    Install the Python packages ACE-Step imports at runtime.

    ACE-Step itself is installed with ``--no-deps`` so that a pinned torch
    stack is not overwritten; the packages it needs are therefore listed
    here explicitly. The required set is installed with ``pip`` (a failure
    stops setup). The Linux-only torchvision wheel and the nice-to-have
    extras are installed with ``pip_try`` (failures are ignored).
    """
    print(" Installing ACE-Step runtime Python dependencies (no torch upgrade)...")

    # Hard requirements: installed in a single pip invocation.
    required_specs = (
        "loguru>=0.7.3",
        "accelerate>=1.12.0",
        "diffusers>=0.37.0",
        "diskcache",
        "einops>=0.8.1",
        "lightning>=2.0.0",
        "lycoris-lora",
        "matplotlib>=3.7.5",
        "peft>=0.18.0",
        "tensorboard>=2.20.0",
        "toml",
        "typer-slim>=0.21.1",
        "vector-quantize-pytorch>=1.27.15",
        "transformers>=4.51.0,<4.58.0",
        "safetensors",
        "sentencepiece",
    )
    pip(*required_specs)

    # On Linux, try the torchvision build published on the cu124 wheel index.
    if sys.platform == "linux":
        torchvision_args = (
            "torchvision==0.21.0",
            "--index-url",
            "https://download.pytorch.org/whl/cu124",
        )
        pip_try(*torchvision_args)

    # Nice-to-have; ACE-Step lists them but they often need extra system libs.
    # Each one is attempted separately so one failure does not block the rest.
    optional_specs = [
        "modelscope",
        "torchao>=0.16.0,<0.17.0",
        "torchcodec>=0.9.1",
    ]
    for requirement in optional_specs:
        pip_try(requirement)

    print(" (Optional) For LM backend vllm, install: pip install nano-vllm # RoomConfig.lm_backend='vllm'")
def _setup_acestep(models_dir: Path) -> None:
    """Clone ACE-Step 1.5 if needed, relax its Python pin, and install it editable."""
    acestep_dir = models_dir / "ace-step"
    pyproject = acestep_dir / "pyproject.toml"
    if not pyproject.exists():
        print(" Cloning...")
        run(["git", "clone", "https://github.com/ACE-Step/ACE-Step-1.5.git", str(acestep_dir)])
    else:
        print(" Already cloned.")
    # Fix Python version pin. TOML files are UTF-8 by specification, so read
    # and write with an explicit encoding instead of the locale default.
    text = pyproject.read_text(encoding="utf-8")
    if ">=3.11,<3.13" in text:
        pyproject.write_text(text.replace(">=3.11,<3.13", ">=3.10,<3.14"), encoding="utf-8")
        print(" Fixed Python version pin.")
    # --no-deps keeps pip from replacing an already-installed torch stack;
    # the runtime dependencies are installed explicitly right after.
    pip("-e", str(acestep_dir), "--no-deps")
    install_acestep_runtime_dependencies()
    print(" ACE-Step ready. Weights auto-download on first run.")


def _setup_openvoice(models_dir: Path) -> None:
    """Clone and install OpenVoice, fetch its V2 checkpoints, and install its deps."""
    openvoice_dir = models_dir / "openvoice"
    already_cloned = (
        (openvoice_dir / "setup.py").exists()
        or (openvoice_dir / "pyproject.toml").exists()
    )
    if not already_cloned:
        print(" Cloning...")
        run(["git", "clone", "https://github.com/myshell-ai/OpenVoice.git", str(openvoice_dir)])
    else:
        print(" Already cloned.")
    pip("-e", str(openvoice_dir), "--no-deps")
    # Download checkpoints
    ckpt_dir = openvoice_dir / "checkpoints_v2"
    if not ckpt_dir.exists():
        print(" Downloading OpenVoice V2 checkpoints...")
        pip("huggingface-hub")
        # Run in a child interpreter so the freshly installed huggingface-hub
        # is importable. The target directory is passed as an argument rather
        # than pasted into the Python source: a path containing a quote or a
        # backslash (e.g. C:\Users\...) would otherwise break the snippet.
        run([
            sys.executable, "-c",
            "import sys; from huggingface_hub import snapshot_download; "
            "snapshot_download('myshell-ai/OpenVoiceV2', local_dir=sys.argv[1])",
            str(ckpt_dir),
        ])
    else:
        print(" Checkpoints already downloaded.")
    # Install OpenVoice deps (avoid OpenVoice's old numpy pin pulling numpy 2.x)
    pip("librosa", "wavmark", "whisper-timestamped", "pydub")
    # Keep NumPy 1.x for matplotlib / torch extensions on mixed installs
    pip("numpy>=1.26,<2")
    print(" OpenVoice ready.")


def _write_env_file(models_dir: Path) -> Path:
    """Write ROOT/.env.room with the component locations and return its path."""
    env_path = ROOT / ".env.room"
    with open(env_path, "w", encoding="utf-8") as f:
        f.write(f"ACESTEP_ROOT={models_dir / 'ace-step'}\n")
        f.write(f"OPENVOICE_ROOT={models_dir / 'openvoice'}\n")
        f.write("DEMUCS_MODEL=htdemucs\n")
    return env_path


def main():
    """Install the four ROOM components and write the ``.env.room`` config.

    Command-line flags ``--skip-acestep``, ``--skip-openvoice``,
    ``--skip-demucs`` and ``--skip-basicpitch`` each skip one component.
    Regardless of the flags, ``ROOT/models`` is created, ``ROOT/.env.room``
    is (re)written, and numpy is pinned to ``>=1.26,<2`` at the end.

    Required installs go through ``pip``/``run``, so a failing command raises
    ``subprocess.CalledProcessError`` and stops the setup.
    """
    ap = argparse.ArgumentParser(description="Set up all ROOM model components")
    ap.add_argument("--skip-acestep", action="store_true")
    ap.add_argument("--skip-openvoice", action="store_true")
    ap.add_argument("--skip-demucs", action="store_true")
    ap.add_argument("--skip-basicpitch", action="store_true")
    args = ap.parse_args()

    models_dir = ROOT / "models"
    models_dir.mkdir(exist_ok=True)

    # ── 1. ACE-Step 1.5 ──
    if not args.skip_acestep:
        print("\n[1/4] ACE-Step 1.5")
        _setup_acestep(models_dir)
    else:
        print("\n[1/4] ACE-Step — skipped")

    # ── 2. OpenVoice V2 ──
    if not args.skip_openvoice:
        print("\n[2/4] OpenVoice V2")
        _setup_openvoice(models_dir)
    else:
        print("\n[2/4] OpenVoice — skipped")

    # ── 3. Demucs ──
    if not args.skip_demucs:
        print("\n[3/4] Demucs")
        pip("demucs")
        print(" Demucs ready. Model downloads on first use (~80MB).")
    else:
        print("\n[3/4] Demucs — skipped")

    # ── 4. BasicPitch ──
    if not args.skip_basicpitch:
        print("\n[4/4] BasicPitch")
        pip("basic-pitch")
        print(" BasicPitch ready.")
    else:
        print("\n[4/4] BasicPitch — skipped")

    # ── Config file ──
    env_path = _write_env_file(models_dir)

    # Whisper/OpenVoice can pull numpy 2.x; ROOM + torch wheels expect 1.26.x
    print("\n[finalize] Pin numpy<2 for stack stability...")
    pip("numpy>=1.26,<2")

    print(f"""
==========================================================
ROOM Setup Complete
==========================================================
ACE-Step 1.5 : {'READY' if not args.skip_acestep else 'SKIPPED'}
OpenVoice V2 : {'READY' if not args.skip_openvoice else 'SKIPPED'}
Demucs : {'READY' if not args.skip_demucs else 'SKIPPED'}
BasicPitch : {'READY' if not args.skip_basicpitch else 'SKIPPED'}
Config: {env_path}
If generation fails with libcudart.so.13: system PyTorch is CUDA 12 but pip
installed torchaudio for CUDA 13. Fix:
bash scripts/bootstrap_venv_room.sh
source .venv/bin/activate
python scripts/setup_room.py
Verify imports (catches missing deps before Gradio):
python scripts/verify_room_env.py
Quick test:
python -m modelw.room "piano ballad, E minor, emotional" --stems --midi
Full pipeline with voice cloning:
python -m modelw.room "R&B love song, my style" --voice my_voice.wav --stems --midi
==========================================================
""")
# Run the setup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|