Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| """ | |
| ROOM Setup — Download and install all model components. | |
| Pulls: | |
| 1. ACE-Step 1.5 (text → audio) | |
| 2. OpenVoice V2 (voice cloning) | |
| 3. Demucs (stem separation) | |
| 4. BasicPitch (audio → MIDI) | |
| On Lambda / Ubuntu with apt ``python3-torch`` (CUDA 12): use a venv with matching wheels, | |
| otherwise pip installs a newer torchaudio and imports fail on a missing ``libcudart.so.13``: | |
| python3 scripts/bootstrap_venv_room.py | |
| source .venv/bin/activate | |
| python scripts/setup_room.py | |
| (shell alternative: bash scripts/bootstrap_venv_room.sh) | |
| Usage: | |
| python scripts/setup_room.py | |
| python scripts/setup_room.py --skip-acestep (if already installed) | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import os | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
# Absolute, symlink-resolved location of this script file.
_SCRIPT_PATH = Path(__file__).resolve()
# Project root: the parent of the directory that contains this script.
ROOT = _SCRIPT_PATH.parents[1]
def run(cmd: list[str], cwd=None, check=True):
    """Echo a command line, then execute it as a subprocess.

    Args:
        cmd: Program and arguments as an argv list (no shell is involved).
        cwd: Working directory for the child process; ``None`` inherits the
            current one.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` describing the finished command.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    import shlex  # local import: only needed to render the echo line

    # shlex.join quotes arguments that contain spaces or shell metacharacters,
    # so the echoed line can be pasted into a shell unchanged. Flushing keeps
    # the echo ahead of the child's own output when stdout is redirected.
    print(f" $ {shlex.join(cmd)}", flush=True)
    return subprocess.run(cmd, cwd=cwd, check=check)
def pip(*args):
    """Run ``pip install`` with ``args`` under the interpreter executing this script.

    The command is handed to ``run`` with its default settings, so a failing
    install propagates as an error instead of being ignored.
    """
    install_cmd = [sys.executable, "-m", "pip", "install"]
    install_cmd.extend(args)
    run(install_cmd)
def pip_try(*args):
    """Best-effort ``pip install`` for optional extras (CUDA / hub packages).

    A failing install never aborts setup, but it is reported so the user can
    tell which optional component is missing.

    Args:
        *args: Arguments appended to ``pip install``.

    Returns:
        ``True`` when pip exited with status 0, ``False`` otherwise.
    """
    print(f" $ pip install (optional) {' '.join(args)}", flush=True)
    result = subprocess.run([sys.executable, "-m", "pip", "install", *args], check=False)
    if result.returncode != 0:
        # Deliberately not raising: optional packages must not block setup.
        print(f" (optional install failed with exit code {result.returncode}; continuing)")
        return False
    return True
def install_acestep_runtime_dependencies() -> None:
    """Install the Python packages ACE-Step needs at import time.

    ACE-Step itself is installed with ``--no-deps`` so that a pinned torch
    stack is not overwritten; this fills in the libraries whose absence tends
    to show up first (loguru, transformers, ...).
    """
    # Hard requirements: a failed install here aborts setup.
    required_specs = (
        "loguru>=0.7.3",
        "accelerate>=1.12.0",
        "diffusers>=0.37.0",
        "diskcache",
        "einops>=0.8.1",
        "lightning>=2.0.0",
        "lycoris-lora",
        "matplotlib>=3.7.5",
        "peft>=0.18.0",
        "tensorboard>=2.20.0",
        "toml",
        "typer-slim>=0.21.1",
        "vector-quantize-pytorch>=1.27.15",
        "transformers>=4.51.0,<4.58.0",
        "safetensors",
        "sentencepiece",
    )
    # Nice-to-have; ACE-Step lists them but they often need extra system libs.
    optional_specs = (
        "modelscope",
        "torchao>=0.16.0,<0.17.0",
        "torchcodec>=0.9.1",
    )

    print(" Installing ACE-Step runtime Python dependencies (no torch upgrade)...")
    pip(*required_specs)

    # torchvision from the cu124 wheel index is attempted on Linux only and is
    # allowed to fail.
    on_linux = sys.platform == "linux"
    if on_linux:
        pip_try("torchvision==0.21.0", "--index-url", "https://download.pytorch.org/whl/cu124")

    # One pip invocation per optional package, so a failure of one does not
    # prevent the others from being attempted.
    for optional_spec in optional_specs:
        pip_try(optional_spec)

    print(" (Optional) For LM backend vllm, install: pip install nano-vllm # RoomConfig.lm_backend='vllm'")
def _status_label(skipped: bool) -> str:
    """Return the summary label for a component given its ``--skip-*`` flag."""
    return "SKIPPED" if skipped else "READY"


def _setup_acestep(models_dir: Path) -> None:
    """Clone ACE-Step 1.5 if needed, relax its Python pin, and install it editable."""
    acestep_dir = models_dir / "ace-step"
    pyproject = acestep_dir / "pyproject.toml"
    print("\n[1/4] ACE-Step 1.5")
    if not pyproject.exists():
        print(" Cloning...")
        run(["git", "clone", "https://github.com/ACE-Step/ACE-Step-1.5.git", str(acestep_dir)])
    else:
        print(" Already cloned.")

    # Widen the upstream Python version pin so the editable install is accepted
    # on interpreters outside 3.11-3.12. Explicit UTF-8 avoids depending on the
    # locale's default encoding when rewriting the file.
    text = pyproject.read_text(encoding="utf-8")
    if ">=3.11,<3.13" in text:
        pyproject.write_text(text.replace(">=3.11,<3.13", ">=3.10,<3.14"), encoding="utf-8")
        print(" Fixed Python version pin.")

    # --no-deps keeps pip from replacing the already-installed torch stack.
    pip("-e", str(acestep_dir), "--no-deps")
    install_acestep_runtime_dependencies()
    print(" ACE-Step ready. Weights auto-download on first run.")


def _setup_openvoice(models_dir: Path) -> None:
    """Clone OpenVoice if needed, install it editable, and fetch the V2 checkpoints."""
    openvoice_dir = models_dir / "openvoice"
    print("\n[2/4] OpenVoice V2")
    already_cloned = any(
        (openvoice_dir / marker).exists() for marker in ("setup.py", "pyproject.toml")
    )
    if not already_cloned:
        print(" Cloning...")
        run(["git", "clone", "https://github.com/myshell-ai/OpenVoice.git", str(openvoice_dir)])
    else:
        print(" Already cloned.")
    pip("-e", str(openvoice_dir), "--no-deps")

    # Download checkpoints
    ckpt_dir = openvoice_dir / "checkpoints_v2"
    if not ckpt_dir.exists():
        print(" Downloading OpenVoice V2 checkpoints...")
        pip("huggingface-hub")
        # The destination is passed as an argument (sys.argv[1]) rather than
        # interpolated into the source text, so paths containing quotes or
        # backslashes cannot corrupt the inline program.
        run([
            sys.executable, "-c",
            "import sys; from huggingface_hub import snapshot_download; "
            "snapshot_download('myshell-ai/OpenVoiceV2', local_dir=sys.argv[1])",
            str(ckpt_dir),
        ])
    else:
        print(" Checkpoints already downloaded.")

    # Install OpenVoice deps explicitly (the package itself went in with
    # --no-deps, so its own pins are not applied).
    pip("librosa", "wavmark", "whisper-timestamped", "pydub")
    # Keep NumPy 1.x for matplotlib / torch extensions on mixed installs
    pip("numpy>=1.26,<2")
    print(" OpenVoice ready.")


def _write_env_file(models_dir: Path) -> Path:
    """Write ``.env.room`` in the project root and return its path."""
    env_path = ROOT / ".env.room"
    env_lines = (
        f"ACESTEP_ROOT={models_dir / 'ace-step'}\n",
        f"OPENVOICE_ROOT={models_dir / 'openvoice'}\n",
        "DEMUCS_MODEL=htdemucs\n",
    )
    env_path.write_text("".join(env_lines), encoding="utf-8")
    return env_path


def main():
    """Install the ROOM model components selected on the command line.

    Steps, each skippable with its ``--skip-*`` flag: ACE-Step 1.5,
    OpenVoice V2, Demucs, BasicPitch. Afterwards the ``.env.room`` config file
    is (re)written, NumPy is pinned below 2, and a summary is printed. The
    config file and the NumPy pin are applied regardless of skipped steps.

    Raises:
        subprocess.CalledProcessError: If a required git or pip command fails.
    """
    ap = argparse.ArgumentParser(description="Set up all ROOM model components")
    ap.add_argument("--skip-acestep", action="store_true", help="skip the ACE-Step 1.5 step")
    ap.add_argument("--skip-openvoice", action="store_true", help="skip the OpenVoice V2 step")
    ap.add_argument("--skip-demucs", action="store_true", help="skip the Demucs step")
    ap.add_argument("--skip-basicpitch", action="store_true", help="skip the BasicPitch step")
    args = ap.parse_args()

    models_dir = ROOT / "models"
    models_dir.mkdir(exist_ok=True)

    # -- 1. ACE-Step 1.5 --
    if not args.skip_acestep:
        _setup_acestep(models_dir)
    else:
        print("\n[1/4] ACE-Step — skipped")

    # -- 2. OpenVoice V2 --
    if not args.skip_openvoice:
        _setup_openvoice(models_dir)
    else:
        print("\n[2/4] OpenVoice — skipped")

    # -- 3. Demucs --
    if not args.skip_demucs:
        print("\n[3/4] Demucs")
        pip("demucs")
        print(" Demucs ready. Model downloads on first use (~80MB).")
    else:
        print("\n[3/4] Demucs — skipped")

    # -- 4. BasicPitch --
    if not args.skip_basicpitch:
        print("\n[4/4] BasicPitch")
        pip("basic-pitch")
        print(" BasicPitch ready.")
    else:
        print("\n[4/4] BasicPitch — skipped")

    # -- Config file --
    env_path = _write_env_file(models_dir)

    # Whisper/OpenVoice can pull numpy 2.x; ROOM + torch wheels expect 1.26.x
    print("\n[finalize] Pin numpy<2 for stack stability...")
    pip("numpy>=1.26,<2")

    print(f"""
==========================================================
ROOM Setup Complete
==========================================================
ACE-Step 1.5 : {_status_label(args.skip_acestep)}
OpenVoice V2 : {_status_label(args.skip_openvoice)}
Demucs : {_status_label(args.skip_demucs)}
BasicPitch : {_status_label(args.skip_basicpitch)}
Config: {env_path}
If generation fails with libcudart.so.13: system PyTorch is CUDA 12 but pip
installed torchaudio for CUDA 13. Fix:
bash scripts/bootstrap_venv_room.sh
source .venv/bin/activate
python scripts/setup_room.py
Verify imports (catches missing deps before Gradio):
python scripts/verify_room_env.py
Quick test:
python -m modelw.room "piano ballad, E minor, emotional" --stems --midi
Full pipeline with voice cloning:
python -m modelw.room "R&B love song, my style" --voice my_voice.wav --stems --midi
==========================================================
""")


if __name__ == "__main__":
    main()