File size: 7,946 Bytes
aed1d05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/usr/bin/env python3
"""
ROOM Setup — Download and install all model components.

Pulls:
  1. ACE-Step 1.5   (text → audio)
  2. OpenVoice V2   (voice cloning)
  3. Demucs         (stem separation)
  4. BasicPitch     (audio → MIDI)

On Lambda / Ubuntu with apt ``python3-torch`` (CUDA 12): use a venv + matching wheels
or you get ``libcudart.so.13`` when pip installs a newer torchaudio:

  python3 scripts/bootstrap_venv_room.py
  source .venv/bin/activate
  python scripts/setup_room.py

  (shell alternative: bash scripts/bootstrap_venv_room.sh)

Usage:
  python scripts/setup_room.py
  python scripts/setup_room.py --skip-acestep   (if already installed)
"""

from __future__ import annotations

import argparse
import os
import subprocess
import sys
from pathlib import Path

# Parent of the directory that contains this script (the project root when the
# script is invoked as ``scripts/setup_room.py``, as shown in the usage above).
ROOT = Path(__file__).resolve().parents[1]


def run(cmd: list[str], cwd=None, check=True):
    """
    Echo a command, then execute it as a subprocess (no shell involved).

    Args:
        cmd: Program and its arguments, passed to ``subprocess.run`` as a list.
        cwd: Working directory for the child; ``None`` inherits the current one.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.
    """
    import shlex  # local import: only needed to format the echo line

    # shlex.join quotes arguments such as "loguru>=0.7.3" or inline ``-c`` code,
    # so the echoed line is safe to copy into a shell. Flushing keeps the echo
    # ahead of the child's own output when stdout is a pipe or a log file.
    print(f"  $ {shlex.join(cmd)}", flush=True)
    return subprocess.run(cmd, cwd=cwd, check=check)


def pip(*args):
    """Run ``pip install`` with *args* under the interpreter executing this script."""
    install_cmd = [sys.executable, "-m", "pip", "install"]
    install_cmd.extend(args)
    run(install_cmd)


def pip_try(*args):
    """
    Install but do not fail setup (optional CUDA / hub extras).

    Args:
        *args: Arguments appended to ``pip install``.

    Returns:
        ``True`` when pip exited with status 0, ``False`` otherwise. A failure
        is reported on stdout but never raised.
    """
    # Flush so the echo is not reordered after pip's own output on piped stdout.
    print(f"  $ pip install (optional) {' '.join(args)}", flush=True)
    result = subprocess.run([sys.executable, "-m", "pip", "install", *args], check=False)
    succeeded = result.returncode == 0
    if not succeeded:
        # Still best-effort, but make the failure visible instead of silent.
        print(
            f"  (optional install failed with exit code {result.returncode}; continuing)",
            flush=True,
        )
    return succeeded


def install_acestep_runtime_dependencies() -> None:
    """
    Install the Python packages ACE-Step needs at runtime.

    ACE-Step itself is installed with --no-deps so that a pinned torch stack is
    not clobbered; the packages whose imports typically break first when missing
    (loguru, transformers, …) are therefore installed here explicitly.
    Required packages go through ``pip`` (a failure aborts setup); the
    torchvision wheel and the nice-to-have extras go through ``pip_try``.
    """
    print("  Installing ACE-Step runtime Python dependencies (no torch upgrade)...")

    # Required: installed in a single pip invocation.
    required_specs = [
        "loguru>=0.7.3",
        "accelerate>=1.12.0",
        "diffusers>=0.37.0",
        "diskcache",
        "einops>=0.8.1",
        "lightning>=2.0.0",
        "lycoris-lora",
        "matplotlib>=3.7.5",
        "peft>=0.18.0",
        "tensorboard>=2.20.0",
        "toml",
        "typer-slim>=0.21.1",
        "vector-quantize-pytorch>=1.27.15",
        "transformers>=4.51.0,<4.58.0",
        "safetensors",
        "sentencepiece",
    ]
    pip(*required_specs)

    # Linux only: pinned torchvision from the PyTorch cu124 wheel index.
    running_on_linux = sys.platform == "linux"
    if running_on_linux:
        torchvision_args = (
            "torchvision==0.21.0",
            "--index-url",
            "https://download.pytorch.org/whl/cu124",
        )
        pip_try(*torchvision_args)

    # Nice-to-have; ACE-Step lists them but they often need extra system libs.
    # One pip call per package so a single failure does not block the others.
    optional_specs = ("modelscope", "torchao>=0.16.0,<0.17.0", "torchcodec>=0.9.1")
    for optional_spec in optional_specs:
        pip_try(optional_spec)

    print("  (Optional) For LM backend vllm, install: pip install nano-vllm  # RoomConfig.lm_backend='vllm'")


def _clone_if_missing(repo_url: str, dest: Path, markers: tuple[str, ...]) -> None:
    """Clone *repo_url* into *dest* unless one of the *markers* files already exists there."""
    if any((dest / marker).exists() for marker in markers):
        print("  Already cloned.")
        return
    print("  Cloning...")
    run(["git", "clone", repo_url, str(dest)])


def _setup_acestep(models_dir: Path) -> None:
    """Clone ACE-Step 1.5 under *models_dir*, relax its Python pin, and install it."""
    acestep_dir = models_dir / "ace-step"
    print("\n[1/4] ACE-Step 1.5")
    _clone_if_missing(
        "https://github.com/ACE-Step/ACE-Step-1.5.git",
        acestep_dir,
        ("pyproject.toml",),
    )

    # Fix Python version pin. Read and write as UTF-8 explicitly so the result
    # does not depend on the locale encoding.
    pyproject = acestep_dir / "pyproject.toml"
    text = pyproject.read_text(encoding="utf-8")
    if ">=3.11,<3.13" in text:
        pyproject.write_text(text.replace(">=3.11,<3.13", ">=3.10,<3.14"), encoding="utf-8")
        print("  Fixed Python version pin.")

    # --no-deps keeps pip from touching the torch stack; the runtime
    # dependencies are installed separately right after.
    pip("-e", str(acestep_dir), "--no-deps")
    install_acestep_runtime_dependencies()
    print("  ACE-Step ready. Weights auto-download on first run.")


def _setup_openvoice(models_dir: Path) -> None:
    """Clone OpenVoice under *models_dir*, install it, and fetch the V2 checkpoints."""
    openvoice_dir = models_dir / "openvoice"
    print("\n[2/4] OpenVoice V2")
    _clone_if_missing(
        "https://github.com/myshell-ai/OpenVoice.git",
        openvoice_dir,
        ("setup.py", "pyproject.toml"),
    )

    pip("-e", str(openvoice_dir), "--no-deps")

    # Download checkpoints
    ckpt_dir = openvoice_dir / "checkpoints_v2"
    if not ckpt_dir.exists():
        print("  Downloading OpenVoice V2 checkpoints...")
        pip("huggingface-hub")
        # The path is embedded with !r so it becomes a valid Python string
        # literal even when it contains quotes or backslashes.
        run([
            sys.executable, "-c",
            "from huggingface_hub import snapshot_download; "
            f"snapshot_download('myshell-ai/OpenVoiceV2', local_dir={str(ckpt_dir)!r})"
        ])
    else:
        print("  Checkpoints already downloaded.")

    # Install OpenVoice deps (avoid OpenVoice's old numpy pin pulling numpy 2.x)
    pip("librosa", "wavmark", "whisper-timestamped", "pydub")
    # Keep NumPy 1.x for matplotlib / torch extensions on mixed installs
    pip("numpy>=1.26,<2")
    print("  OpenVoice ready.")


def _write_env_file(models_dir: Path) -> Path:
    """Write ``.env.room`` under ROOT with the model locations and return its path."""
    env_path = ROOT / ".env.room"
    with open(env_path, "w", encoding="utf-8") as f:
        f.write(f"ACESTEP_ROOT={models_dir / 'ace-step'}\n")
        f.write(f"OPENVOICE_ROOT={models_dir / 'openvoice'}\n")
        f.write("DEMUCS_MODEL=htdemucs\n")
    return env_path


def _component_status(skipped: bool) -> str:
    """Return the summary label for a component given its ``--skip-*`` flag."""
    return "SKIPPED" if skipped else "READY"


def main():
    """
    Command-line entry point: install every ROOM model component.

    Parses the ``--skip-*`` flags, sets up each component that is not skipped
    (ACE-Step, OpenVoice, Demucs, BasicPitch), writes ``.env.room`` under ROOT,
    re-pins numpy below 2, and prints a summary. Commands run through ``run`` /
    ``pip`` raise ``subprocess.CalledProcessError`` when they fail.
    """
    ap = argparse.ArgumentParser(description="Set up all ROOM model components")
    ap.add_argument("--skip-acestep", action="store_true", help="do not set up ACE-Step 1.5")
    ap.add_argument("--skip-openvoice", action="store_true", help="do not set up OpenVoice V2")
    ap.add_argument("--skip-demucs", action="store_true", help="do not install Demucs")
    ap.add_argument("--skip-basicpitch", action="store_true", help="do not install BasicPitch")
    args = ap.parse_args()

    models_dir = ROOT / "models"
    models_dir.mkdir(exist_ok=True)

    # ── 1. ACE-Step 1.5 ───────────────────────────────────────────────
    if not args.skip_acestep:
        _setup_acestep(models_dir)
    else:
        print("\n[1/4] ACE-Step — skipped")

    # ── 2. OpenVoice V2 ──────────────────────────────────────────────
    if not args.skip_openvoice:
        _setup_openvoice(models_dir)
    else:
        print("\n[2/4] OpenVoice — skipped")

    # ── 3. Demucs ────────────────────────────────────────────────────
    if not args.skip_demucs:
        print("\n[3/4] Demucs")
        pip("demucs")
        print("  Demucs ready. Model downloads on first use (~80MB).")
    else:
        print("\n[3/4] Demucs — skipped")

    # ── 4. BasicPitch ────────────────────────────────────────────────
    if not args.skip_basicpitch:
        print("\n[4/4] BasicPitch")
        pip("basic-pitch")
        print("  BasicPitch ready.")
    else:
        print("\n[4/4] BasicPitch — skipped")

    # ── Config file ──────────────────────────────────────────────────
    env_path = _write_env_file(models_dir)

    # Whisper/OpenVoice can pull numpy 2.x; ROOM + torch wheels expect 1.26.x
    print("\n[finalize] Pin numpy<2 for stack stability...")
    pip("numpy>=1.26,<2")

    print(f"""
==========================================================
  ROOM Setup Complete
==========================================================

  ACE-Step 1.5 : {_component_status(args.skip_acestep)}
  OpenVoice V2 : {_component_status(args.skip_openvoice)}
  Demucs       : {_component_status(args.skip_demucs)}
  BasicPitch   : {_component_status(args.skip_basicpitch)}

  Config: {env_path}

  If generation fails with libcudart.so.13: system PyTorch is CUDA 12 but pip
  installed torchaudio for CUDA 13. Fix:

    bash scripts/bootstrap_venv_room.sh
    source .venv/bin/activate
    python scripts/setup_room.py

  Verify imports (catches missing deps before Gradio):
    python scripts/verify_room_env.py

  Quick test:
    python -m modelw.room "piano ballad, E minor, emotional" --stems --midi

  Full pipeline with voice cloning:
    python -m modelw.room "R&B love song, my style" --voice my_voice.wav --stems --midi

==========================================================
""")


# Run the setup only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()