#!/bin/bash
# Speech-X — First-time environment setup
#
# Creates the conda 'avatar' environment and installs all dependencies in
# stages (PyTorch, MMLab, MuseTalk deps, project deps, frontend packages).
#
# Usage (run from the repo root):
#   bash setup/setup.sh
#
# Requirements: conda on PATH; npm on PATH for the frontend stage.
# Exit status: 0 on success, non-zero as soon as any stage fails.

set -euo pipefail

readonly CONDA_ENV="avatar"

# Print an error message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print a non-fatal warning to stderr.
warn() {
  printf 'WARNING: %s\n' "$*" >&2
}

# Run a command inside the target conda environment.
# --no-capture-output streams the child's stdout/stderr live; without it
# `conda run` buffers everything until the command exits, which makes long
# pip installs look frozen.
run_in_env() {
  conda run --no-capture-output -n "$CONDA_ENV" "$@"
}

echo "=== Speech-X Setup (conda env: $CONDA_ENV) ==="

# ── Stage 0: sanity checks ───────────────────────────────────────────────────
if ! command -v conda &>/dev/null; then
  echo "ERROR: conda not found." >&2
  echo "Install Miniconda: https://docs.conda.io/en/latest/miniconda.html" >&2
  exit 1
fi

# Later stages use paths relative to the repo root; fail fast here instead of
# after the long-running installs.
if [[ ! -f backend/requirements.txt || ! -d frontend ]]; then
  die "backend/requirements.txt or frontend/ not found — run from the repo root: bash setup/setup.sh"
fi

if ! command -v npm &>/dev/null; then
  warn "npm not found — the frontend stage at the end of this script will fail."
fi

# Ensure conda is initialized for zsh/bash in this container.
# Only needed for a later interactive `conda activate`; nothing below depends
# on it, so a failure is reported but does not abort the setup.
if ! conda init bash zsh &>/dev/null; then
  warn "'conda init bash zsh' failed — run it manually before 'conda activate'."
fi

if command -v nvidia-smi &>/dev/null; then
  echo "GPU detected:"
  nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
else
  warn "nvidia-smi not found — CPU-only mode."
fi

# ── Stage 1: Create conda environment ────────────────────────────────────────
# awk compares the first column exactly and reads all of its input, so the
# pipeline cannot fail spuriously under `pipefail` the way an early-exiting
# `grep -q` can.
if conda env list | awk -v env="$CONDA_ENV" '$1 == env { found = 1 } END { exit !found }'; then
  echo "Conda env '$CONDA_ENV' already exists — skipping creation."
else
  echo "Creating conda env '$CONDA_ENV' (Python 3.12)..."
  conda create -y -n "$CONDA_ENV" python=3.12
fi

# ── Stage 2: PyTorch ─────────────────────────────────────────────────────────
echo "Installing PyTorch 2.5.1 + CUDA 12.4..."
run_in_env pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 \
  --index-url https://download.pytorch.org/whl/cu124

# ── Stage 3: MMLab packages ──────────────────────────────────────────────────
echo "Installing MMLab packages..."
run_in_env pip install --no-cache-dir -U openmim
# Force setuptools to a version compatible with Python 3.12 (>=63.2) but older
# than 70 to preserve pkg_resources behavior expected by openmim/openxlab.
run_in_env pip install "setuptools==69.5.1"
run_in_env mim install mmengine
run_in_env pip install "mmcv-lite==2.2.0"
run_in_env mim install "mmdet==3.3.0"
# Note: mmpose not required — not present in the reference env

# ── Stage 4: MuseTalk core deps ──────────────────────────────────────────────
echo "Installing MuseTalk dependencies..."
# Broken into chunks to show progress and avoid seeming "frozen".
run_in_env pip install "numpy==2.4.2" "opencv-python==4.13.0.92"
run_in_env pip install diffusers==0.30.2 accelerate==0.28.0
# tokenizers is automatically handled by transformers
run_in_env pip install "transformers==4.39.2" "huggingface-hub==0.36.2"
run_in_env pip install "soundfile==0.12.1" "librosa==0.10.2" "einops==0.8.1"
run_in_env pip install gdown requests "imageio==2.34.0" imageio-ffmpeg \
  "omegaconf==2.3.0" ffmpeg-python moviepy

# ── Stage 5: Project-specific deps ───────────────────────────────────────────
echo "Installing project dependencies..."
run_in_env pip install -r backend/requirements.txt

# ── Frontend ─────────────────────────────────────────────────────────────────
echo "Installing frontend dependencies..."
command -v npm &>/dev/null \
  || die "npm not found — install Node.js, then run: (cd frontend && npm install)"
# Subshell keeps the directory change from leaking into the rest of the script.
(cd frontend && npm install)

echo ""
echo "=== Setup complete ==="
echo ""
echo "Activate: conda activate $CONDA_ENV"
echo "Avatar page: conda activate $CONDA_ENV && cd backend && python api/server.py"
echo "Voice agent: conda activate $CONDA_ENV && cd backend && python agent.py dev"
echo "Frontend: cd frontend && npm run dev"