#!/bin/bash
# ============================================================
# EMOLIPS - Emotion-Driven Lip-Sync Setup Script
# Run on RunPod / any Ubuntu GPU instance
# (assumes root, as is typical in RunPod containers: apt-get
#  and pip are invoked directly, without sudo)
# Usage: bash setup.sh
# ============================================================
# Strict mode: abort on errors, unset variables, and pipeline failures.
set -euo pipefail

echo "=========================================="
echo " EMOLIPS Setup - Emotion Lip-Sync MVP"
echo "=========================================="

# 1. System deps
echo "[1/6] Installing system dependencies..."
# ffmpeg: video muxing; libsndfile1: audio I/O for librosa/soundfile.
apt-get update -qq && apt-get install -y -qq ffmpeg libsndfile1 > /dev/null 2>&1
echo " ✓ System deps installed"

# 2. Python deps
echo "[2/6] Installing Python packages..."
# CUDA 11.8 wheels; `|| true` is intentional best-effort — on a
# non-CUDA box the default PyPI torch (pulled below via deps) still works.
pip install -q torch torchvision torchaudio \
  --index-url https://download.pytorch.org/whl/cu118 2>/dev/null || true
pip install -q \
  transformers \
  librosa \
  soundfile \
  opencv-python-headless \
  mediapipe \
  numpy \
  scipy \
  pillow \
  tqdm \
  gdown \
  pyyaml \
  imageio \
  imageio-ffmpeg \
  scikit-image \
  kornia \
  face-alignment \
  dlib \
  gradio \
  einops \
  safetensors \
  accelerate \
  yacs \
  pydub \
  resampy
echo " ✓ Python packages installed"

# 3. Clone SadTalker
echo "[3/6] Cloning SadTalker backbone..."
if [ ! -d "SadTalker" ]; then
  git clone --depth 1 https://github.com/OpenTalker/SadTalker.git
  # Subshell keeps the working directory stable even if pip fails;
  # the requirements install is best-effort (core deps installed above).
  ( cd SadTalker && pip install -q -r requirements.txt 2>/dev/null ) || true
fi
echo " ✓ SadTalker cloned"

# 4. Download SadTalker pretrained weights
echo "[4/6] Downloading SadTalker checkpoints..."
# Subshell so the script always returns to the repo root afterwards.
(
  cd SadTalker
  if [ ! -d "checkpoints" ]; then
    mkdir -p checkpoints
    # SadTalker checkpoint download; fall back to gdown if the
    # upstream script fails (e.g. dead mirror).
    bash scripts/download_models.sh 2>/dev/null || {
      echo " ⚠ Auto-download failed. Trying gdown..."
      # Manual download links (these are the key weights).
      # NOTE(review): trailing slash makes gdown treat -O as a target
      # directory — requires gdown >= 4; verify on the target image.
      gdown --fuzzy "https://drive.google.com/uc?id=1gwJEawt0Q_7kJXFnhVYQklsb4HGDSM0D" \
        -O checkpoints/ 2>/dev/null || true
      echo " ⚠ If checkpoints missing, download manually from SadTalker GitHub releases"
    }
  fi
)
echo " ✓ Checkpoints ready (verify manually if needed)"

# 5. Download emotion recognition model (will auto-download on first
#    run via HuggingFace). Quoted here-doc: the Python source is passed
#    literally, with no shell expansion inside it.
echo "[5/6] Pre-caching emotion recognition model..."
python3 - <<'PYEOF' 2>/dev/null || echo " ⚠ Emotion model will download on first inference run"
from transformers import pipeline
print(' Downloading emotion recognition model...')
classifier = pipeline('audio-classification', model='ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition', device=-1)
print(' ✓ Emotion model cached')
PYEOF

# 6. Setup project structure
echo "[6/6] Setting up EMOLIPS project structure..."
mkdir -p outputs samples results

echo ""
echo "=========================================="
echo " SETUP COMPLETE!"
echo "=========================================="
echo ""
echo "Quick test:"
echo " python inference.py --audio sample.wav --image face.jpg"
echo ""
echo "With emotion override:"
echo " python inference.py --audio sample.wav --image face.jpg --emotion happy --intensity 0.8"
echo ""
echo "Run all emotions:"
echo " python inference.py --audio sample.wav --image face.jpg --all-emotions"
echo ""