MuseTalk / install.sh
marcos
Add StyleTTS2 integration scripts for voice cloning and lip sync pipeline
66e2a44
#!/bin/bash
# Script de instalacao completa para MuseTalk V1.5 + StyleTTS2
# Testado em: vast.ai com CUDA 12.4, Python 3.12, PyTorch 2.6
# Data: Dezembro 2024
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

echo "=== AI Video Setup - Instalacao Completa ==="
echo ""

# ANSI colour escape sequences for the log output. They are stored as literal
# backslash sequences and are only interpreted when printed.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # reset ("no colour")
# Logging helpers

#######################################
# Print an informational message to stdout, tagged with a coloured [INFO].
# Globals:   GREEN, NC (read; treated as empty when unset)
# Arguments: $1 - message text, printed verbatim (backslash sequences in the
#                 message are NOT expanded)
# Outputs:   one line on stdout
#######################################
log_info() {
  # %b interprets the escape sequences stored in the colour variables;
  # %s keeps the caller's message exactly as given.
  printf '%b[INFO]%b %s\n' "${GREEN-}" "${NC-}" "${1-}"
}
#######################################
# Print a warning to stderr, tagged with a coloured [WARN].
# Globals:   YELLOW, NC (read; treated as empty when unset)
# Arguments: $1 - message text, printed verbatim (backslash sequences in the
#                 message are NOT expanded)
# Outputs:   one line on stderr
#######################################
log_warn() {
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  printf '%b[WARN]%b %s\n' "${YELLOW-}" "${NC-}" "${1-}" >&2
}
#######################################
# Print an error message to stderr, tagged with a coloured [ERROR].
# Globals:   RED, NC (read; treated as empty when unset)
# Arguments: $1 - message text, printed verbatim (backslash sequences in the
#                 message are NOT expanded)
# Outputs:   one line on stderr
#######################################
log_error() {
  # Errors belong on stderr so they survive stdout redirection.
  printf '%b[ERROR]%b %s\n' "${RED-}" "${NC-}" "${1-}" >&2
}
# Check for CUDA before installing anything.
log_info "Verificando CUDA..."
if ! command -v nvcc >/dev/null 2>&1; then
  # nvcc is not on PATH; fall back to running nvidia-smi and stop the
  # script if that fails as well.
  log_warn "nvcc nao encontrado, verificando nvidia-smi..."
  if ! nvidia-smi; then
    log_error "CUDA nao disponivel!"
    exit 1
  fi
fi
# Install the system packages (ffmpeg, git, wget, curl).
log_info "Instalando dependencias do sistema..."
apt-get update -qq
# DEBIAN_FRONTEND=noninteractive stops debconf from opening interactive
# prompts during package configuration, which would hang an unattended run.
DEBIAN_FRONTEND=noninteractive apt-get install -y -qq ffmpeg git wget curl
# Install PyTorch from the CUDA 12.4 wheel index (adjust the index URL to
# match your CUDA version).
log_info "Instalando PyTorch para CUDA 12.4..."
pip install --quiet \
  --index-url https://download.pytorch.org/whl/cu124 \
  torch torchvision torchaudio
# ============================================
# CRITICAL VERSIONS - DO NOT CHANGE!
# These versions were tested and work together.
# ============================================
log_info "Instalando pacotes com versoes especificas (CRITICO - nao alterar!)..."
# Installation order matters: the pins are installed one at a time, in this
# exact sequence.
for pinned_pkg in accelerate==0.25.0 diffusers==0.21.0 huggingface-hub==0.25.0; do
  pip install "${pinned_pkg}" -q
done
log_warn "IMPORTANTE: accelerate==0.25.0 + diffusers==0.21.0 sao obrigatorios!"
log_warn "Versoes mais novas causam erro 'cannot import clear_device_cache'"
# Install MMDetection through mim: first the openmim installer itself, then
# the three MM packages in order.
log_info "Instalando MMDetection..."
pip install -U openmim -q
for mm_pkg in mmengine mmcv mmdet; do
  mim install "${mm_pkg}" -q
done
# Install the StyleTTS2 package from pip.
log_info "Instalando StyleTTS2..."
pip install --quiet styletts2
# Download the NLTK data sets 'punkt' and 'punkt_tab' (needed by StyleTTS2).
log_info "Baixando dados NLTK..."
python3 -c "
import nltk
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource, quiet=True)
"
# Fetch the MuseTalk sources into /root/musetalk-space (the clone is skipped
# when that directory already exists) and make it the working directory.
log_info "Clonando MuseTalk V1.5..."
cd /root
[ -d "musetalk-space" ] || git clone https://github.com/TMElyralab/MuseTalk.git musetalk-space
cd musetalk-space
# Install MuseTalk's own requirements. This step is best-effort: a failure
# must not abort the script. Unlike a blanket `2>/dev/null || true`, pip's
# error output stays visible and the failure is reported as a warning.
log_info "Instalando dependencias do MuseTalk..."
if ! pip install -r requirements.txt -q; then
  log_warn "Falha ao instalar requirements.txt do MuseTalk; continuando mesmo assim"
fi
# Download the MuseTalk models.
log_info "Baixando modelos do MuseTalk (pode demorar)..."
# Create the target folders up front, one per model sub-folder.
for model_dir in musetalkV15 dwpose face-parse-bisent whisper; do
  mkdir -p "models/${model_dir}"
done
# Fetch only the listed sub-folders of TMElyralab/MuseTalk via huggingface-cli.
huggingface-cli download TMElyralab/MuseTalk \
  --local-dir ./models \
  --include "musetalkV15/*" "dwpose/*" "face-parse-bisent/*" "whisper/*"
# Other useful Python dependencies.
log_info "Instalando outras dependencias..."
pip install --quiet \
  yt-dlp ffmpeg-python scipy librosa \
  pyyaml omegaconf einops
# ============================================
# REINSTALL CRITICAL VERSIONS (make sure they were not overwritten)
# ============================================
log_info "Garantindo versoes corretas apos todas as instalacoes..."
pip install --quiet \
  accelerate==0.25.0 \
  diffusers==0.21.0 \
  huggingface-hub==0.25.0
# Verify the installation: print the installed versions of the key packages,
# whether CUDA is available, and whether StyleTTS2 can be imported.
echo ""
log_info "=== Verificando instalacao ==="
# A quoted heredoc passes the program to Python verbatim (no shell expansion)
# and keeps the indentation that Python's block syntax requires.
python3 - <<'PY'
import torch
print(f'PyTorch: {torch.__version__}')
print(f'CUDA disponivel: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
import accelerate
print(f'Accelerate: {accelerate.__version__}')
import diffusers
print(f'Diffusers: {diffusers.__version__}')
import huggingface_hub
print(f'HuggingFace Hub: {huggingface_hub.__version__}')
try:
    import styletts2
    print('StyleTTS2: OK')
except Exception as exc:
    # Report the failure together with its cause instead of hiding it; the
    # check itself stays non-fatal.
    print(f'StyleTTS2: ERRO ({exc})')
PY
# Closing summary: restate the pinned versions and show example commands for
# generating audio and running lip sync.
echo ""
log_info "=== Instalacao concluida! ==="
printf '%s\n' \
  "" \
  "Versoes instaladas (CRITICAS):" \
  " - accelerate==0.25.0" \
  " - diffusers==0.21.0" \
  " - huggingface-hub==0.25.0" \
  "" \
  "Para testar:" \
  " # Gerar audio com voz clonada" \
  " python3 scripts/generate_audio.py --text 'Hello world' --voice ref.wav -o output.wav" \
  "" \
  " # Fazer lip sync" \
  " cd /root/musetalk-space" \
  " python3 -m scripts.inference --inference_config config.yaml --result_dir ./output"