File size: 4,299 Bytes
66e2a44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
# Full installation script for MuseTalk V1.5 + StyleTTS2.
# Tested on: vast.ai with CUDA 12.4, Python 3.12, PyTorch 2.6
# Date: December 2024

# Abort as soon as any unguarded command exits with a non-zero status.
set -o errexit

# Startup banner followed by one empty line.
printf '%s\n\n' "=== AI Video Setup - Instalacao Completa ==="

# ANSI color sequences used by the log helpers. They are stored as literal
# backslash text and only turned into escape characters when printed
# (printf %b).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color (reset)

# Log helpers: print the message in $1 prefixed with a colored severity tag.
#   log_info  -> stdout
#   log_warn  -> stderr
#   log_error -> stderr
# Warnings and errors go to stderr so they remain visible when stdout is
# redirected. The message is printed with %s, so backslashes inside it are
# emitted verbatim instead of being interpreted as escape sequences.
log_info() { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "${1-}"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "${1-}" >&2; }
log_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "${1-}" >&2; }

# CUDA sanity check: when nvcc is not found on PATH, fall back to running
# nvidia-smi; if that fails as well, log an error and exit with status 1.
log_info "Verificando CUDA..."
if ! command -v nvcc > /dev/null 2>&1; then
    log_warn "nvcc nao encontrado, verificando nvidia-smi..."
    if ! nvidia-smi; then
        log_error "CUDA nao disponivel!"
        exit 1
    fi
fi

# Install the system packages used later in the script: ffmpeg, git, wget
# and curl.
# DEBIAN_FRONTEND=noninteractive keeps debconf from stopping for input:
# -y only answers apt's own yes/no questions, not package configuration
# prompts, so without it an unattended run can hang.
log_info "Instalando dependencias do sistema..."
apt-get update -qq
DEBIAN_FRONTEND=noninteractive apt-get install -y -qq ffmpeg git wget curl

# Install torch, torchvision and torchaudio from the CUDA 12.4 wheel index
# (change the index URL to match your CUDA version).
log_info "Instalando PyTorch para CUDA 12.4..."
pip install -q \
    --index-url https://download.pytorch.org/whl/cu124 \
    torch torchvision torchaudio

# ============================================
# CRITICAL VERSIONS - DO NOT CHANGE!
# These versions were tested and work together
# ============================================
log_info "Instalando pacotes com versoes especificas (CRITICO - nao alterar!)..."

# Installation order matters, so every pin gets its own pip invocation,
# executed in exactly this sequence.
for pinned_pkg in accelerate==0.25.0 diffusers==0.21.0 huggingface-hub==0.25.0; do
    pip install "$pinned_pkg" -q
done

log_warn "IMPORTANTE: accelerate==0.25.0 + diffusers==0.21.0 sao obrigatorios!"
log_warn "Versoes mais novas causam erro 'cannot import clear_device_cache'"

# Install MMDetection through OpenMMLab's `mim` installer: first openmim
# itself, then mmengine, mmcv and mmdet, one at a time and in that order.
log_info "Instalando MMDetection..."
pip install -U openmim -q
for mm_pkg in mmengine mmcv mmdet; do
    mim install "$mm_pkg" -q
done

# Install the StyleTTS2 package from pip.
log_info "Instalando StyleTTS2..."
pip install styletts2 -q

# Download the NLTK resources 'punkt' and 'punkt_tab' (needed by StyleTTS2),
# with NLTK's progress output silenced.
log_info "Baixando dados NLTK..."
python3 -c '
import nltk

for resource in ("punkt", "punkt_tab"):
    nltk.download(resource, quiet=True)
'

# Get the MuseTalk sources into /root/musetalk-space and enter the checkout.
# The clone is skipped when the directory already exists.
log_info "Clonando MuseTalk V1.5..."
cd /root
[[ -d "musetalk-space" ]] \
    || git clone https://github.com/TMElyralab/MuseTalk.git musetalk-space

cd musetalk-space

# Install MuseTalk's own requirements on a best-effort basis: a failure here
# does not abort the script. Unlike a blanket `2>/dev/null || true`, pip's
# error output stays visible and a warning is logged, so a failure can be
# diagnosed.
log_info "Instalando dependencias do MuseTalk..."
if ! pip install -r requirements.txt -q; then
    log_warn "Falha ao instalar requirements.txt do MuseTalk; continuando mesmo assim"
fi

# Prepare the model directories and download the matching files from the
# TMElyralab/MuseTalk Hugging Face repository into ./models.
log_info "Baixando modelos do MuseTalk (pode demorar)..."
model_dirs=(musetalkV15 dwpose face-parse-bisent whisper)
include_globs=()
for model_dir in "${model_dirs[@]}"; do
    mkdir -p "models/${model_dir}"
    # One "<dir>/*" include pattern per model directory.
    include_globs+=("${model_dir}/*")
done

# Download through huggingface-cli, restricted to the patterns built above.
huggingface-cli download TMElyralab/MuseTalk \
    --local-dir ./models \
    --include "${include_globs[@]}"

# Additional helper packages, installed in a single pip call.
log_info "Instalando outras dependencias..."
extra_pkgs=(yt-dlp ffmpeg-python scipy librosa pyyaml omegaconf einops)
pip install "${extra_pkgs[@]}" -q

# ============================================
# REINSTALL CRITICAL VERSIONS (make sure the pins were not overwritten by
# the installs above)
# ============================================
log_info "Garantindo versoes corretas apos todas as instalacoes..."
critical_pins=(accelerate==0.25.0 diffusers==0.21.0 huggingface-hub==0.25.0)
pip install "${critical_pins[@]}" -q

# Post-install check: print the versions of the key Python packages and
# whether CUDA is visible to PyTorch. A failing torch/accelerate/diffusers/
# huggingface_hub import makes python3 exit non-zero; a failing styletts2
# import is only reported.
# The Python code is passed via a quoted heredoc so the shell performs no
# expansion on it.
echo ""
log_info "=== Verificando instalacao ==="
python3 - <<'PY'
import torch
print(f'PyTorch: {torch.__version__}')
print(f'CUDA disponivel: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')

import accelerate
print(f'Accelerate: {accelerate.__version__}')

import diffusers
print(f'Diffusers: {diffusers.__version__}')

import huggingface_hub
print(f'HuggingFace Hub: {huggingface_hub.__version__}')

try:
    import styletts2
    print('StyleTTS2: OK')
except Exception as exc:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit still propagate, and report why the import failed.
    print(f'StyleTTS2: ERRO ({type(exc).__name__}: {exc})')
PY

# Closing summary: completion banner, the pinned critical versions, and
# example commands for trying out the installation. The heredoc delimiter is
# quoted so the text is printed literally.
echo
log_info "=== Instalacao concluida! ==="
cat <<'EOF'

Versoes instaladas (CRITICAS):
  - accelerate==0.25.0
  - diffusers==0.21.0
  - huggingface-hub==0.25.0

Para testar:
  # Gerar audio com voz clonada
  python3 scripts/generate_audio.py --text 'Hello world' --voice ref.wav -o output.wav

  # Fazer lip sync
  cd /root/musetalk-space
  python3 -m scripts.inference --inference_config config.yaml --result_dir ./output
EOF