# ================================================================
# Avatar App - SadTalker + Coqui TTS (CPU version, HF-ready)
# Optimized for Hugging Face Spaces Free Tier
# ================================================================
#
# Build outline:
#   1. Debian system packages (ffmpeg, espeak, compiler toolchain, fetch tools)
#   2. Python scientific stack with CPU-only PyTorch
#   3. SadTalker sources + model checkpoints + its requirements
#   4. Coqui TTS stack
#   5. Application script and in-place compatibility patches to SadTalker
#
# NOTE(review): no USER instruction is set, so the container runs as root
# unless the hosting runtime overrides the UID. Introduce a dedicated
# non-root user once the paths the app writes to at runtime (SadTalker
# outputs, TTS model cache) have been confirmed.

# ---------- Base Image ----------
FROM python:3.10-slim

# Build-time only: keeps apt non-interactive during the build without
# leaking the setting into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# ---------- System Dependencies ----------
# --no-install-recommends keeps optional packages out of the image;
# ca-certificates is listed explicitly because git/wget/curl only
# "recommend" it and the HTTPS downloads below depend on it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        espeak \
        espeak-data \
        ffmpeg \
        git \
        libespeak-dev \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# ---------- Environment Variables ----------
# CUDA_VISIBLE_DEVICES is intentionally empty and DEVICE is cpu: this is the
# CPU-only variant of the image.
ENV PYTHONUNBUFFERED=1 \
    COQUI_TOS_AGREED=1 \
    CUDA_VISIBLE_DEVICES="" \
    DEVICE=cpu \
    PYTORCH_ENABLE_MPS_FALLBACK=1

# ---------- Install Python Dependencies (Memory-optimized order) ----------
# Packages are installed in separate small RUN steps on purpose, to keep the
# peak memory of each pip invocation low.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Install NumPy with version that satisfies all dependencies
RUN pip install --no-cache-dir "numpy>=1.23.5,<1.24"

# CPU-only PyTorch (use version compatible with TTS library)
# PyTorch 2.6+ breaks TTS due to weights_only=True default, so use 2.5.1
RUN pip install --no-cache-dir \
        torch==2.5.1 \
        torchvision==0.20.1 \
        torchaudio==2.5.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Core dependencies - install in smaller batches
RUN pip install --no-cache-dir scipy==1.11.3
RUN pip install --no-cache-dir scikit-learn==1.5.0
RUN pip install --no-cache-dir librosa==0.10.2.post1
RUN pip install --no-cache-dir Cython==3.0.10

# Streamlit and lighter dependencies
# NOTE(review): these packages are unpinned, so rebuilds may pick up newer
# releases; pin them once known-good versions have been recorded.
RUN pip install --no-cache-dir \
        streamlit \
        pydub \
        matplotlib \
        requests \
        python-dotenv \
        pyttsx3

# ---------- Clone SadTalker ----------
# NOTE(review): the clone tracks the default branch head (shallow), so the
# sed patches further down may need revisiting if upstream changes.
RUN git clone --depth 1 https://github.com/OpenTalker/SadTalker.git /app/SadTalker

# ---------- Download SadTalker Checkpoints ----------
WORKDIR /app/SadTalker

# The archive is downloaded, extracted and deleted in a single layer so the
# zip file itself never persists in the image.
# --timeout is given before --read-timeout: the generic option resets every
# timeout (including the read timeout), so the more specific one must follow
# it for the 20 s read timeout to take effect.
RUN mkdir -p checkpoints && \
    echo "📦 Downloading SadTalker model checkpoints..." && \
    wget --progress=bar:force:noscroll --retry-connrefused --waitretry=1 --timeout=15 --read-timeout=20 -t 3 \
        https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/checkpoints.zip -O checkpoints.zip && \
    echo "📦 Extracting checkpoints..." && \
    unzip -o checkpoints.zip && \
    rm checkpoints.zip && \
    echo "✅ Checkpoints downloaded successfully" && \
    ls -lh checkpoints/

# Install SadTalker requirements with correct NumPy
# The sed call rewrites the numpy requirement so it matches the range
# installed above before pip resolves the rest of the file.
RUN sed -i 's/numpy.*/numpy>=1.23.5,<1.24/' requirements.txt && \
    pip install --no-cache-dir -r requirements.txt

WORKDIR /app

# ---------- Install Coqui TTS Stack (in stages to manage memory) ----------
RUN pip install --no-cache-dir transformers==4.36.2
RUN pip install --no-cache-dir sentencepiece==0.2.0
RUN pip install --no-cache-dir accelerate==0.25.0
RUN pip install --no-cache-dir TTS==0.22.0

# Fix any NumPy version conflicts
# Earlier installs may have moved NumPy; force it back into the wanted range.
RUN pip install --no-cache-dir --force-reinstall "numpy>=1.23.5,<1.24"

# ---------- Copy Application Files ----------
COPY avatar_streamlit.py /app/

# ---------- Fix NumPy Compatibility in SadTalker ----------
WORKDIR /app/SadTalker

# 1. Replace every bare `np.float` with `np.float64` in all Python sources.
# 2. Comment out the np.VisibleDeprecationWarning filter in preprocess.py.
# 3. Cast t[0] / t[1] to float when building trans_params in preprocess.py.
RUN find . -name "*.py" -type f -exec sed -i 's/\bnp\.float\b/np.float64/g' {} + && \
    sed -i 's/warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/# warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/' src/face3d/util/preprocess.py && \
    sed -i 's/trans_params = np.array(\[w0, h0, s, t\[0\], t\[1\]\])/trans_params = np.array([w0, h0, s, float(t[0]), float(t[1])])/' src/face3d/util/preprocess.py && \
    echo "✅ SadTalker NumPy fixes applied!"

# ---------- Fix torchvision compatibility in SadTalker ----------
# Rewrites imports of torchvision.transforms.functional_tensor to use
# torchvision.transforms.functional instead, across all Python sources.
RUN find . -name "*.py" -type f -exec sed -i 's/from torchvision.transforms.functional_tensor import rgb_to_grayscale/from torchvision.transforms.functional import rgb_to_grayscale/g' {} + && \
    find . -name "*.py" -type f -exec sed -i 's/import torchvision.transforms.functional_tensor/import torchvision.transforms.functional/g' {} + && \
    echo "✅ SadTalker torchvision fixes applied!"

# ---------- Preload Coqui Model (Skip on build to save memory) ----------
# Model will download on first run instead
# RUN python3 -c "from TTS.api import TTS; TTS('tts_models/multilingual/multi-dataset/xtts_v2', gpu=False)" || true

# ---------- Expose Port and Run ----------
# Back to /app: the CMD below refers to avatar_streamlit.py by relative path.
WORKDIR /app

EXPOSE 8501

CMD ["streamlit", "run", "avatar_streamlit.py", "--server.port=8501", "--server.address=0.0.0.0"]

# FORCE_REBUILD: 2025-01-04-v152