# my_bot / Dockerfile
# gadhalevy
# dock
# b249e99
# ================================================================
# Avatar App - SadTalker + Coqui TTS (CPU version, HF-ready)
# Optimized for Hugging Face Spaces Free Tier
# ================================================================
# ---------- Base Image ----------
FROM python:3.10-slim
# Build-time only: keeps apt/debconf from prompting during the RUN steps below.
# Declared as ARG (not ENV) so the setting is visible to every RUN in this
# stage but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive
# All relative paths in later instructions (COPY, CMD) resolve against /app.
WORKDIR /app
# ---------- System Dependencies ----------
# OS packages used by later build steps and by the image at runtime:
#   - git / wget / unzip: fetch and unpack SadTalker and its checkpoints
#   - build-essential: compiler toolchain for Python packages built from source
#   - ffmpeg, espeak, espeak-data, libespeak-dev: audio/video and speech tools
#   - curl: general-purpose HTTP client
# --no-install-recommends keeps optional "Recommends" packages out of the
# image; ca-certificates is therefore listed explicitly so HTTPS downloads do
# not depend on it arriving as a recommendation of git/wget/curl.
# The apt package lists are removed in the same layer so they never persist.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        espeak \
        espeak-data \
        ffmpeg \
        git \
        libespeak-dev \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# ---------- Environment Variables ----------
# Runtime environment for the container, declared in a single instruction:
#   PYTHONUNBUFFERED=1             unbuffered Python stdout/stderr
#   COQUI_TOS_AGREED=1             presumably pre-accepts the Coqui model terms
#                                  so no interactive prompt appears (confirm)
#   CUDA_VISIBLE_DEVICES=""        empty value, so no CUDA device is exposed
#   DEVICE=cpu                     presumably read by the app to select CPU
#                                  inference (confirm against avatar_streamlit.py)
#   PYTORCH_ENABLE_MPS_FALLBACK=1  PyTorch MPS fallback flag
ENV PYTHONUNBUFFERED=1 \
    COQUI_TOS_AGREED=1 \
    CUDA_VISIBLE_DEVICES="" \
    DEVICE=cpu \
    PYTORCH_ENABLE_MPS_FALLBACK=1
# ---------- Install Python Dependencies (Memory-optimized order) ----------
# Bring the packaging toolchain up to date before anything else is installed.
RUN pip install --no-cache-dir --upgrade \
        pip \
        setuptools \
        wheel
# Install NumPy up front, restricted to the 1.23.5 <= version < 1.24 range
# that the rest of this file re-applies after later installs.
RUN pip install --no-cache-dir \
        "numpy>=1.23.5,<1.24"
# CPU-only PyTorch wheels, taken from the dedicated PyTorch CPU index.
# Pinned to 2.5.1 on purpose: PyTorch 2.6+ defaults to weights_only=True,
# which breaks the TTS library.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.5.1 \
        torchvision==0.20.1 \
        torchaudio==2.5.1
# Core dependencies - install in smaller batches
# Each pinned package gets its own RUN (and therefore its own layer), so every
# pip invocation handles a single top-level package. The order is kept as-is:
# scipy, then scikit-learn, then librosa, then Cython.
# NOTE(review): a later `pip install -r requirements.txt` for SadTalker may
# replace some of these versions if that file pins different ones - confirm.
RUN pip install --no-cache-dir scipy==1.11.3
RUN pip install --no-cache-dir scikit-learn==1.5.0
RUN pip install --no-cache-dir librosa==0.10.2.post1
RUN pip install --no-cache-dir Cython==3.0.10
# Streamlit and the lighter application dependencies, installed together in
# one layer. None of these are version-pinned, so pip picks whatever release
# is current (and compatible with what is already installed) at build time.
RUN pip install --no-cache-dir \
        matplotlib \
        pydub \
        python-dotenv \
        pyttsx3 \
        requests \
        streamlit
# ---------- Clone SadTalker ----------
# Shallow clone (latest commit only) of the upstream repository into
# /app/SadTalker.
# NOTE(review): no tag or commit is pinned, so every rebuild picks up whatever
# the default branch contains at that moment; the sed patches applied later in
# this file assume the upstream sources still match their patterns.
RUN git clone \
        --depth 1 \
        https://github.com/OpenTalker/SadTalker.git \
        /app/SadTalker
# ---------- Download SadTalker Checkpoints ----------
# Work inside the SadTalker checkout for this step, then switch back to /app.
# The archive is downloaded (up to 3 tries, with connection/read timeouts),
# unpacked over any existing files, and deleted in the same layer so the zip
# itself never persists in the image. The final `ls` prints the contents of
# checkpoints/ into the build log.
# Note: --timeout is kept after --read-timeout, as in the original command,
# because option order decides which value wins.
WORKDIR /app/SadTalker
RUN mkdir -p checkpoints && \
    echo "πŸ“¦ Downloading SadTalker model checkpoints..." && \
    wget \
        --progress=bar:force:noscroll \
        --retry-connrefused \
        --waitretry=1 \
        --read-timeout=20 \
        --timeout=15 \
        --tries=3 \
        --output-document=checkpoints.zip \
        https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/checkpoints.zip && \
    echo "πŸ“¦ Extracting checkpoints..." && \
    unzip -o checkpoints.zip && \
    rm checkpoints.zip && \
    echo "βœ… Checkpoints downloaded successfully" && \
    ls -lh checkpoints/
WORKDIR /app
# Install SadTalker requirements with correct NumPy
# First rewrite SadTalker's own NumPy requirement to the range used throughout
# this image, then install the (patched) requirements file.
# The sed pattern is anchored to the start of the line and requires "numpy" to
# be the complete package name (followed by a version specifier, whitespace,
# or end of line). An unanchored `numpy.*` would also rewrite any other line
# that merely contains the text "numpy", such as a differently named package
# or a trailing comment.
# Absolute paths are used so the step does not depend on the working directory.
RUN sed -i -E 's/^numpy([^A-Za-z0-9_.-].*)?$/numpy>=1.23.5,<1.24/' /app/SadTalker/requirements.txt && \
    pip install --no-cache-dir -r /app/SadTalker/requirements.txt
# ---------- Install Coqui TTS Stack (in stages to manage memory) ----------
# Each pinned package is installed by its own pip invocation, in this order:
# transformers, sentencepiece, accelerate, and finally TTS itself.
RUN pip install --no-cache-dir transformers==4.36.2
RUN pip install --no-cache-dir sentencepiece==0.2.0
RUN pip install --no-cache-dir accelerate==0.25.0
RUN pip install --no-cache-dir TTS==0.22.0
# Fix any NumPy version conflicts
# Must stay last in this section: it force-reinstalls NumPy inside the
# 1.23.5 <= version < 1.24 range after the installs above, which may have
# replaced the NumPy version installed earlier (presumably the TTS install -
# confirm against its requirements).
RUN pip install --no-cache-dir --force-reinstall "numpy>=1.23.5,<1.24"
# ---------- Copy Application Files ----------
# Copy the Streamlit entry point into the working directory (/app).
# NOTE(review): this COPY sits before the SadTalker patch layers that follow,
# so any edit to avatar_streamlit.py invalidates their build cache; consider
# moving it after them.
COPY avatar_streamlit.py ./
# ---------- Fix NumPy Compatibility in SadTalker ----------
# In-place source patches applied to the cloned SadTalker tree:
#   1. In every *.py file, replace the whole-word token `np.float` with
#      `np.float64` (the \b anchors leave names such as np.float32 or
#      np.float64 untouched).
#   2. In src/face3d/util/preprocess.py, comment out the
#      warnings.filterwarnings(...) call that references
#      np.VisibleDeprecationWarning.
#   3. In the same file, wrap t[0] and t[1] in float() where trans_params is
#      built with np.array(...).
# NOTE(review): these look like workarounds for aliases/behaviour dropped by
# newer NumPy releases; with NumPy held below 1.24 in this image they may be
# purely precautionary - confirm before removing.
# Each sed is a silent no-op if upstream no longer matches its pattern.
RUN cd /app/SadTalker && \
find . -name "*.py" -type f -exec sed -i 's/\bnp\.float\b/np.float64/g' {} + && \
sed -i 's/warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/# warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/' src/face3d/util/preprocess.py && \
sed -i 's/trans_params = np.array(\[w0, h0, s, t\[0\], t\[1\]\])/trans_params = np.array([w0, h0, s, float(t[0]), float(t[1])])/' src/face3d/util/preprocess.py && \
echo "βœ… SadTalker NumPy fixes applied!"
# ---------- Fix torchvision compatibility in SadTalker ----------
# Recent torchvision releases no longer ship the
# `torchvision.transforms.functional_tensor` module, so every import of it is
# rewritten to `torchvision.transforms.functional`.
# The patch is applied to:
#   - the cloned SadTalker sources, and
#   - the installed `basicsr` package, when present in site-packages. Its
#     1.4.2 release is known to import rgb_to_grayscale from the removed module.
#     NOTE(review): basicsr is expected to arrive via SadTalker's
#     requirements.txt - confirm.
# Directories that do not exist are skipped, so the step is a no-op for them.
# The site-packages path is asked from the interpreter rather than hard-coded.
RUN set -eu; \
    site_packages="$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"; \
    for target in /app/SadTalker "${site_packages}/basicsr"; do \
        [ -d "${target}" ] || continue; \
        find "${target}" -name "*.py" -type f -exec sed -i 's/from torchvision.transforms.functional_tensor import rgb_to_grayscale/from torchvision.transforms.functional import rgb_to_grayscale/g' {} +; \
        find "${target}" -name "*.py" -type f -exec sed -i 's/import torchvision.transforms.functional_tensor/import torchvision.transforms.functional/g' {} +; \
    done; \
    echo "βœ… SadTalker torchvision fixes applied!"
# ---------- Preload Coqui Model (Skip on build to save memory) ----------
# Model will download on first run instead
# RUN python3 -c "from TTS.api import TTS; TTS('tts_models/multilingual/multi-dataset/xtts_v2', gpu=False)" || true
# ---------- Expose Port and Run ----------
# Streamlit listens on 8501 on all interfaces (see the CMD flags below).
EXPOSE 8501
# Liveness probe against Streamlit's built-in health endpoint, so the runtime
# can tell a wedged server from a healthy one. The timeout is generous because
# CPU-bound inference can make the server slow to answer.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl --fail --silent --show-error http://localhost:8501/_stcore/health || exit 1
# Exec form: Streamlit runs as PID 1 and receives stop signals directly.
# The script path is relative to WORKDIR (/app).
CMD ["streamlit", "run", "avatar_streamlit.py", "--server.port=8501", "--server.address=0.0.0.0"]
# FORCE_REBUILD: 2025-01-04-v152