# ================================================================
#   Avatar App - SadTalker + Coqui TTS (CPU version, HF-ready)
#   Optimized for Hugging Face Spaces Free Tier
# ================================================================

# ---------- Base Image ----------
FROM python:3.10-slim

# Build-time only: keeps apt/debconf from prompting during the RUN steps below.
# Declared as ARG (not ENV) so the setting is visible to every RUN in this
# stage but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# All relative paths below (COPY targets, the CMD script path) resolve here.
WORKDIR /app

# ---------- System Dependencies ----------
# --no-install-recommends keeps optional "Recommends" packages out of the image.
# ca-certificates is listed explicitly so the HTTPS downloads performed later
# (git clone, wget) do not rely on it being pulled in as a recommendation.
# The apt package index is deleted in the same layer so it never reaches the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    espeak \
    espeak-data \
    ffmpeg \
    git \
    libespeak-dev \
    unzip \
    wget \
    && rm -rf /var/lib/apt/lists/*

# ---------- Environment Variables ----------
# Runtime settings, declared in one instruction:
#   PYTHONUNBUFFERED=1             - Python writes stdout/stderr unbuffered.
#   COQUI_TOS_AGREED=1             - presumably pre-accepts the Coqui model
#                                    license prompt; confirm against TTS docs.
#   CUDA_VISIBLE_DEVICES=""        - empty value, so no CUDA device is visible.
#   DEVICE=cpu                     - presumably read by the application code;
#                                    nothing in this file consumes it.
#   PYTORCH_ENABLE_MPS_FALLBACK=1  - PyTorch MPS fallback switch.
ENV PYTHONUNBUFFERED=1 \
    COQUI_TOS_AGREED=1 \
    CUDA_VISIBLE_DEVICES="" \
    DEVICE=cpu \
    PYTORCH_ENABLE_MPS_FALLBACK=1

# ---------- Install Python Dependencies (Memory-optimized order) ----------
# Step 1: bring the packaging toolchain itself up to date.
RUN pip install --no-cache-dir --upgrade \
    pip \
    setuptools \
    wheel

# Step 2: install NumPy first, constrained to >=1.23.5 and <1.24, the range
# intended to satisfy every dependency installed afterwards.
RUN pip install --no-cache-dir \
    "numpy>=1.23.5,<1.24"

# Step 3: CPU-only PyTorch wheels from the PyTorch CPU index.
# Held at 2.5.1 because PyTorch 2.6+ breaks the TTS library
# (torch.load defaults to weights_only=True from 2.6 onwards).
RUN pip install --no-cache-dir \
    torch==2.5.1 \
    torchvision==0.20.1 \
    torchaudio==2.5.1 \
    --index-url https://download.pytorch.org/whl/cpu

# Core scientific/audio dependencies.
# Each package gets its own RUN (its own pip invocation and layer) so the
# installs happen in small batches.
RUN pip install --no-cache-dir \
    scipy==1.11.3
RUN pip install --no-cache-dir \
    scikit-learn==1.5.0
RUN pip install --no-cache-dir \
    librosa==0.10.2.post1
RUN pip install --no-cache-dir \
    Cython==3.0.10

# Streamlit UI plus the lighter helper libraries, installed together.
# NOTE(review): these six packages are unpinned, so each rebuild may resolve
# to different versions.
RUN pip install --no-cache-dir streamlit pydub matplotlib requests python-dotenv pyttsx3

# ---------- Clone SadTalker ----------
# Shallow clone (history depth 1) of the upstream repository into /app/SadTalker.
# NOTE(review): no branch, tag or commit is specified, so the checkout is
# whatever the remote's default branch points to at build time.
RUN git clone \
    --depth 1 \
    https://github.com/OpenTalker/SadTalker.git \
    /app/SadTalker

# ---------- Download SadTalker Checkpoints ----------
# Fetches the checkpoints archive from the v0.0.2-rc release, unpacks it into
# /app/SadTalker and deletes the archive in the same layer so the zip never
# adds to the image size. The final `ls` lists the checkpoints directory in
# the build log.
RUN mkdir -p /app/SadTalker/checkpoints && \
    echo "📦 Downloading SadTalker model checkpoints..." && \
    wget \
        --progress=bar:force:noscroll \
        --retry-connrefused \
        --waitretry=1 \
        --read-timeout=20 \
        --timeout=15 \
        --tries=3 \
        https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/checkpoints.zip \
        --output-document=/app/SadTalker/checkpoints.zip && \
    echo "📦 Extracting checkpoints..." && \
    unzip -o /app/SadTalker/checkpoints.zip -d /app/SadTalker && \
    rm /app/SadTalker/checkpoints.zip && \
    echo "✅ Checkpoints downloaded successfully" && \
    ls -lh /app/SadTalker/checkpoints/

# Install SadTalker requirements with correct NumPy
# The sed rewrites SadTalker's own NumPy requirement to the range used by the
# rest of this image before pip reads the file. The pattern is anchored to a
# requirement whose name is exactly "numpy" (optionally indented, optionally
# followed by a version specifier), so a line for a different package whose
# name merely contains "numpy" is left untouched.
RUN cd /app/SadTalker && \
    sed -i -E 's/^[[:space:]]*numpy([^A-Za-z0-9._-].*)?$/numpy>=1.23.5,<1.24/' requirements.txt && \
    pip install --no-cache-dir -r requirements.txt

# ---------- Install Coqui TTS Stack (in stages to manage memory) ----------
# Each package is installed by its own pip invocation, in this order, with an
# exact version pin. Keeping them as separate RUN steps means pip resolves and
# installs one package (plus its dependencies) at a time.
RUN pip install --no-cache-dir transformers==4.36.2
RUN pip install --no-cache-dir sentencepiece==0.2.0
RUN pip install --no-cache-dir accelerate==0.25.0
RUN pip install --no-cache-dir TTS==0.22.0

# Fix any NumPy version conflicts
# Re-applies the NumPy range used elsewhere in this file after all of the
# installs above; --force-reinstall makes pip reinstall NumPy even if a NumPy
# is already present.
# NOTE(review): presumably one of the earlier installs moves NumPy outside
# this range - confirm which one before removing this step.
RUN pip install --no-cache-dir --force-reinstall "numpy>=1.23.5,<1.24"

# ---------- Copy Application Files ----------
# The Streamlit entry script is the only application file copied into the
# image; it lands in /app/, next to the SadTalker checkout.
COPY ["avatar_streamlit.py", "/app/"]

# ---------- Fix NumPy Compatibility in SadTalker ----------
# Patches the cloned SadTalker sources in place with three sed edits:
#   1. In every *.py file under /app/SadTalker, the whole-word token
#      `np.float` becomes `np.float64`. The \b word boundaries keep names
#      such as `np.float32` or `np.float64` from matching.
#   2. In src/face3d/util/preprocess.py, the call
#      warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
#      is commented out by prefixing it with "# ".
#   3. In the same file, the `trans_params = np.array([...])` assignment is
#      rewritten so t[0] and t[1] are wrapped in float().
#      NOTE(review): presumably so the array is built from plain scalars
#      rather than array-valued elements - confirm against preprocess.py.
# A sed substitution that matches nothing still exits 0, so this step does not
# fail if upstream changes the patched lines; the success message is printed
# either way.
RUN cd /app/SadTalker && \
    find . -name "*.py" -type f -exec sed -i 's/\bnp\.float\b/np.float64/g' {} + && \
    sed -i 's/warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/# warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/' src/face3d/util/preprocess.py && \
    sed -i 's/trans_params = np.array(\[w0, h0, s, t\[0\], t\[1\]\])/trans_params = np.array([w0, h0, s, float(t[0]), float(t[1])])/' src/face3d/util/preprocess.py && \
    echo "✅ SadTalker NumPy fixes applied!"

# ---------- Fix torchvision compatibility in SadTalker ----------
# Rewrites imports of `torchvision.transforms.functional_tensor` to
# `torchvision.transforms.functional`, in two forms:
#   - from torchvision.transforms.functional_tensor import rgb_to_grayscale
#   - import torchvision.transforms.functional_tensor
#
# The patch covers both the SadTalker checkout and the interpreter's
# site-packages directory, because the import can also live in pip-installed
# dependencies rather than in SadTalker's own sources.
# NOTE(review): basicsr is the package commonly reported to carry this import;
# confirm it is what SadTalker's requirements.txt installs.
#
# grep first narrows the work to files that actually mention the module, so
# sed -i only rewrites those files instead of every *.py file in the layer.
# File names are passed NUL-separated (-Z / -0); `xargs -r` runs nothing when
# grep finds no match. The pipeline deliberately runs without pipefail: grep
# exits 1 on "no matches", which must not fail the build.
RUN SITE_PACKAGES="$(python -c 'import site; print(site.getsitepackages()[0])')" && \
    grep -rlZ --include="*.py" "torchvision.transforms.functional_tensor" /app/SadTalker "$SITE_PACKAGES" \
      | xargs -0 -r sed -i \
          -e 's/from torchvision.transforms.functional_tensor import rgb_to_grayscale/from torchvision.transforms.functional import rgb_to_grayscale/g' \
          -e 's/import torchvision.transforms.functional_tensor/import torchvision.transforms.functional/g' && \
    echo "✅ SadTalker torchvision fixes applied!"

# ---------- Preload Coqui Model (Skip on build to save memory) ----------
# Model will download on first run instead
# RUN python3 -c "from TTS.api import TTS; TTS('tts_models/multilingual/multi-dataset/xtts_v2', gpu=False)" || true

# ---------- Expose Port and Run ----------
# EXPOSE only documents the port; it matches the --server.port passed below.
EXPOSE 8501

# Exec-form CMD: Streamlit is started directly, without a wrapping shell.
# The script path is relative and resolves against the image's WORKDIR.
# Streamlit binds to 0.0.0.0 so it is reachable from outside the container.
CMD ["streamlit", "run", "avatar_streamlit.py", \
     "--server.port=8501", \
     "--server.address=0.0.0.0"]

# FORCE_REBUILD: 2025-01-04-v152