# ---- Stage 1: frontend build ----
# Installs the Node dependencies and runs the project's `build` script.
# The runtime stage later copies /app/dist out of this stage, so nothing else
# from this (much larger) Node image ends up in the final image.
FROM node:22-bookworm AS frontend

WORKDIR /app

# Copy only the manifests first so the dependency layer stays cached until
# package.json / package-lock.json change.
COPY package*.json ./

# Prefer `npm ci` when a lockfile was copied: it installs exactly the versions
# recorded in package-lock.json and fails if the lockfile and package.json
# disagree, which keeps builds reproducible. Fall back to `npm install` only
# when the project ships no lockfile, so such projects still build.
# devDependencies are intentionally kept: the build step below needs them.
RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi

# Bring in the rest of the build context and produce the bundle.
COPY . .
RUN npm run build

# ---- Stage 2: Python runtime image ----
FROM python:3.11-slim

# Default configuration baked into the image. Every value can be overridden at
# `docker run` time with `-e NAME=value`.

# Port setting; the same number is declared by EXPOSE at the end of this stage.
ENV PORT=7860

# Language-model selection. GGUF_MODEL_REPO and GGUF_MODEL_FILE are read at
# build time by the model pre-download step later in this stage; the other
# values are presumably consumed by app.py (not visible here) -- confirm there.
ENV LLM_BACKEND=llamacpp \
    TEXT_MODEL=Qwen/Qwen3-1.7B \
    GGUF_MODEL_REPO=bartowski/Qwen_Qwen3-1.7B-GGUF \
    GGUF_MODEL_FILE=Qwen_Qwen3-1.7B-Q4_K_M.gguf \
    LLAMA_CPP_N_CTX=1024 \
    LLAMA_CPP_N_THREADS=2

# Speech-related settings (names suggest ASR model and Kokoro TTS voice;
# NOTE(review): confirm how app.py interprets them).
ENV ASR_MODEL=openai/whisper-tiny \
    KOKORO_LANG_CODE=z \
    KOKORO_VOICE=zf_xiaobei

# Generation limit and remote LLM API defaults (presumably used when a
# non-local backend is selected -- verify against app.py).
ENV MAX_NEW_TOKENS=120 \
    LLM_API_BASE_URL=https://api.deepseek.com \
    LLM_API_MODEL=deepseek-v4-flash

# Keep all Hugging Face caches under /app/.cache so build-time downloads and
# runtime lookups resolve to the same location inside the image.
ENV HF_HOME=/app/.cache/huggingface \
    HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface/hub \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers

# All following relative paths (COPY targets, CMD) resolve against /app.
WORKDIR /app

# OS-level packages, listed alphabetically for easier diffs.
# update + install + list cleanup share one layer so the apt index is never
# stale and never stored in the image.
# NOTE(review): build-essential and cmake are presumably here so that
# llama-cpp-python can be compiled when no prebuilt wheel matches; ffmpeg and
# espeak-ng look like audio/TTS runtime dependencies -- confirm against
# requirements.txt and app.py.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        espeak-ng \
        ffmpeg \
        git && \
    rm -rf /var/lib/apt/lists/*

# Copy the requirements manifest on its own so the dependency layer below is
# rebuilt only when requirements.txt changes, not on every source edit.
COPY requirements.txt ./

# Install Python dependencies in a single layer.
#   1. refresh the packaging toolchain,
#   2. install the project requirements,
#   3. install llama-cpp-python, letting pip pick a prebuilt CPU wheel from the
#      extra index when one is available (--prefer-binary).
# --no-cache-dir keeps pip's download cache out of the image.
# `set -e` is required because heredoc lines are not implicitly &&-chained.
RUN <<'SH'
set -e
pip install --no-cache-dir --upgrade pip setuptools wheel
pip install --no-cache-dir -r requirements.txt
pip install --no-cache-dir --prefer-binary \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
    llama-cpp-python
SH

# Create an unprivileged account and hand it the application directory and the
# Hugging Face cache root *before* anything large is written there. Doing the
# chown now (while the tree is nearly empty) avoids a later recursive chown
# that would duplicate the downloaded model in a second layer.
# UID/GID 1000 is used as a stable numeric id.
# NOTE(review): port 7860 suggests a Hugging Face Space, which runs containers
# as UID 1000 -- confirm the deployment target.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --create-home --shell /usr/sbin/nologin app \
    && mkdir -p "$HF_HOME" \
    && chown -R app:app /app "$HF_HOME"

# Everything below -- the model download at build time and the application at
# run time -- executes as the unprivileged user instead of root.
USER app

# Pre-download the GGUF model named by GGUF_MODEL_REPO / GGUF_MODEL_FILE so the
# file is already in the image's Hugging Face cache when the container starts.
# Because this runs as `app`, the cache stays writable for the runtime user.
RUN python - <<'PY'
import os
from huggingface_hub import hf_hub_download

# Repo and filename come from the ENV defaults set earlier in this stage.
path = hf_hub_download(
    repo_id=os.environ["GGUF_MODEL_REPO"],
    filename=os.environ["GGUF_MODEL_FILE"],
)
print("Downloaded model to:", path)
PY

# Application code and the frontend bundle built in the `frontend` stage are
# copied last (they change most often) and are owned by the runtime user.
COPY --chown=app:app app.py .
COPY --chown=app:app --from=frontend /app/dist ./dist

# Documentation of the listening port (same value as the PORT default);
# EXPOSE does not publish the port by itself.
EXPOSE 7860

# Exec form so python is PID 1 and receives stop signals directly.
CMD ["python", "app.py"]