# syntax=docker/dockerfile:1
# The syntax directive above pins the BuildKit Dockerfile frontend, which the
# heredoc-style RUN used for the model download below requires.

# ---------------------------------------------------------------------------
# Stage 1: build the frontend bundle. Only /app/dist is carried into the
# final image; Node and node_modules are discarded with this stage.
# ---------------------------------------------------------------------------
FROM node:22-bookworm AS frontend

WORKDIR /app

# Copy only the manifests first so the dependency layer stays cached until
# package*.json changes.
COPY package*.json ./

# Prefer a reproducible, lockfile-exact install. Fall back to `npm install`
# when no lockfile was copied, so repositories without one still build.
RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi

# NOTE(review): this copies the whole build context; make sure a .dockerignore
# excludes node_modules, .git and local env files.
COPY . .
RUN npm run build

# ---------------------------------------------------------------------------
# Stage 2: Python runtime that serves app.py together with the built frontend.
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS runtime

# Runtime configuration read by the application and by the build-time model
# download below. The HF_* / *_CACHE variables place every Hugging Face cache
# under /app/.cache so build-time downloads are found again at runtime.
ENV PORT=7860 \
    LLM_BACKEND=llamacpp \
    TEXT_MODEL=Qwen/Qwen3-1.7B \
    GGUF_MODEL_REPO=bartowski/Qwen_Qwen3-1.7B-GGUF \
    GGUF_MODEL_FILE=Qwen_Qwen3-1.7B-Q4_K_M.gguf \
    LLAMA_CPP_N_CTX=1024 \
    LLAMA_CPP_N_THREADS=2 \
    ASR_MODEL=openai/whisper-tiny \
    KOKORO_LANG_CODE=z \
    KOKORO_VOICE=zf_xiaobei \
    MAX_NEW_TOKENS=120 \
    LLM_API_BASE_URL=https://api.deepseek.com \
    LLM_API_MODEL=deepseek-v4-flash \
    HF_HOME=/app/.cache/huggingface \
    HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface/hub \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers

WORKDIR /app

# System packages. update + install + list cleanup share one layer so no stale
# or leftover apt index ends up in the image.
# NOTE(review): build-essential and cmake are presumably only needed when a
# Python dependency has to compile from source - confirm before removing.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        espeak-ng \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies. requirements.txt is copied on its own so this layer is
# rebuilt only when the dependency list changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir --prefer-binary \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
        llama-cpp-python

# Create an unprivileged account that owns /app, then drop root for all
# remaining build steps and for the running container.
# NOTE(review): UID 1000 is what Hugging Face Spaces runs Docker containers
# as; PORT=7860 suggests that is the deployment target - confirm.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --create-home app \
    && chown app:app /app
USER app

# Download the GGUF model at build time so it is already in the cache when the
# container starts. Running this as the unprivileged user keeps the cache
# directory writable at runtime without a separate recursive chown layer.
RUN python - <<'PY'
import os
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id=os.environ["GGUF_MODEL_REPO"],
    filename=os.environ["GGUF_MODEL_FILE"],
)
print("Downloaded model to:", path)
PY

# Application code and the frontend bundle are copied last because they change
# most often.
COPY --chown=app:app app.py .
COPY --chown=app:app --from=frontend /app/dist ./dist

# Documentation only; matches the PORT default set above.
EXPOSE 7860

CMD ["python", "app.py"]