# Use the ultra-compact pre-compiled llama.cpp server image
FROM samueltallet/alpine-llama-cpp-server:latest

# Hugging Face Free Tier settings.
# Grouped into one ENV instruction to keep the image metadata compact.
ENV LLAMA_ARG_HOST=0.0.0.0 \
    LLAMA_ARG_PORT=7860 \
    LLAMA_ARG_THREADS=2 \
    LLAMA_ARG_CTX_SIZE=4096

# Define the Qwen 3.5 model to download and run.
# The base image's entrypoint reads these LLAMA_ARG_HF_* variables and
# fetches the GGUF file from the Hugging Face Hub at container start.
ENV LLAMA_ARG_HF_REPO=amkkk/Qwen3.5-0.8B-quantized_uncensored_finetuned \
    LLAMA_ARG_HF_FILE=qwen3.5-0.8b-finetuned-ablated-e2-ablation020.Q4_K_M.gguf

# Optional: Set an API Key to keep your Space private
# ENV LLAMA_API_KEY=your_secret_key_here

# Hugging Face needs to know which port to look at
# (must match LLAMA_ARG_PORT above)
EXPOSE 7860

# The base image provides its own ENTRYPOINT that handles the model
# download and server start, so no CMD or ENTRYPOINT is needed here.