# Use the ultra-compact pre-compiled llama.cpp server image
FROM samueltallet/alpine-llama-cpp-server:latest

# Hugging Face Free Tier settings.
# Grouped into one ENV instruction to keep the image metadata compact.
ENV LLAMA_ARG_HOST=0.0.0.0 \
    LLAMA_ARG_PORT=7860 \
    LLAMA_ARG_THREADS=2 \
    LLAMA_ARG_CTX_SIZE=4096

# Define the Qwen 3.5 model to download and run.
# The base image's entrypoint reads these LLAMA_ARG_HF_* variables and
# fetches the GGUF file from the Hugging Face Hub at container start.
ENV LLAMA_ARG_HF_REPO=amkkk/Qwen3.5-0.8B-quantized_uncensored_finetuned \
    LLAMA_ARG_HF_FILE=qwen3.5-0.8b-finetuned-ablated-e2-ablation020.Q4_K_M.gguf

# Optional: Set an API Key to keep your Space private
# ENV LLAMA_API_KEY=your_secret_key_here

# Hugging Face needs to know which port to look at
# (must match LLAMA_ARG_PORT above)
EXPOSE 7860

# The base image provides its own ENTRYPOINT that handles the model
# download and server start, so no CMD or ENTRYPOINT is needed here.