# syntax=docker/dockerfile:1

# Use the ultra-compact pre-compiled llama.cpp server image.
# NOTE(review): ':latest' is not reproducible (hadolint DL3007) — pin a
# specific tag or digest once a known-good release of this image is chosen.
FROM samueltallet/alpine-llama-cpp-server:latest

# Hugging Face Free Tier settings: bind all interfaces, serve on the port
# HF Spaces expects (7860), and keep thread count within the free-tier CPU quota.
ENV LLAMA_ARG_HOST=0.0.0.0 \
    LLAMA_ARG_PORT=7860 \
    LLAMA_ARG_THREADS=2 \
    LLAMA_ARG_CTX_SIZE=4096

# Qwen 3.5 model the base image's entrypoint downloads from the Hugging Face
# Hub and serves at container start.
ENV LLAMA_ARG_HF_REPO=amkkk/Qwen3.5-0.8B-quantized_uncensored_finetuned \
    LLAMA_ARG_HF_FILE=qwen3.5-0.8b-finetuned-ablated-e2-ablation020.Q4_K_M.gguf

# Optional: set an API key to keep your Space private.
# WARNING: do NOT uncomment with a real key — ENV values are baked into the
# image and visible via `docker history`. Supply the key at runtime instead
# (e.g. a Hugging Face Space secret / `docker run -e LLAMA_API_KEY=...`).
# ENV LLAMA_API_KEY=your_secret_key_here

# Documentation only (EXPOSE does not publish) — tells Hugging Face and
# operators which port the server listens on.
EXPOSE 7860

# The base image defines its own ENTRYPOINT that handles the model download
# and server start, so no CMD or ENTRYPOINT is declared here.