# ClarityGuard - HuggingFace Spaces L4 GPU
# llama-server is precompiled locally and uploaded to the repo as a binary,
# so this image only needs the CUDA runtime (no compiler / CUDA devel image).
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04

# Build-time only: keeps apt-get from prompting during the image build
# without leaking the setting into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration exported to the container environment.
# NOTE(review): the consumers of these variables (presumably app.py and the
# llama-server launcher) are not visible from this file - confirm there.
ENV PYTHONUNBUFFERED=1 \
    OMP_NUM_THREADS=8 \
    OMP_PROC_BIND=false \
    CPU_THREADS=8 \
    LLAMA_CTX=12288 \
    LLAMA_MAX_TOKENS=8192 \
    LLAMA_BATCH=1024 \
    LLAMA_UBATCH=512 \
    LLAMA_GPU_LAYERS=999 \
    MMPROJ_OFFLOAD=1 \
    RAG_TOP_K=4 \
    RAG_MAX_CONTEXT_CHARS=9000

# OS packages. `update`, `install` and the apt list cleanup share one layer so
# the package index is never stale and never shipped in the image.
# NOTE(review): --no-install-recommends is intentionally not added here; the
# recommended packages may provide the toolchain / CA certificates that
# `pip install` and curl/git rely on - confirm before slimming the image.
RUN apt-get update && apt-get install -y \
        curl \
        git \
        git-lfs \
        libgomp1 \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Precompiled llama-server binary and the shared libraries shipped next to it.
COPY bin/llama-server /opt/llama-cpp/llama-server
COPY bin/*.so* /usr/local/lib/

# Make the server executable and refresh the dynamic linker cache so the
# libraries copied into /usr/local/lib are found at runtime.
RUN chmod +x /opt/llama-cpp/llama-server && ldconfig

# Unprivileged runtime user. UID 1000 follows the HuggingFace Spaces Docker
# guidance; /app is created up front and owned by that user so the
# application can write next to its sources.
RUN useradd --create-home --uid 1000 user \
    && install -d -o user -g user /app

WORKDIR /app

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt itself changes. Runs as root so the packages
# land in the system-wide site-packages.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application sources, owned by the runtime user.
COPY --chown=user:user . .

# Drop root for everything that runs in the container.
USER user
ENV HOME=/home/user

# Port the application is expected to listen on (documentation only).
EXPOSE 7860

# Exec form: python3 runs as PID 1 and receives stop signals directly.
CMD ["python3", "app.py"]