# ClarityGuard - Hugging Face Spaces image for the L4 GPU tier.
# llama-server is precompiled locally and uploaded to the repo as a binary
# (bin/llama-server plus its shared libraries), so nothing is compiled here
# and the CUDA "runtime" base image is used.
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04

# Build-time only: stops apt from prompting during package installation.
# Declared as ARG (not ENV) so it does not leak into the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration exported to the container environment.
# NOTE(review): these are presumably read by app.py / passed on to
# llama-server; the consumers are not visible from this file - confirm there
# before renaming or removing any of them.
ENV PYTHONUNBUFFERED=1 \
    OMP_NUM_THREADS=8 \
    OMP_PROC_BIND=false \
    CPU_THREADS=8 \
    LLAMA_CTX=12288 \
    LLAMA_MAX_TOKENS=8192 \
    LLAMA_BATCH=1024 \
    LLAMA_UBATCH=512 \
    LLAMA_GPU_LAYERS=999 \
    MMPROJ_OFFLOAD=1 \
    RAG_TOP_K=4 \
    RAG_MAX_CONTEXT_CHARS=9000

# OS packages. update + install + list cleanup stay in one layer so the apt
# index is never stale and never shipped in the image.
# NOTE(review): --no-install-recommends is intentionally NOT set. Recommended
# packages (e.g. CA certificates, Python build tooling) may be needed by curl,
# git or by the pip install below; requirements.txt is not visible from here,
# so confirm before trimming.
RUN apt-get update && apt-get install -y \
        curl \
        git \
        git-lfs \
        libgomp1 \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Precompiled llama-server binary and the shared objects shipped next to it.
COPY bin/llama-server /opt/llama-cpp/llama-server
COPY bin/*.so* /usr/local/lib/
# Make the binary executable and refresh the dynamic linker cache so the
# libraries copied into /usr/local/lib are resolvable at run time.
RUN chmod +x /opt/llama-cpp/llama-server && ldconfig

WORKDIR /app

# Python dependencies are installed before the application source is copied,
# so this layer is rebuilt only when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Unprivileged runtime user. Hugging Face Spaces runs containers as UID 1000,
# so the user is created with that UID and given ownership of /app to avoid
# permission errors when the application writes next to its code.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app
ENV HOME=/home/user

# Application source, owned by the runtime user.
# NOTE(review): this copies the whole build context, including bin/ a second
# time, unless a .dockerignore excludes it.
COPY --chown=user:user . .

# Everything that needs root is done; drop privileges for the running app.
USER user

# Port the application is expected to listen on (documentation only; it does
# not publish the port).
EXPOSE 7860

# Exec form, so python3 is PID 1 and receives stop signals directly.
CMD ["python3", "app.py"]