# ClarityGuard - HuggingFace Spaces L4 GPU
# llama-server precompiled locally and uploaded to the repo as a binary.
# CUDA 12.6 runtime image on Ubuntu 22.04 (runtime libraries only, no compiler toolchain).
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04

# Build-time only: keeps apt/debconf non-interactive for the RUN steps of this
# stage without persisting the setting into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Python output is unbuffered so logs show up immediately; thread counts are
# fixed at 8 for OpenMP and for the CPU_THREADS setting.
# NOTE(review): CPU_THREADS is presumably read by app.py - confirm.
ENV PYTHONUNBUFFERED=1 \
    OMP_NUM_THREADS=8 \
    OMP_PROC_BIND=false \
    CPU_THREADS=8

# Default llama-server tuning values.
# NOTE(review): presumably consumed by app.py when it launches llama-server;
# verify the exact flag each variable maps to.
ENV LLAMA_CTX=12288 \
    LLAMA_MAX_TOKENS=8192 \
    LLAMA_BATCH=1024 \
    LLAMA_UBATCH=512 \
    LLAMA_GPU_LAYERS=999 \
    MMPROJ_OFFLOAD=1

# Default retrieval settings.
# NOTE(review): presumably the number of retrieved chunks and the context
# character budget used by the application - confirm.
ENV RAG_TOP_K=4 \
    RAG_MAX_CONTEXT_CHARS=9000
# OS-level dependencies, one per line in alphabetical order for easy diffing.
# The apt package index is removed in the same layer so it does not stay in the image.
# NOTE(review): libgomp1 (GNU OpenMP runtime) is presumably required by the
# prebuilt llama-server binary - confirm.
RUN apt-get update \
 && apt-get install -y \
      curl \
      git \
      git-lfs \
      libgomp1 \
      python3 \
      python3-pip \
 && rm -rf /var/lib/apt/lists/*
# Install the prebuilt llama-server binary and the shared libraries shipped
# alongside it in the repository's bin/ directory.
COPY bin/*.so* /usr/local/lib/
COPY bin/llama-server /opt/llama-cpp/llama-server
# Refresh the dynamic linker cache after adding libraries to /usr/local/lib and
# make sure the server binary is executable.
RUN ldconfig \
 && chmod +x /opt/llama-cpp/llama-server
# Hugging Face Spaces documents that Docker Spaces run as UID 1000. Create that
# user and hand it the application directory so the app can write there at runtime.
RUN useradd --create-home --uid 1000 user \
 && mkdir -p /app \
 && chown user:user /app
WORKDIR /app

# Copy the dependency manifest on its own first so the pip layer stays cached
# until requirements.txt itself changes.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application source, owned by the runtime user.
COPY --chown=user:user . .

# Drop root for the running container and give tools a writable home directory
# (caches and config files default to $HOME).
ENV HOME=/home/user
USER user

# Port the application is expected to listen on (documentation only; does not publish it).
EXPOSE 7860
# Exec form: python3 runs as PID 1 and receives stop signals directly.
CMD ["python3", "app.py"]