File size: 883 Bytes
65593ac
 
2c41ad3
d27e6ea
ae8a839
e50038f
0bd2a59
6869032
 
 
ff04f16
 
0bd2a59
cecacb4
0bd2a59
 
ff04f16
 
e50038f
 
2c41ad3
 
 
e50038f
 
65593ac
 
 
eadf049
2c41ad3
a3dfa89
e50038f
428a371
e50038f
 
 
 
2c41ad3
6869032
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# ClarityGuard - HuggingFace Spaces L4 GPU
# llama-server precompiled locally and uploaded to the repo as a binary.

# Base image: NVIDIA CUDA 12.6.3 "runtime" variant on Ubuntu 22.04, pinned by
# tag. NOTE(review): consider also pinning by digest for fully reproducible
# builds.
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04

# Build-time only: suppress interactive apt/debconf prompts during the RUN
# steps of this stage without leaking the setting into the runtime
# environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# Disable Python's stdout/stderr buffering so log lines show up immediately.
ENV PYTHONUNBUFFERED=1

# CPU threading: OpenMP thread count and thread-binding policy, plus an
# application-level thread count (CPU_THREADS is presumably read by app.py;
# its consumer is not visible in this file).
ENV OMP_NUM_THREADS=8 \
    OMP_PROC_BIND=false \
    CPU_THREADS=8

# llama-server tuning values. NOTE(review): these are presumably read by
# app.py when it launches llama-server (context size, generation limit, batch
# sizes, number of GPU-offloaded layers, multimodal projector offload);
# confirm against the application code.
ENV LLAMA_CTX=12288 \
    LLAMA_MAX_TOKENS=8192 \
    LLAMA_BATCH=1024 \
    LLAMA_UBATCH=512 \
    LLAMA_GPU_LAYERS=999 \
    MMPROJ_OFFLOAD=1

# Retrieval settings. NOTE(review): presumably the number of retrieved chunks
# and a character cap on the assembled context; confirm against app.py.
ENV RAG_TOP_K=4 \
    RAG_MAX_CONTEXT_CHARS=9000

# OS packages for the image, installed and cleaned up in a single layer.
# --no-install-recommends keeps optional packages out of the image.
# ca-certificates is listed explicitly so HTTPS access from curl/git/pip keeps
# working without relying on recommended packages.
# NOTE(review): without recommends, python3-pip no longer pulls in
# build-essential/python3-dev; add them here if any entry in requirements.txt
# has to be compiled from source.
# libgomp1 is the GNU OpenMP runtime (presumably required by the prebuilt
# llama-server binary).
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        git-lfs \
        libgomp1 \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install the prebuilt llama-server binary and the shared libraries shipped
# next to it in bin/.
# --chmod sets the executable bit at copy time; a follow-up `RUN chmod` would
# store a second full copy of the binary in its own layer.
COPY --chmod=0755 bin/llama-server /opt/llama-cpp/llama-server
COPY bin/*.so* /usr/local/lib/
# Refresh the dynamic linker cache so the libraries copied into /usr/local/lib
# are found when llama-server starts.
RUN ldconfig

# All following relative paths resolve against the application directory.
WORKDIR /app

# Install Python dependencies before the application source is copied, so
# this layer is rebuilt only when requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Create an unprivileged runtime user (UID 1000, the UID used in Hugging Face's
# Docker Spaces guidance) and hand it the application directory so the app can
# create files there.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app

# Copy the application source last so source edits do not invalidate the
# dependency layers above; files are owned by the runtime user.
COPY --chown=user:user . .

# Drop root for the running container.
# NOTE(review): if app.py writes outside /app or the home directory, grant
# access to those paths explicitly.
USER user
ENV HOME=/home/user

# Port the application is expected to listen on (documentation only; EXPOSE
# does not publish the port).
EXPOSE 7860

# Exec form: python3 runs as PID 1 and receives stop signals directly.
CMD ["python3", "app.py"]