sidmaz666 committed on
Commit
5dd06f1
·
verified ·
1 Parent(s): 5abef70

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +11 -8
Dockerfile CHANGED
@@ -6,26 +6,29 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
6
  HF_HOME=/data/.huggingface \
7
  TRANSFORMERS_CACHE=/data/.cache/huggingface \
8
  HF_HUB_ENABLE_HF_TRANSFER=1 \
9
- TOKENIZERS_PARALLELISM=false \
10
- ORT_DISABLE_CPU_AFFINITY=1
11
 
 
12
  RUN apt-get update && apt-get install -y --no-install-recommends \
13
  git \
14
  gcc \
15
  g++ \
16
- libglib2.0-0 \
17
- libsm6 \
18
- libxext6 \
19
- libxrender1 \
20
  && rm -rf /var/lib/apt/lists/*
21
 
22
  WORKDIR /app
23
 
24
  COPY requirements.txt .
25
- RUN pip install --upgrade pip && pip install -r requirements.txt
 
 
 
 
26
 
27
  COPY app.py .
28
 
29
  EXPOSE 7860
30
 
31
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
6
  HF_HOME=/data/.huggingface \
7
  TRANSFORMERS_CACHE=/data/.cache/huggingface \
8
  HF_HUB_ENABLE_HF_TRANSFER=1 \
9
+ TOKENIZERS_PARALLELISM=false
 
10
 
11
+ # Install build dependencies for llama-cpp-python
12
  RUN apt-get update && apt-get install -y --no-install-recommends \
13
  git \
14
  gcc \
15
  g++ \
16
+ cmake \
17
+ libopenblas-dev \
 
 
18
  && rm -rf /var/lib/apt/lists/*
19
 
20
  WORKDIR /app
21
 
22
  COPY requirements.txt .
23
+
24
+ # Build llama-cpp-python against OpenBLAS: ggml enables BLAS via GGML_BLAS plus GGML_BLAS_VENDOR (GGML_OPENBLAS is not a recognized CMake option and would be ignored)
25
+ RUN pip install --upgrade pip && \
26
+ CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python && \
27
+ pip install -r requirements.txt
28
 
29
  COPY app.py .
30
 
31
  EXPOSE 7860
32
 
33
+ # Run a single Uvicorn worker and cap concurrency at 100 (these flags do not add threads)
34
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--limit-concurrency", "100"]