Spaces:

sidmaz666
/

bonsaiapi

Paused

sidmaz666 committed on Apr 20

Commit

5dd06f1

verified ·

1 Parent(s): 5abef70

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -6,26 +6,29 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
     HF_HOME=/data/.huggingface \
     TRANSFORMERS_CACHE=/data/.cache/huggingface \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
-    TOKENIZERS_PARALLELISM=false \
-    ORT_DISABLE_CPU_AFFINITY=1
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     gcc \
     g++ \
-    libglib2.0-0 \
-    libsm6 \
-    libxext6 \
-    libxrender1 \
     && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 COPY requirements.txt .
-RUN pip install --upgrade pip && pip install -r requirements.txt
 COPY app.py .
 EXPOSE 7860
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

     HF_HOME=/data/.huggingface \
     TRANSFORMERS_CACHE=/data/.cache/huggingface \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
+    TOKENIZERS_PARALLELISM=false
+# Install build dependencies for llama-cpp-python
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     gcc \
     g++ \
+    cmake \
+    libopenblas-dev \
     && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 COPY requirements.txt .
+# Build llama-cpp-python with OpenBLAS for maximum CPU performance
+RUN pip install --upgrade pip && \
+    CMAKE_ARGS="-DGGML_OPENBLAS=ON" pip install llama-cpp-python && \
+    pip install -r requirements.txt
 COPY app.py .
 EXPOSE 7860
+# Run Uvicorn with a single worker process and cap concurrency at 100
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--limit-concurrency", "100"]