viskav committed on
Commit
0a29f38
·
verified ·
1 Parent(s): 67a0f44

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +13 -6
Dockerfile CHANGED
@@ -2,23 +2,30 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /code
4
 
5
- # Install build dependencies for llama-cpp-python
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
- git \
9
  cmake \
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # Copy requirements first
13
  COPY requirements.txt .
14
 
15
- # Install with specific BLAS backend for better performance
16
- RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
17
- pip install --no-cache-dir -r requirements.txt
18
 
19
  # Copy app
20
  COPY app.py .
21
 
 
 
 
 
 
 
22
  EXPOSE 7860
23
 
24
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
2
 
3
  WORKDIR /code
4
 
5
+ # Install ONLY essential build dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
 
8
  cmake \
9
+ libopenblas-dev \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # Copy requirements first
13
  COPY requirements.txt .
14
 
15
+ # Install with OpenBLAS optimization (CRITICAL FOR SPEED)
16
+ ENV CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
17
+ RUN pip install --no-cache-dir -r requirements.txt
18
 
19
  # Copy app
20
  COPY app.py .
21
 
22
+ # Environment optimizations
23
+ ENV PYTHONUNBUFFERED=1
24
+ ENV N_THREADS=8
25
+ ENV N_CTX=512
26
+ ENV N_BATCH=256
27
+
28
  EXPOSE 7860
29
 
30
+ # Run uvicorn directly with a single worker process
31
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]