WORKWITHSHAFISK committed on
Commit
0403a89
·
verified ·
1 Parent(s): 1c5adf4

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +49 -48
Dockerfile CHANGED
@@ -1,48 +1,49 @@
1
- # syntax=docker/dockerfile:1
2
- FROM python:3.11-slim
3
-
4
- # Install build dependencies for llama-cpp-python
5
- RUN apt-get update && apt-get install -y \
6
- cmake \
7
- g++ \
8
- gcc \
9
- libopenblas-dev \
10
- && rm -rf /var/lib/apt/lists/*
11
-
12
- # Set working directory
13
- WORKDIR /app
14
-
15
- # Set environment variables for CPU optimization
16
- # GGML_BLAS enables BLAS acceleration
17
- # GGML_OPENBLAS uses OpenBLAS library for matrix operations (2-3x faster)
18
- ENV CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
19
- ENV FORCE_CMAKE=1
20
-
21
- # Copy requirements first for better Docker layer caching
22
- COPY requirements.txt .
23
-
24
- # Install Python dependencies
25
- # llama-cpp-python will compile from source with CPU optimizations
26
- RUN pip install --no-cache-dir -r requirements.txt
27
-
28
- # Copy application code
29
- COPY main.py .
30
-
31
- # Create cache directory for models
32
- RUN mkdir -p /app/models
33
-
34
- # Expose port 7860 (HuggingFace Space default)
35
- EXPOSE 7860
36
-
37
- # Set environment variables
38
- ENV HOST=0.0.0.0
39
- ENV PORT=7860
40
-
41
- # Health check for HuggingFace monitoring
42
- HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
43
- CMD python -c "import requests; requests.get('http://localhost:7860/health')"
44
-
45
- # Run the FastAPI application with Uvicorn
46
- # workers=1 ensures single process (important for model memory management)
47
- # log-level=info provides detailed logging for debugging
48
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "info"]
 
 
1
# syntax=docker/dockerfile:1
FROM python:3.11-slim

# Build dependencies for llama-cpp-python (compiled from source below).
# --no-install-recommends keeps the layer minimal; the apt lists are removed
# in the same RUN so they never persist in an image layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        cmake \
        g++ \
        gcc \
        libopenblas-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Set working directory (created automatically if missing)
WORKDIR /app

# CPU-optimization flags consumed by llama-cpp-python's build:
#   GGML_BLAS enables BLAS acceleration
#   GGML_BLAS_VENDOR=OpenBLAS uses OpenBLAS for matrix ops (2-3x faster)
# NOTE(review): these are build-time values; they stay in ENV because pip's
# build backend reads them from the environment during `pip install`.
ENV CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
    FORCE_CMAKE=1

# Copy requirements first for better Docker layer caching
COPY requirements.txt .

# Install Python dependencies; llama-cpp-python compiles from source here
# using the CMAKE_ARGS above.
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY main.py .

# Create the model cache dir, add an unprivileged user with a stable UID,
# and hand /app over to it so the container does not run as root.
RUN mkdir -p /app/models \
    && useradd --system --create-home --uid 10001 appuser \
    && chown -R appuser:appuser /app

# Expose port 7860 (HuggingFace Space default; documentation only)
EXPOSE 7860

# Runtime environment variables
ENV HOST=0.0.0.0 \
    PORT=7860

# Drop privileges after all root-requiring steps; port 7860 is unprivileged,
# so binding works as a non-root user.
USER appuser

# Health check for HuggingFace monitoring.
# raise_for_status() makes a non-2xx response fail the probe (a bare
# requests.get succeeds even on HTTP 500); the request-level timeout keeps
# a wedged endpoint from hanging the probe process.
# assumes `requests` is listed in requirements.txt — TODO confirm
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD python -c "import requests; requests.get('http://localhost:7860/health', timeout=5).raise_for_status()"

# Run the FastAPI application with Uvicorn.
# --workers 1 keeps a single process (important for model memory management);
# exec-form CMD makes uvicorn PID 1 so it receives SIGTERM from `docker stop`.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "info"]