File size: 2,026 Bytes
38aa070
 
 
 
 
 
c544d52
 
 
 
 
 
 
 
 
 
38aa070
6806c38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38aa070
 
 
 
 
 
 
 
 
 
 
 
c544d52
 
 
 
 
 
 
 
 
 
 
b70d23b
 
c544d52
 
 
b70d23b
 
c544d52
38aa070
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Base image. The tag can be overridden at build time, e.g.
#   docker build --build-arg OLLAMA_TAG=<version> .
# to pin a specific release for reproducible builds. The default keeps the
# original behaviour of tracking the moving "latest" tag.
ARG OLLAMA_TAG=latest
FROM ollama/ollama:${OLLAMA_TAG}




# Install helper tools on top of the base image:
#   ca-certificates, curl - HTTP(S) client; curl is used by the health check
#   netcat-openbsd        - provides `nc`, used by the health check port probe
#   htop, procps, wget    - process/diagnostic and download utilities
#                           (presumably used by entrypoint.sh or for debugging;
#                           confirm before removing any of them)
# --no-install-recommends keeps optional "recommended" packages out of the
# image. Cleanup happens in the same layer so the apt caches never persist.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        htop \
        netcat-openbsd \
        procps \
        wget \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Core server settings:
#   OLLAMA_HOST    - bind address and port (all interfaces, port 7860)
#   OLLAMA_ORIGINS - allowed request origins ("*" accepts any origin)
#   OLLAMA_MODELS  - directory where models are stored (under /tmp)
ENV OLLAMA_HOST=0.0.0.0:7860 \
    OLLAMA_ORIGINS="*" \
    OLLAMA_MODELS=/tmp/ollama-models

# Ollama server tuning for a small, single-model deployment
# (variable meanings as described in the Ollama FAQ):
#   OLLAMA_NUM_PARALLEL      - parallel requests handled per loaded model
#   OLLAMA_MAX_LOADED_MODELS - models kept loaded at the same time
#   OLLAMA_MAX_QUEUE         - requests queued before new ones are rejected
#   OLLAMA_FLASH_ATTENTION   - enable flash attention
#   OLLAMA_KEEP_ALIVE        - how long a model stays loaded after last use
# NOTE(review): OLLAMA_NOPRUNE=false presumably leaves model-blob pruning
# enabled; confirm how the Ollama version in use parses this value.
ENV OLLAMA_NUM_PARALLEL=1 \
    OLLAMA_MAX_LOADED_MODELS=1 \
    OLLAMA_MAX_QUEUE=3 \
    OLLAMA_FLASH_ATTENTION=1 \
    OLLAMA_KEEP_ALIVE=5m \
    OLLAMA_NOPRUNE=false

# Cap the thread pools of common numeric libraries at 4 threads each
# (OpenMP, MKL, OpenBLAS, vecLib, NumExpr, generic BLAS).
# NOTE(review): whether the Ollama runtime honours each of these variables
# cannot be verified from this file; they are passed through unchanged.
ENV OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    OPENBLAS_NUM_THREADS=4 \
    VECLIB_MAXIMUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    BLAS_NUM_THREADS=4

# Memory and runtime tuning:
#   MALLOC_ARENA_MAX / MALLOC_MMAP_THRESHOLD_ / MALLOC_TRIM_THRESHOLD_
#       - glibc malloc tunables (arena count; mmap and trim thresholds in bytes)
#   GOMEMLIMIT / GOMAXPROCS
#       - Go runtime soft memory limit and maximum number of OS threads
#         running Go code simultaneously
# NOTE(review): OLLAMA_MAX_VRAM=0 is presumably meant to keep inference off
# GPU memory; verify that the Ollama version in use still reads it and that
# 0 is not simply its "no override" default.
ENV OLLAMA_MAX_VRAM=0 \
    MALLOC_ARENA_MAX=2 \
    MALLOC_MMAP_THRESHOLD_=131072 \
    MALLOC_TRIM_THRESHOLD_=131072 \
    GOMEMLIMIT=10GiB \
    GOMAXPROCS=4


# Hide all CUDA and HIP/ROCm devices so inference runs on the CPU only.
# Both variables are set to the empty string.
# NOTE(review): Ollama's GPU documentation suggests an invalid device ID
# (e.g. "-1") to force CPU usage; confirm the empty value is honoured.
ENV CUDA_VISIBLE_DEVICES="" \
    HIP_VISIBLE_DEVICES=""

# Install the container entrypoint script and mark it executable in the same
# layer. COPY --chmod avoids the extra layer (and duplicated file) that a
# follow-up `RUN chmod` creates. This flag requires BuildKit.
COPY --chmod=0755 entrypoint.sh /usr/local/bin/entrypoint.sh


# OLLAMA_HOST (0.0.0.0:7860) is already set with the core environment
# variables earlier in this file, so it is not repeated here.

# Document the port the server listens on. EXPOSE is metadata only and does
# not publish the port by itself.
EXPOSE 7860

# Generate /healthcheck.sh at build time.
# The script exits 1 if nothing is listening on localhost:7860 (nc probe) or
# if GET /api/version fails or takes longer than 3 seconds (curl), and exits 0
# otherwise.
# printf '%s\n' writes one script line per argument, so the result does not
# depend on whether the build shell's `echo` interprets "\n" escapes.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'if ! nc -z localhost 7860 2>/dev/null; then' \
        '    echo "UNHEALTHY: Service not responding"' \
        '    exit 1' \
        'fi' \
        'if ! curl -sf --max-time 3 http://localhost:7860/api/version >/dev/null; then' \
        '    echo "UNHEALTHY: API not responding"' \
        '    exit 1' \
        'fi' \
        'echo "HEALTHY: Ollama running"' \
        'exit 0' \
        > /healthcheck.sh \
    && chmod +x /healthcheck.sh

# Container health probe: run /healthcheck.sh every 60s, allowing 10s per run.
# Failures during the first 120s after start are not counted, and the
# container is marked unhealthy after 3 consecutive failures.
HEALTHCHECK \
    --start-period=120s \
    --interval=60s \
    --timeout=10s \
    --retries=3 \
    CMD /healthcheck.sh


# Start the container through the custom entrypoint script copied to
# /usr/local/bin earlier in this file. Exec (JSON-array) form runs the script
# directly, without a wrapping `/bin/sh -c`.
# NOTE(review): entrypoint.sh is not visible here; presumably it launches the
# Ollama server. Confirm that it `exec`s the final process so signals reach it.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]