# ollama-server / Dockerfile
# Author: wwforonce
# Last commit: "fix use root user" (b70d23b)
# Base image: the ollama/ollama runtime image.
# The tag is a build argument so a build can pin a specific release or digest
# (`--build-arg OLLAMA_VERSION=<tag>`) for reproducibility. The default keeps
# tracking the moving `latest` tag, exactly as before.
ARG OLLAMA_VERSION=latest
FROM ollama/ollama:${OLLAMA_VERSION}
# Install the OS-level tools the image relies on:
#   netcat-openbsd  - provides `nc`, used by the health check's port probe
#   curl            - used by the health check's HTTP probe
#   ca-certificates - CA bundle for TLS connections
#   htop, procps    - process inspection utilities
#   wget            - download utility (its use is not visible in this file)
# --no-install-recommends keeps optional "recommended" packages out of the
# image. All apt caches and package lists are removed in this same layer so
# they never end up in the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        htop \
        netcat-openbsd \
        procps \
        wget \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Core Ollama settings, declared together in one instruction:
#   OLLAMA_HOST    - bind address and port (all interfaces, port 7860)
#   OLLAMA_ORIGINS - "*" places no restriction on request origins
#   OLLAMA_MODELS  - directory for model files (under /tmp)
ENV OLLAMA_HOST=0.0.0.0:7860 \
    OLLAMA_ORIGINS="*" \
    OLLAMA_MODELS=/tmp/ollama-models
# Ollama scheduling / performance knobs, tuned for a small single-model host:
# one parallel request, one loaded model, a queue of at most 3 requests,
# flash attention enabled, and a 5-minute keep-alive.
# OLLAMA_NOPRUNE is explicitly set to "false".
ENV OLLAMA_NUM_PARALLEL=1 \
    OLLAMA_MAX_LOADED_MODELS=1 \
    OLLAMA_MAX_QUEUE=3 \
    OLLAMA_FLASH_ATTENTION=1 \
    OLLAMA_KEEP_ALIVE=5m \
    OLLAMA_NOPRUNE=false
# Thread-count hints for common numeric back ends, all fixed at 4.
# NOTE(review): which of these variables the Ollama runtime actually honors
# is not visible from this file - confirm before relying on them.
ENV OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    OPENBLAS_NUM_THREADS=4 \
    VECLIB_MAXIMUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    BLAS_NUM_THREADS=4
# Memory-related tuning: Ollama's VRAM setting is 0, the MALLOC_* variables
# adjust the C allocator (arena count, mmap and trim thresholds), and the Go
# runtime is given a 10GiB soft memory limit and at most 4 OS threads running
# Go code at once.
ENV OLLAMA_MAX_VRAM=0 \
    MALLOC_ARENA_MAX=2 \
    MALLOC_MMAP_THRESHOLD_=131072 \
    MALLOC_TRIM_THRESHOLD_=131072 \
    GOMEMLIMIT=10GiB \
    GOMAXPROCS=4
# Blank out the CUDA and HIP device-visibility variables, with the intent of
# keeping inference on the CPU.
# NOTE(review): Ollama's GPU documentation suggests an invalid device ID such
# as "-1" to force CPU usage - confirm that an empty string has the same effect.
ENV CUDA_VISIBLE_DEVICES="" \
    HIP_VISIBLE_DEVICES=""
# Install the entrypoint script and make it executable in a single layer.
# COPY --chmod (a BuildKit feature) avoids the extra `RUN chmod` layer, which
# would otherwise store a second copy of the file in the image.
COPY --chmod=755 entrypoint.sh /usr/local/bin/entrypoint.sh
# Document the port the server listens on. It must match the port in
# OLLAMA_HOST, which is already set once in the core environment section
# earlier in this file (it is not repeated here, to avoid the two drifting).
# EXPOSE is informational only; it does not publish the port by itself.
EXPOSE 7860
# Generate /healthcheck.sh at build time (inline, so no extra file has to be
# copied into the image). The script exits 1 if either probe fails:
#   1. `nc -z`  - something is listening on localhost:7860
#   2. `curl`   - the HTTP API answers /api/version within 3 seconds
# and exits 0 with a HEALTHY message otherwise.
# printf '%s\n' writes one script line per argument. Unlike `echo '...\n...'`
# it does not depend on the build shell's echo expanding backslash escapes,
# so the script is generated correctly under both dash and bash.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'if ! nc -z localhost 7860 2>/dev/null; then' \
        '    echo "UNHEALTHY: Service not responding"' \
        '    exit 1' \
        'fi' \
        'if ! curl -sf --max-time 3 http://localhost:7860/api/version >/dev/null; then' \
        '    echo "UNHEALTHY: API not responding"' \
        '    exit 1' \
        'fi' \
        'echo "HEALTHY: Ollama running"' \
        'exit 0' \
        > /healthcheck.sh \
    && chmod +x /healthcheck.sh
# Container health probe: run /healthcheck.sh every 60s with a 10s timeout.
# Failures during the first 120s (start-up) are not counted; after that,
# 3 consecutive failures mark the container unhealthy.
# Exec (JSON) form runs the script directly, without a wrapping `/bin/sh -c`,
# matching the exec-form ENTRYPOINT used by this image.
HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=3 \
    CMD ["/healthcheck.sh"]
# Start the container through the custom entrypoint script copied to
# /usr/local/bin earlier in this file. Exec (JSON) form is used, so the script
# runs directly as the container's main process rather than under `/bin/sh -c`.
# NOTE(review): entrypoint.sh is not visible here - presumably it launches the
# Ollama server; confirm it ends by `exec`-ing the server so signals reach it.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]