# Ollama server image configured for CPU-only inference, serving on port 7860.

# Base image tag. Defaults to "latest" so existing builds behave as before;
# pass `--build-arg OLLAMA_VERSION=<tag>` to pin a release for reproducible builds.
ARG OLLAMA_VERSION=latest
FROM ollama/ollama:${OLLAMA_VERSION}

# Install helper tools. netcat (nc) and curl are required by the health check
# script generated below; the other tools are presumably used by entrypoint.sh
# or for debugging -- confirm before removing any of them.
# --no-install-recommends keeps optional packages out of the image, and the apt
# lists are deleted in the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        htop \
        netcat-openbsd \
        procps \
        wget \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Core environment variables: listen on all network interfaces on port 7860,
# accept any origin, and keep models under /tmp.
ENV OLLAMA_HOST=0.0.0.0:7860 \
    OLLAMA_ORIGINS="*" \
    OLLAMA_MODELS=/tmp/ollama-models

# Ollama performance optimizations
ENV OLLAMA_NUM_PARALLEL=1 \
    OLLAMA_MAX_LOADED_MODELS=1 \
    OLLAMA_MAX_QUEUE=3 \
    OLLAMA_FLASH_ATTENTION=1 \
    OLLAMA_KEEP_ALIVE=5m \
    OLLAMA_NOPRUNE=false

# CPU-specific threading optimizations (all capped at 4 threads, matching
# GOMAXPROCS below)
ENV OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    OPENBLAS_NUM_THREADS=4 \
    VECLIB_MAXIMUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    BLAS_NUM_THREADS=4

# Memory and performance tuning
ENV OLLAMA_MAX_VRAM=0 \
    MALLOC_ARENA_MAX=2 \
    MALLOC_MMAP_THRESHOLD_=131072 \
    MALLOC_TRIM_THRESHOLD_=131072 \
    GOMEMLIMIT=10GiB \
    GOMAXPROCS=4

# GPU disable for CPU-only inference
ENV CUDA_VISIBLE_DEVICES="" \
    HIP_VISIBLE_DEVICES=""

# Generate the health check script inline (no extra file in the build context).
# It first probes the TCP port, then the HTTP API, and exits non-zero on the
# first failure. printf '%s\n' writes one script line per argument, so the
# result does not depend on whether /bin/sh's echo expands "\n" escapes.
# This step comes before the COPY so edits to entrypoint.sh do not rebuild it.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'if ! nc -z localhost 7860 2>/dev/null; then' \
        '    echo "UNHEALTHY: Service not responding"' \
        '    exit 1' \
        'fi' \
        'if ! curl -sf --max-time 3 http://localhost:7860/api/version >/dev/null; then' \
        '    echo "UNHEALTHY: API not responding"' \
        '    exit 1' \
        'fi' \
        'echo "HEALTHY: Ollama running"' \
        'exit 0' \
        > /healthcheck.sh \
    && chmod +x /healthcheck.sh

# Copy the entrypoint script and make it executable.
# NOTE(review): no USER instruction is set, so the container runs as the base
# image's default user -- consider a non-root user once entrypoint.sh is
# confirmed to work without root.
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh

# Document the port the server listens on (matches OLLAMA_HOST above)
EXPOSE 7860

# Health check configuration: allow 120s for start-up, then probe every 60s.
# Exec form runs the script directly instead of through `/bin/sh -c`.
HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=3 \
    CMD ["/healthcheck.sh"]

# Use the custom entrypoint script
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]