# ollama / Dockerfile
# Provenance (from the Hugging Face Space file viewer, commented out so the
# Dockerfile parses): uploaded by "privateone", commit f687698 (verified),
# message "Update Dockerfile".
# Base image tag is parameterized so builds can be made reproducible with
# e.g. `--build-arg OLLAMA_TAG=0.5.7` (or a digest). Defaults to `latest`
# for backward compatibility, but `:latest` is not reproducible — pin for
# production (hadolint DL3007).
ARG OLLAMA_TAG=latest
FROM ollama/ollama:${OLLAMA_TAG}
# Install nginx and bash (for scripting) inside the container
# ollama/ollama is likely Debian/Ubuntu-based (check with 'cat /etc/os-release')
# RUN apt-get update && \
# apt-get install -y nginx bash && \
# apt-get clean && \
# rm -rf /var/lib/apt/lists/*
# f16 - High precision, high memory usage (default)
# q8_0 - 8-bit quantization, ~50% memory reduction with minimal quality loss
# q4_0 - 4-bit quantization, ~75% memory reduction with noticeable quality impact
# Set environment variables to configure Ollama
# Runtime configuration consumed by `ollama serve`.
# NOTE(review): per Ollama's docs, K/V cache quantization (OLLAMA_KV_CACHE_TYPE)
# only takes effect when flash attention is enabled, but OLLAMA_FLASH_ATTENTION=0
# below — confirm whether q4_0 is actually applied, or enable flash attention.
# NOTE(review): OLLAMA_NUM_THREADS and OLLAMA_MAX_MEMORY do not appear in Ollama's
# documented environment-variable set — verify this base-image version honors them.
ENV OLLAMA_HOST=0.0.0.0:7860 \
# bind on all interfaces, port 7860 (matches EXPOSE below; the port HF Spaces routes to)
OLLAMA_NOHISTORY=true \
OLLAMA_MULTIUSER_CACHE=false \
OLLAMA_NOPRUNE=true \
# don't prune model blobs on startup
OLLAMA_MODELS=/dev/shm/ollama/models \
OLLAMA_TEMP=/dev/shm/ollama/tmp \
# models and temp files live under /dev/shm (tmpfs/RAM): fast, but lost on restart
# and counted against the container's shm size limit
OLLAMA_FLASH_ATTENTION=0 \
OLLAMA_KV_CACHE_TYPE=q4_0 \
# 4-bit KV cache — see the f16/q8_0/q4_0 trade-off notes above
OLLAMA_NUM_THREADS=2 \
OLLAMA_MAX_MEMORY=16GB \
OLLAMA_KEEP_ALIVE=1m \
# unload idle models after 1 minute to free RAM
OLLAMA_LOAD_TIMEOUT=1m \
OLLAMA_MAX_LOADED_MODELS=2
# OLLAMA_MODEL=tinyllama
# Expose 7860 — Ollama is bound directly to it via OLLAMA_HOST above. (The
# nginx-on-7860 / ollama-on-8080 split this comment once described is disabled:
# nginx is not installed and the entrypoint is commented out.)
EXPOSE 7860
# Custom nginx config. NOTE(review): nginx is never installed (the apt-get block
# above is commented out), so this file is unused unless that install is restored.
COPY nginx.conf /etc/nginx/nginx.conf
# Entrypoint script. COPY --chmod sets the execute bit in the same layer,
# replacing the separate `RUN chmod +x` — one fewer layer and the file is not
# duplicated into a second layer by the chmod. (Requires BuildKit, the default
# builder on modern Docker.)
COPY --chmod=755 entrypoint.sh /entrypoint.sh
# Create RAM directories with permissions
# NOTE(review): Docker mounts a fresh tmpfs over /dev/shm when the container
# starts, so directories created here at BUILD time are masked at runtime —
# they must (also) be created by the entrypoint/start command. Verify
# entrypoint.sh recreates /dev/shm/ollama/{models,tmp}, or serve will fail to
# write models there.
# NOTE(review): chmod -R 777 is broader than needed; prefer a dedicated user
# with targeted ownership if this image is hardened later.
# /.ollama is recreated world-writable so a non-root runtime user (HF Spaces
# runs with an arbitrary UID) can write Ollama's home-dir state.
RUN mkdir -p /dev/shm/ollama/models /dev/shm/ollama/tmp && chmod -R 777 /dev/shm/ollama && \
rm -rf /.ollama && mkdir -p /.ollama && chmod -R 777 /.ollama
# Gradio / Hugging Face Spaces conventions.
# NOTE(review): no Python/Gradio app is installed by this Dockerfile, so these
# only matter if the base image or a mounted app reads them — confirm they are
# still needed, or drop them to keep the runtime env minimal.
ENV PYTHONUNBUFFERED=1 \
GRADIO_ALLOW_FLAGGING=never \
GRADIO_NUM_PORTS=1 \
GRADIO_SERVER_NAME=0.0.0.0 \
GRADIO_THEME=huggingface \
SYSTEM=spaces \
SHELL=/bin/bash
# Exec-form CMD: "serve" is passed as the argument to the base image's
# ENTRYPOINT (presumably `ollama`, yielding `ollama serve` — confirm with
# `docker inspect ollama/ollama`).
CMD ["serve"]
# Alternative startup path (nginx proxy + ollama), currently disabled along
# with the nginx install above:
# ENTRYPOINT ["/entrypoint.sh"]