# syntax=docker/dockerfile:1

# Ollama server packaged for a Hugging Face Space (see the GRADIO_* /
# SYSTEM=spaces variables below): Ollama listens directly on port 7860, and
# the model store plus temp files live in tmpfs (/dev/shm) so nothing large
# is written to the container filesystem at run time.
#
# NOTE(review): ':latest' is not reproducible — pin a specific tag or digest
# (hadolint DL3007) once a known-good Ollama version is chosen.
FROM ollama/ollama:latest

# nginx/bash install is currently disabled. If the entrypoint.sh ENTRYPOINT
# variant at the bottom of this file is re-enabled, this must be re-enabled
# too so nginx actually exists in the image.
# (ollama/ollama is likely Debian/Ubuntu-based — check with 'cat /etc/os-release')
# RUN apt-get update && \
#     apt-get install -y --no-install-recommends bash nginx && \
#     rm -rf /var/lib/apt/lists/*

# KV-cache precision options for OLLAMA_KV_CACHE_TYPE:
#   f16  - high precision, high memory usage (default)
#   q8_0 - 8-bit quantization, ~50% memory reduction with minimal quality loss
#   q4_0 - 4-bit quantization, ~75% memory reduction with noticeable quality impact

# Configure Ollama for a memory-constrained, ephemeral environment: serve on
# 7860, keep the model store and temp dir in RAM, and unload models quickly.
# NOTE(review): Ollama only honors OLLAMA_KV_CACHE_TYPE when flash attention
# is enabled; with OLLAMA_FLASH_ATTENTION=0 the q4_0 setting appears to be
# ignored — confirm intent.
ENV OLLAMA_HOST=0.0.0.0:7860 \
    OLLAMA_NOHISTORY=true \
    OLLAMA_MULTIUSER_CACHE=false \
    OLLAMA_NOPRUNE=true \
    OLLAMA_MODELS=/dev/shm/ollama/models \
    OLLAMA_TEMP=/dev/shm/ollama/tmp \
    OLLAMA_FLASH_ATTENTION=0 \
    OLLAMA_KV_CACHE_TYPE=q4_0 \
    OLLAMA_NUM_THREADS=2 \
    OLLAMA_MAX_MEMORY=16GB \
    OLLAMA_KEEP_ALIVE=1m \
    OLLAMA_LOAD_TIMEOUT=1m \
    OLLAMA_MAX_LOADED_MODELS=2
# OLLAMA_MODEL=tinyllama

# Ollama itself binds to 7860 (OLLAMA_HOST above). EXPOSE is documentation
# only — the port still has to be published at run time.
EXPOSE 7860

# nginx reverse-proxy config and startup script; both are only used when the
# entrypoint.sh ENTRYPOINT variant below is active.
COPY nginx.conf /etc/nginx/nginx.conf
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# RAM-backed model/temp dirs, plus a writable /.ollama home directory.
# World-writable (777) because HF Spaces runs the container under an
# arbitrary non-root UID that must be able to write to these paths.
RUN mkdir -p /dev/shm/ollama/models /dev/shm/ollama/tmp && \
    chmod -R 777 /dev/shm/ollama && \
    rm -rf /.ollama && \
    mkdir -p /.ollama && \
    chmod -R 777 /.ollama

# Hugging Face Spaces / Gradio runtime conventions.
ENV PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces \
    SHELL=/bin/bash

# The base image's ENTRYPOINT is the ollama binary, so this runs
# "ollama serve". Swap to the script variant below to front Ollama with
# nginx (and re-enable the apt-get install block above first).
CMD ["serve"]
# ENTRYPOINT ["/entrypoint.sh"]