# Hugging Face Spaces metadata (from scraped page header): Space status = Paused
# Ollama server image for a Hugging Face Space, serving directly on port 7860.
# NOTE(review): ":latest" is not reproducible — pin a specific tag/digest when possible.
FROM ollama/ollama:latest

# nginx reverse-proxy is currently DISABLED: the packages below are not installed
# and the entrypoint script is not used (see commented ENTRYPOINT at the bottom).
# Ollama listens on 7860 directly via OLLAMA_HOST. To re-enable the proxy,
# uncomment this install and swap CMD for the ENTRYPOINT at the end.
# ollama/ollama is likely Debian/Ubuntu-based (check with 'cat /etc/os-release').
# RUN apt-get update && \
#     apt-get install -y --no-install-recommends nginx bash && \
#     apt-get clean && \
#     rm -rf /var/lib/apt/lists/*

# KV-cache quantization choices for OLLAMA_KV_CACHE_TYPE:
#   f16  - high precision, high memory usage (default)
#   q8_0 - 8-bit quantization, ~50% memory reduction with minimal quality loss
#   q4_0 - 4-bit quantization, ~75% memory reduction with noticeable quality impact

# Runtime configuration for Ollama.
# Models and temp files live in /dev/shm (RAM-backed) so nothing persists to disk;
# OLLAMA_NOPRUNE keeps Ollama from deleting those models on startup.
ENV OLLAMA_HOST=0.0.0.0:7860 \
    OLLAMA_NOHISTORY=true \
    OLLAMA_MULTIUSER_CACHE=false \
    OLLAMA_NOPRUNE=true \
    OLLAMA_MODELS=/dev/shm/ollama/models \
    OLLAMA_TEMP=/dev/shm/ollama/tmp \
    OLLAMA_FLASH_ATTENTION=0 \
    OLLAMA_KV_CACHE_TYPE=q4_0 \
    OLLAMA_NUM_THREADS=2 \
    OLLAMA_MAX_MEMORY=16GB \
    OLLAMA_KEEP_ALIVE=1m \
    OLLAMA_LOAD_TIMEOUT=1m \
    OLLAMA_MAX_LOADED_MODELS=2
# OLLAMA_MODEL=tinyllama

# 7860 is the single externally served port (Hugging Face Spaces convention).
# With the nginx proxy disabled, Ollama binds this port directly (OLLAMA_HOST above).
EXPOSE 7860

# Config and entrypoint staged for the (currently disabled) nginx proxy setup;
# unused while CMD below runs Ollama directly.
COPY nginx.conf /etc/nginx/nginx.conf
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Create RAM-backed model/temp directories plus /.ollama, world-writable because
# HF Spaces may run the container under an arbitrary non-root UID.
# NOTE(review): 777 is deliberately broad for that reason; tighten if a fixed
# runtime UID is ever guaranteed.
RUN mkdir -p /dev/shm/ollama/models /dev/shm/ollama/tmp && \
    chmod -R 777 /dev/shm/ollama && \
    rm -rf /.ollama && mkdir -p /.ollama && chmod -R 777 /.ollama

# Gradio/Spaces environment defaults (harmless if no Gradio app runs in this image).
ENV PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces \
    SHELL=/bin/bash

# The base image's ENTRYPOINT is the "ollama" binary, so this runs "ollama serve".
CMD ["serve"]
# ENTRYPOINT ["/entrypoint.sh"]