# Hugging Face Spaces metadata (from scraped page header): Space status = Paused
# Ollama server image for a Hugging Face Space, serving directly on port 7860.
# NOTE(review): ":latest" is not reproducible — pin a specific tag/digest when possible.
FROM ollama/ollama:latest

# nginx reverse-proxy is currently DISABLED: the packages below are not installed
# and the entrypoint script is not used (see commented ENTRYPOINT at the bottom).
# Ollama listens on 7860 directly via OLLAMA_HOST. To re-enable the proxy,
# uncomment this install and swap CMD for the ENTRYPOINT at the end.
# ollama/ollama is likely Debian/Ubuntu-based (check with 'cat /etc/os-release').
# RUN apt-get update && \
#     apt-get install -y --no-install-recommends nginx bash && \
#     apt-get clean && \
#     rm -rf /var/lib/apt/lists/*

# KV-cache quantization choices for OLLAMA_KV_CACHE_TYPE:
#   f16  - high precision, high memory usage (default)
#   q8_0 - 8-bit quantization, ~50% memory reduction with minimal quality loss
#   q4_0 - 4-bit quantization, ~75% memory reduction with noticeable quality impact

# Runtime configuration for Ollama.
# Models and temp files live in /dev/shm (RAM-backed) so nothing persists to disk;
# OLLAMA_NOPRUNE keeps Ollama from deleting those models on startup.
ENV OLLAMA_HOST=0.0.0.0:7860 \
    OLLAMA_NOHISTORY=true \
    OLLAMA_MULTIUSER_CACHE=false \
    OLLAMA_NOPRUNE=true \
    OLLAMA_MODELS=/dev/shm/ollama/models \
    OLLAMA_TEMP=/dev/shm/ollama/tmp \
    OLLAMA_FLASH_ATTENTION=0 \
    OLLAMA_KV_CACHE_TYPE=q4_0 \
    OLLAMA_NUM_THREADS=2 \
    OLLAMA_MAX_MEMORY=16GB \
    OLLAMA_KEEP_ALIVE=1m \
    OLLAMA_LOAD_TIMEOUT=1m \
    OLLAMA_MAX_LOADED_MODELS=2
# OLLAMA_MODEL=tinyllama

# 7860 is the single externally served port (Hugging Face Spaces convention).
# With the nginx proxy disabled, Ollama binds this port directly (OLLAMA_HOST above).
EXPOSE 7860

# Config and entrypoint staged for the (currently disabled) nginx proxy setup;
# unused while CMD below runs Ollama directly.
COPY nginx.conf /etc/nginx/nginx.conf
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Create RAM-backed model/temp directories plus /.ollama, world-writable because
# HF Spaces may run the container under an arbitrary non-root UID.
# NOTE(review): 777 is deliberately broad for that reason; tighten if a fixed
# runtime UID is ever guaranteed.
RUN mkdir -p /dev/shm/ollama/models /dev/shm/ollama/tmp && \
    chmod -R 777 /dev/shm/ollama && \
    rm -rf /.ollama && mkdir -p /.ollama && chmod -R 777 /.ollama

# Gradio/Spaces environment defaults (harmless if no Gradio app runs in this image).
ENV PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces \
    SHELL=/bin/bash

# The base image's ENTRYPOINT is the "ollama" binary, so this runs "ollama serve".
CMD ["serve"]
# ENTRYPOINT ["/entrypoint.sh"]