#!/usr/bin/env bash
#
# Startup script: launch an Ollama server on port 7860 with CPU-friendly
# tuning, optionally preloading a model (PRELOAD_MODEL env var).
#
# Strict mode: exit on error, error on unset vars, fail pipelines on any stage.
set -euo pipefail
# --- Logging helpers --------------------------------------------------------
# log:   timestamped informational message -> stdout.
# error: timestamped error message -> stderr.
log() {
  printf '[%s] %s\n' "$(date +'%H:%M:%S')" "$*"
}
error() {
  printf '[%s] ERROR: %s\n' "$(date +'%H:%M:%S')" "$*" >&2
}
# System resource check
# Inspect available memory and CPU count, exporting conservative tuning
# variables when the host is small.
# Globals written: OLLAMA_MAX_QUEUE, OMP_NUM_THREADS, MKL_NUM_THREADS
check_system() {
  local mem_mb cpu_count
  # Separate declaration from assignment so a failing command isn't masked
  # by `local` (SC2155); default to 0/1 if free/nproc are unavailable so
  # the numeric tests below can't crash the script under `set -e`.
  mem_mb=$(free -m 2>/dev/null | awk 'NR==2{print $7}') || mem_mb=0
  mem_mb=${mem_mb:-0}
  cpu_count=$(nproc 2>/dev/null) || cpu_count=1
  log "Available Memory: ${mem_mb}MB, CPU Cores: ${cpu_count}"

  # Adjust threading based on resources
  if [ "$mem_mb" -lt 6000 ]; then
    export OLLAMA_MAX_QUEUE=2
    log "Low memory detected - reduced queue size to 2"
  fi
  if [ "$cpu_count" -le 2 ]; then
    export OMP_NUM_THREADS=2
    export MKL_NUM_THREADS=2
    log "Limited CPU cores - adjusted thread count"
  fi
}
# Wait for service readiness
# Launch `ollama serve` in the background and poll port 7860 until it
# accepts connections (30 attempts x 2s sleep = up to 60 seconds).
# Returns: 0 once the port is open; 1 on timeout or if the server process
# dies before the port opens (in which case it is also reaped).
wait_for_service() {
  log "Starting Ollama server..."
  ollama serve &
  local pid=$!
  local i
  for i in {1..30}; do
    # Bail out early instead of polling a corpse for the full 60 seconds.
    if ! kill -0 "$pid" 2>/dev/null; then
      error "Ollama server process exited before becoming ready"
      return 1
    fi
    if nc -z localhost 7860 2>/dev/null; then
      log "β Ollama service ready on port 7860"
      return 0
    fi
    sleep 2
  done
  error "Service failed to start within 60 seconds"
  kill "$pid" 2>/dev/null || true
  return 1
}
# Model management
# Pull PRELOAD_MODEL (if set) with a timeout and warm it up; on failure,
# try lightweight fallback models. Exports DEFAULT_MODEL to whichever
# model actually loaded so downstream consumers see a consistent value.
# Returns: 0 always — preload failure is non-fatal (models load on demand).
setup_model() {
  local model="${PRELOAD_MODEL:-}"
  if [ -z "$model" ]; then
    log "No model preloading specified (set PRELOAD_MODEL env var)"
    return 0
  fi
  log "Attempting to preload model: $model"
  # Try to pull model with timeout; stderr suppressed deliberately since
  # failure here is expected to be recoverable via the fallback list.
  if timeout 300 ollama pull "$model" 2>/dev/null; then
    log "β Model $model loaded successfully"
    # Export on success too, consistent with the fallback branch below.
    export DEFAULT_MODEL="$model"
    # Quick warmup so the first real request doesn't pay the load cost
    echo "test" | timeout 15 ollama run "$model" >/dev/null 2>&1 || true
  else
    log "β Failed to preload $model - will load on demand"
    # Try lightweight alternatives
    local fallback
    for fallback in "gemma:2b-instruct-q4_0" "phi:2.7b-chat-v0.2-q4_0"; do
      log "Trying fallback: $fallback"
      if timeout 180 ollama pull "$fallback" 2>/dev/null; then
        log "β Fallback model $fallback loaded"
        export DEFAULT_MODEL="$fallback"
        break
      fi
    done
  fi
}
# Signal handling
# On SIGTERM/SIGINT: kill any running Ollama server (best-effort) and
# exit 0 so the container shuts down cleanly.
cleanup() {
  log "Shutting down gracefully..."
  # Best-effort: ignore "no process matched" from pkill.
  if ! pkill -f "ollama serve" 2>/dev/null; then
    :
  fi
  exit 0
}
trap cleanup SIGTERM SIGINT
# Main execution
# Orchestrates startup: tune for host resources, start the server, preload
# a model, then block on the background server to keep the container alive.
main() {
  log "Starting Ollama with CPU optimizations"
  # Default to 'unset' in the log line: bare expansion of these optional
  # tuning vars would abort the whole script under `set -u` when they are
  # not provided by the environment.
  log "Config: PARALLEL=${OLLAMA_NUM_PARALLEL:-unset}, QUEUE=${OLLAMA_MAX_QUEUE:-unset}, KEEP_ALIVE=${OLLAMA_KEEP_ALIVE:-unset}"
  check_system
  if wait_for_service; then
    setup_model
    log "π Ollama ready at http://localhost:7860"
    log "Send requests to /api/generate or /api/chat endpoints"
    # Keep container alive by waiting on the backgrounded `ollama serve`
    wait
  else
    error "Failed to initialize Ollama"
    exit 1
  fi
}
main "$@"