#!/usr/bin/env bash
#
# Startup script: launch an Ollama server on port 7860 with CPU-friendly
# tuning, optionally preloading a model (PRELOAD_MODEL env var).
#
# Strict mode: exit on error, error on unset vars, fail pipelines on any stage.
set -euo pipefail
# --- Logging helpers --------------------------------------------------------
# log:   timestamped informational message -> stdout.
# error: timestamped error message -> stderr.
log() {
  printf '[%s] %s\n' "$(date +'%H:%M:%S')" "$*"
}
error() {
  printf '[%s] ERROR: %s\n' "$(date +'%H:%M:%S')" "$*" >&2
}
# System resource check
# Inspect available memory and CPU count, exporting conservative tuning
# variables when the host is small.
# Globals written: OLLAMA_MAX_QUEUE, OMP_NUM_THREADS, MKL_NUM_THREADS
check_system() {
  local mem_mb cpu_count
  # Separate declaration from assignment so a failing command isn't masked
  # by `local` (SC2155); default to 0/1 if free/nproc are unavailable so
  # the numeric tests below can't crash the script under `set -e`.
  mem_mb=$(free -m 2>/dev/null | awk 'NR==2{print $7}') || mem_mb=0
  mem_mb=${mem_mb:-0}
  cpu_count=$(nproc 2>/dev/null) || cpu_count=1
  log "Available Memory: ${mem_mb}MB, CPU Cores: ${cpu_count}"

  # Adjust threading based on resources
  if [ "$mem_mb" -lt 6000 ]; then
    export OLLAMA_MAX_QUEUE=2
    log "Low memory detected - reduced queue size to 2"
  fi
  if [ "$cpu_count" -le 2 ]; then
    export OMP_NUM_THREADS=2
    export MKL_NUM_THREADS=2
    log "Limited CPU cores - adjusted thread count"
  fi
}
# Wait for service readiness
# Launch `ollama serve` in the background and poll port 7860 until it
# accepts connections (30 attempts x 2s sleep = up to 60 seconds).
# Returns: 0 once the port is open; 1 on timeout or if the server process
# dies before the port opens (in which case it is also reaped).
wait_for_service() {
  log "Starting Ollama server..."
  ollama serve &
  local pid=$!
  local i
  for i in {1..30}; do
    # Bail out early instead of polling a corpse for the full 60 seconds.
    if ! kill -0 "$pid" 2>/dev/null; then
      error "Ollama server process exited before becoming ready"
      return 1
    fi
    if nc -z localhost 7860 2>/dev/null; then
      log "β Ollama service ready on port 7860"
      return 0
    fi
    sleep 2
  done
  error "Service failed to start within 60 seconds"
  kill "$pid" 2>/dev/null || true
  return 1
}
# Model management
# Pull PRELOAD_MODEL (if set) with a timeout and warm it up; on failure,
# try lightweight fallback models. Exports DEFAULT_MODEL to whichever
# model actually loaded so downstream consumers see a consistent value.
# Returns: 0 always — preload failure is non-fatal (models load on demand).
setup_model() {
  local model="${PRELOAD_MODEL:-}"
  if [ -z "$model" ]; then
    log "No model preloading specified (set PRELOAD_MODEL env var)"
    return 0
  fi
  log "Attempting to preload model: $model"
  # Try to pull model with timeout; stderr suppressed deliberately since
  # failure here is expected to be recoverable via the fallback list.
  if timeout 300 ollama pull "$model" 2>/dev/null; then
    log "β Model $model loaded successfully"
    # Export on success too, consistent with the fallback branch below.
    export DEFAULT_MODEL="$model"
    # Quick warmup so the first real request doesn't pay the load cost
    echo "test" | timeout 15 ollama run "$model" >/dev/null 2>&1 || true
  else
    log "β Failed to preload $model - will load on demand"
    # Try lightweight alternatives
    local fallback
    for fallback in "gemma:2b-instruct-q4_0" "phi:2.7b-chat-v0.2-q4_0"; do
      log "Trying fallback: $fallback"
      if timeout 180 ollama pull "$fallback" 2>/dev/null; then
        log "β Fallback model $fallback loaded"
        export DEFAULT_MODEL="$fallback"
        break
      fi
    done
  fi
}
# Signal handling
# On SIGTERM/SIGINT: kill any running Ollama server (best-effort) and
# exit 0 so the container shuts down cleanly.
cleanup() {
  log "Shutting down gracefully..."
  # Best-effort: ignore "no process matched" from pkill.
  if ! pkill -f "ollama serve" 2>/dev/null; then
    :
  fi
  exit 0
}
trap cleanup SIGTERM SIGINT
# Main execution
# Orchestrates startup: tune for host resources, start the server, preload
# a model, then block on the background server to keep the container alive.
main() {
  log "Starting Ollama with CPU optimizations"
  # Default to 'unset' in the log line: bare expansion of these optional
  # tuning vars would abort the whole script under `set -u` when they are
  # not provided by the environment.
  log "Config: PARALLEL=${OLLAMA_NUM_PARALLEL:-unset}, QUEUE=${OLLAMA_MAX_QUEUE:-unset}, KEEP_ALIVE=${OLLAMA_KEEP_ALIVE:-unset}"
  check_system
  if wait_for_service; then
    setup_model
    log "π Ollama ready at http://localhost:7860"
    log "Send requests to /api/generate or /api/chat endpoints"
    # Keep container alive by waiting on the backgrounded `ollama serve`
    wait
  else
    error "Failed to initialize Ollama"
    exit 1
  fi
}
main "$@"