Spaces:

oki692
/

ollama-fastapi-streaming

Sleeping

oki692 committed 17 days ago

Commit

d6cf06e

verified ·

1 Parent(s): ca418d0

Upload Dockerfile with huggingface_hub

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,9 +1,10 @@
 FROM python:3.11-slim
-# Install system dependencies and Ollama
 RUN apt-get update && apt-get install -y \
     curl \
     ca-certificates \
     && rm -rf /var/lib/apt/lists/*
 # Install Ollama
@@ -19,25 +20,29 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY app.py .
-# Create startup script
 RUN echo '#!/bin/bash\n\
 set -e\n\
 echo "Starting Ollama service..."\n\
 ollama serve &\n\
 OLLAMA_PID=$!\n\
 echo "Waiting for Ollama to be ready..."\n\
-sleep 5\n\
-echo "Pulling model deepseek-r1:1.5b..."\n\
-ollama pull deepseek-r1:1.5b\n\
 echo "Model ready. Starting FastAPI server..."\n\
-exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1 --timeout-keep-alive 300\n\
 ' > /app/start.sh && chmod +x /app/start.sh
 # Expose port
 EXPOSE 7860
 # Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
     CMD curl -f http://localhost:7860/health || exit 1
 # Start services

 FROM python:3.11-slim
+# Install system dependencies including zstd for Ollama
 RUN apt-get update && apt-get install -y \
     curl \
     ca-certificates \
+    zstd \
     && rm -rf /var/lib/apt/lists/*
 # Install Ollama
 # Copy application code
 COPY app.py .
+# Create startup script (Ollama history disabled, uvicorn access log off)
 RUN echo '#!/bin/bash\n\
 set -e\n\
 echo "Starting Ollama service..."\n\
 ollama serve &\n\
 OLLAMA_PID=$!\n\
 echo "Waiting for Ollama to be ready..."\n\
+sleep 8\n\
+echo "Pulling model deepseek-r1:1.5b (no cache)..."\n\
+OLLAMA_NOHISTORY=1 ollama pull deepseek-r1:1.5b\n\
 echo "Model ready. Starting FastAPI server..."\n\
+exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1 --timeout-keep-alive 300 --no-access-log\n\
 ' > /app/start.sh && chmod +x /app/start.sh
 # Expose port
 EXPOSE 7860
+# Disable Ollama readline history and enable flash attention
+ENV OLLAMA_NOHISTORY=1
+ENV OLLAMA_FLASH_ATTENTION=1
 # Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=90s --retries=3 \
     CMD curl -f http://localhost:7860/health || exit 1
 # Start services