File size: 1,291 Bytes
350da8b
 
d6cf06e
350da8b
 
 
d6cf06e
350da8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6cf06e
350da8b
 
 
 
 
 
d6cf06e
 
 
350da8b
d6cf06e
350da8b
 
 
 
 
d6cf06e
 
 
 
350da8b
d6cf06e
350da8b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
FROM python:3.11-slim

# System dependencies. zstd is needed by Ollama's installer/runtime for
# decompressing model archives. --no-install-recommends keeps the layer
# minimal (hadolint DL3015); the apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    zstd \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama via the official install script.
# NOTE(review): `curl | sh` executes unpinned remote code at build time;
# consider pinning a release version and verifying a checksum instead.
RUN curl -fsSL https://ollama.com/install.sh | sh

# Set working directory (created automatically if missing)
WORKDIR /app

# Copy the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Generate the startup script. printf '%s\n' is used instead of
# `echo '...\n\...'` because echo's backslash-escape handling is
# shell-dependent, while printf is POSIX-portable. The script:
#   1. starts Ollama in the background,
#   2. polls the Ollama API (default port 11434) until it responds,
#      with a bounded retry, instead of racing a fixed `sleep 8`,
#   3. pulls the model,
#   4. exec's uvicorn so it replaces the shell and receives signals.
RUN printf '%s\n' \
    '#!/bin/bash' \
    'set -e' \
    'echo "Starting Ollama service..."' \
    'ollama serve &' \
    'OLLAMA_PID=$!' \
    'echo "Waiting for Ollama to be ready..."' \
    'for i in $(seq 1 30); do' \
    '  if curl -fsS http://localhost:11434/api/version >/dev/null 2>&1; then' \
    '    break' \
    '  fi' \
    '  sleep 1' \
    'done' \
    'echo "Pulling model deepseek-r1:1.5b (no cache)..."' \
    'OLLAMA_NOHISTORY=1 ollama pull deepseek-r1:1.5b' \
    'echo "Model ready. Starting FastAPI server..."' \
    'exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1 --timeout-keep-alive 300 --no-access-log' \
    > /app/start.sh && chmod +x /app/start.sh

# Expose port (documentation only; does not publish the port)
EXPOSE 7860

# Disable Ollama prompt history; enable flash attention
ENV OLLAMA_NOHISTORY=1 \
    OLLAMA_FLASH_ATTENTION=1

# Probe the FastAPI /health endpoint; generous start period covers the
# model pull performed by start.sh before uvicorn comes up.
HEALTHCHECK --interval=30s --timeout=10s --start-period=90s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Exec-form CMD: the startup script runs directly, not via `sh -c`
CMD ["/app/start.sh"]