# Author: santanche
# refactor (model): replacing phi3 by phi4-mini (commit f369a7d)
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies including zstd for Ollama.
# --no-install-recommends keeps the image lean; the apt list cache is
# removed in the same layer so it never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    zstd \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama
# NOTE(review): piping a remote script straight into sh is unpinned and
# unverified — any upstream change is silently executed at build time.
# Consider pinning a specific Ollama release and verifying its checksum.
RUN curl -fsSL https://ollama.ai/install.sh | sh

# Copy requirements first so the dependency layer is cached until
# requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies (--no-cache-dir avoids baking pip's
# download cache into the layer).
RUN pip install --no-cache-dir -r requirements.txt
# Application code and static assets
COPY server.py .
COPY static/ ./static/

# Directory where Ollama will store pulled models
RUN mkdir -p /root/.ollama

# HuggingFace Spaces routes traffic to port 7860 (EXPOSE is documentation
# only; it does not publish the port)
EXPOSE 7860

# Runtime configuration, grouped in a single instruction:
#   OLLAMA_HOST      — bind the Ollama API on all interfaces, port 11434
#   PYTHONUNBUFFERED — flush Python stdout/stderr immediately for live logs
ENV OLLAMA_HOST=0.0.0.0:11434 \
    PYTHONUNBUFFERED=1
# Create startup script - only phi4-mini and biomistral for free tier
RUN echo '#!/bin/bash\n\
set -e\n\
\n\
echo "Starting Ollama server..."\n\
ollama serve &\n\
OLLAMA_PID=$!\n\
\n\
echo "Waiting for Ollama to be ready..."\n\
sleep 10\n\
\n\
echo "Pulling phi4-mini model..."\n\
ollama pull phi4-mini\n\
\n\
echo "Pulling MedGemma model..."\n\
ollama pull MedAIBase/MedGemma1.5:4b\n\
\n\
echo "Pulling DeepSeek Coder model..."\n\
ollama pull deepseek-coder:1.3b\n\
\n\
echo "NER models will be downloaded via transformers on first use"\n\
\n\
echo "Models ready! Starting FastAPI server..."\n\
exec uvicorn server:app --host 0.0.0.0 --port 7860\n\
' > /app/start.sh && chmod +x /app/start.sh
# Run the startup script. Exec (JSON-array) form: the script runs without an
# intermediate /bin/sh -c wrapper, so it is PID 1 and receives SIGTERM on
# `docker stop`.
CMD ["/app/start.sh"]