# focusflow / Dockerfile
# Author: SivaRohith69
# Commit decd86c: Create appuser for UID 1000 to fix PyTorch getpwuid KeyError
# Debian-based slim Python base. The inline startup script below uses a
# bash shebang — assumes bash is present (true for Debian-based images).
FROM python:3.10-slim
# All later relative paths (COPY ., data/, logs/, chroma_db/) resolve under /app.
WORKDIR /app
# Install system dependencies:
#   build-essential      - compilers for Python packages built from source
#   curl                 - used by the startup script's backend health poll
#   poppler-utils        - PDF processing utilities
#   tesseract-ocr(-eng)  - OCR engine plus English language data
# --no-install-recommends avoids pulling recommended-but-unneeded packages
# (hadolint DL3015); the apt list cache is removed in the same layer so it
# never persists in the image. Packages are sorted for diffability.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    poppler-utils \
    tesseract-ocr \
    tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies.
# Copying requirements.txt alone (before the full source at COPY . .) lets
# Docker cache this layer: the slow pip install only reruns when the
# requirements change, not on every source edit. --no-cache-dir keeps the
# pip wheel cache out of the image layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Pre-download the sentence-transformers embedding model during build
# so it's cached in the image and doesn't need network at runtime.
# HF_HOME is set via ENV (not ARG) deliberately: the same cache path must be
# visible at runtime so the offline embedder finds the baked-in model files.
# This download runs as root; ownership of /app/.cache is fixed later by the
# chown -R in the appuser setup step.
ENV HF_HOME=/app/.cache/huggingface
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
# Copy application code (after the dependency layers, so source-only edits
# don't invalidate the cached apt/pip/model layers above).
# NOTE(review): confirm a .dockerignore excludes .git, .env, local caches and
# other junk — COPY . . pulls in the whole build context otherwise.
COPY . .
# Create data directories and user for HuggingFace Spaces (runs as UID 1000)
# PyTorch requires a named user in /etc/passwd, otherwise getpass.getuser() throws a KeyError
# -m creates /home/appuser so $HOME exists and is writable; the chown -R must
# come last so it also covers the files COPY'd above and the HF model cache
# downloaded as root under /app/.cache.
RUN useradd -m -u 1000 appuser && \
mkdir -p data chroma_db logs && \
chown -R appuser:appuser /app
# Drop root for HF Spaces: every later instruction and the container's main
# process run as appuser (UID 1000, created above).
USER appuser
# Expose ports for backend (8000) and frontend (7860 = HF Spaces default).
# EXPOSE is documentation only — it does not publish the ports; both are
# above 1024, so binding works for the non-root user.
EXPOSE 7860 8000
# Set environment to use Hugging Face as the LLM provider.
ENV LLM_PROVIDER=huggingface
# Fix: TRANSFORMERS_OFFLINE MUST be 1 otherwise the embedder tries to write to the cache dir (which is owned by root)
# and throws a PermissionError, crashing Uvicorn at startup.
ENV TRANSFORMERS_OFFLINE=1
# Fix: HF_HUB_OFFLINE MUST be 0 otherwise the Inference APIs (for generating lessons) are blocked from making network requests.
# (The startup script re-exports both flags to override any Space-level settings.)
ENV HF_HUB_OFFLINE=0
# Enable Supabase for persistent storage.
ENV USE_SUPABASE=true
# Create production startup script with health checks. The script is written
# via echo (relies on the shell builtin echo expanding \n — true for dash,
# Debian's /bin/sh) and does, in order:
#   1. re-export HF_HUB_OFFLINE=0 / TRANSFORMERS_OFFLINE=1 to override any
#      Space-level env settings (see ENV rationale above),
#   2. start the FastAPI backend in the background on :8000, logging to
#      logs/backend.log,
#   3. poll http://localhost:8000/health for up to 90s; on timeout, dump the
#      last 50 log lines and exit non-zero,
#   4. exec Streamlit on :7860 so the frontend becomes PID 1.
# NOTE(review): after the exec, the backgrounded backend is reparented under
# Streamlit with no init to forward signals — on `docker stop` only PID 1
# gets SIGTERM. Confirm this is acceptable for the Spaces runtime.
RUN echo '#!/bin/bash\n\
set -e\n\
\n\
# OVERRIDE ANY SPACE SETTINGS TO ENSURE INFERENCE API WORKS\n\
export HF_HUB_OFFLINE=0\n\
export TRANSFORMERS_OFFLINE=1\n\
\n\
echo "===== Application Startup at $(date) =====" \n\
echo "=== FocusFlow Startup ===" \n\
\n\
# Wait for DNS/networking to be ready (HF Spaces can be slow)\n\
echo "Waiting for network readiness..." \n\
sleep 3\n\
\n\
echo "Starting backend on port 8000..." \n\
\n\
# Start FastAPI backend\n\
uvicorn backend.main:app --host 0.0.0.0 --port 8000 > logs/backend.log 2>&1 &\n\
BACKEND_PID=$!\n\
echo "Backend started with PID $BACKEND_PID" \n\
\n\
# Wait for backend to be healthy (max 90 seconds)\n\
echo "Waiting for backend health check..." \n\
for i in {1..90}; do\n\
if curl -sf http://localhost:8000/health > /dev/null 2>&1; then\n\
echo "✅ Backend is healthy!" \n\
break\n\
fi\n\
if [ $i -eq 90 ]; then\n\
echo "❌ Backend failed to start. Logs:" \n\
tail -50 logs/backend.log\n\
exit 1\n\
fi\n\
echo "Attempt $i/90 - waiting..." \n\
sleep 1\n\
done\n\
\n\
# Start Streamlit frontend\n\
echo "Starting frontend on port 7860..." \n\
exec streamlit run app.py --server.port 7860 --server.address 0.0.0.0 --server.headless true 2>&1\n\
' > /app/start.sh && chmod +x /app/start.sh
# Report container health by probing the backend's /health endpoint (the same
# endpoint the startup script polls; curl is installed in the apt layer).
# Generous start period because model loading can delay backend startup.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD curl -fsS http://localhost:8000/health || exit 1
# Run startup script (exec form: start.sh runs as PID 1 and receives signals
# directly, without an intermediate /bin/sh -c wrapper)
CMD ["/app/start.sh"]