# focusflow / Dockerfile
# Author: SivaRohith69
# Commit decd86c: Create appuser for UID 1000 to fix PyTorch getpwuid KeyError
# Debian-based slim Python base. The inline startup script below uses a
# bash shebang — assumes bash is present (true for Debian-based images).
FROM python:3.10-slim
# All later relative paths (COPY ., data/, logs/, chroma_db/) resolve under /app.
WORKDIR /app
# Install system dependencies:
#   build-essential      - compilers for Python packages built from source
#   curl                 - used by the startup script's backend health poll
#   poppler-utils        - PDF processing utilities
#   tesseract-ocr(-eng)  - OCR engine plus English language data
# --no-install-recommends avoids pulling recommended-but-unneeded packages
# (hadolint DL3015); the apt list cache is removed in the same layer so it
# never persists in the image. Packages are sorted for diffability.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    poppler-utils \
    tesseract-ocr \
    tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies.
# Copying requirements.txt alone (before the full source at COPY . .) lets
# Docker cache this layer: the slow pip install only reruns when the
# requirements change, not on every source edit. --no-cache-dir keeps the
# pip wheel cache out of the image layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Pre-download the sentence-transformers embedding model during build
# so it's cached in the image and doesn't need network at runtime.
# HF_HOME is set via ENV (not ARG) deliberately: the same cache path must be
# visible at runtime so the offline embedder finds the baked-in model files.
# This download runs as root; ownership of /app/.cache is fixed later by the
# chown -R in the appuser setup step.
ENV HF_HOME=/app/.cache/huggingface
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
# Copy application code (after the dependency layers, so source-only edits
# don't invalidate the cached apt/pip/model layers above).
# NOTE(review): confirm a .dockerignore excludes .git, .env, local caches and
# other junk — COPY . . pulls in the whole build context otherwise.
COPY . .
# Create data directories and user for HuggingFace Spaces (runs as UID 1000)
# PyTorch requires a named user in /etc/passwd, otherwise getpass.getuser() throws a KeyError
# -m creates /home/appuser so $HOME exists and is writable; the chown -R must
# come last so it also covers the files COPY'd above and the HF model cache
# downloaded as root under /app/.cache.
RUN useradd -m -u 1000 appuser && \
mkdir -p data chroma_db logs && \
chown -R appuser:appuser /app
# Drop root for HF Spaces: every later instruction and the container's main
# process run as appuser (UID 1000, created above).
USER appuser
# Expose ports for backend (8000) and frontend (7860 = HF Spaces default).
# EXPOSE is documentation only — it does not publish the ports; both are
# above 1024, so binding works for the non-root user.
EXPOSE 7860 8000
# Set environment to use Hugging Face as the LLM provider.
ENV LLM_PROVIDER=huggingface
# Fix: TRANSFORMERS_OFFLINE MUST be 1 otherwise the embedder tries to write to the cache dir (which is owned by root)
# and throws a PermissionError, crashing Uvicorn at startup.
ENV TRANSFORMERS_OFFLINE=1
# Fix: HF_HUB_OFFLINE MUST be 0 otherwise the Inference APIs (for generating lessons) are blocked from making network requests.
# (The startup script re-exports both flags to override any Space-level settings.)
ENV HF_HUB_OFFLINE=0
# Enable Supabase for persistent storage.
ENV USE_SUPABASE=true
# Create production startup script with health checks. The script is written
# via echo (relies on the shell builtin echo expanding \n — true for dash,
# Debian's /bin/sh) and does, in order:
#   1. re-export HF_HUB_OFFLINE=0 / TRANSFORMERS_OFFLINE=1 to override any
#      Space-level env settings (see ENV rationale above),
#   2. start the FastAPI backend in the background on :8000, logging to
#      logs/backend.log,
#   3. poll http://localhost:8000/health for up to 90s; on timeout, dump the
#      last 50 log lines and exit non-zero,
#   4. exec Streamlit on :7860 so the frontend becomes PID 1.
# NOTE(review): after the exec, the backgrounded backend is reparented under
# Streamlit with no init to forward signals — on `docker stop` only PID 1
# gets SIGTERM. Confirm this is acceptable for the Spaces runtime.
RUN echo '#!/bin/bash\n\
set -e\n\
\n\
# OVERRIDE ANY SPACE SETTINGS TO ENSURE INFERENCE API WORKS\n\
export HF_HUB_OFFLINE=0\n\
export TRANSFORMERS_OFFLINE=1\n\
\n\
echo "===== Application Startup at $(date) =====" \n\
echo "=== FocusFlow Startup ===" \n\
\n\
# Wait for DNS/networking to be ready (HF Spaces can be slow)\n\
echo "Waiting for network readiness..." \n\
sleep 3\n\
\n\
echo "Starting backend on port 8000..." \n\
\n\
# Start FastAPI backend\n\
uvicorn backend.main:app --host 0.0.0.0 --port 8000 > logs/backend.log 2>&1 &\n\
BACKEND_PID=$!\n\
echo "Backend started with PID $BACKEND_PID" \n\
\n\
# Wait for backend to be healthy (max 90 seconds)\n\
echo "Waiting for backend health check..." \n\
for i in {1..90}; do\n\
if curl -sf http://localhost:8000/health > /dev/null 2>&1; then\n\
echo "✅ Backend is healthy!" \n\
break\n\
fi\n\
if [ $i -eq 90 ]; then\n\
echo "❌ Backend failed to start. Logs:" \n\
tail -50 logs/backend.log\n\
exit 1\n\
fi\n\
echo "Attempt $i/90 - waiting..." \n\
sleep 1\n\
done\n\
\n\
# Start Streamlit frontend\n\
echo "Starting frontend on port 7860..." \n\
exec streamlit run app.py --server.port 7860 --server.address 0.0.0.0 --server.headless true 2>&1\n\
' > /app/start.sh && chmod +x /app/start.sh
# Report container health by probing the backend's /health endpoint (the same
# endpoint the startup script polls; curl is installed in the apt layer).
# Generous start period because model loading can delay backend startup.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD curl -fsS http://localhost:8000/health || exit 1
# Run startup script (exec form: start.sh runs as PID 1 and receives signals
# directly, without an intermediate /bin/sh -c wrapper)
CMD ["/app/start.sh"]