File size: 3,125 Bytes
d5f1e20
 
 
 
 
 
 
 
2c32e38
 
 
d5f1e20
 
 
 
 
 
3c23e28
 
 
 
 
d5f1e20
 
 
decd86c
 
 
 
 
66f3f21
 
decd86c
d5f1e20
3aa5a30
 
d5f1e20
 
 
 
57e6212
 
 
 
 
87ee24d
57e6212
479eeb5
 
 
76a5867
d5f1e20
76a5867
23d2ce2
57e6212
 
 
 
7856594
76a5867
3c23e28
 
 
 
 
76a5867
 
 
 
 
 
 
3c23e28
76a5867
3c23e28
 
76a5867
 
 
3c23e28
76a5867
 
 
 
3c23e28
76a5867
 
23d2ce2
 
3aa5a30
 
23d2ce2
d5f1e20
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Slim Debian-based Python image; explicit minor-version tag for reproducibility.
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies:
#   build-essential     - compile native wheels during pip install
#   curl                - used by the startup script's backend health check
#   tesseract-ocr(-eng) - OCR engine + English language data
#   poppler-utils       - PDF tooling (pdftoppm etc. for PDF ingestion)
# --no-install-recommends keeps the layer minimal (DL3015); the apt list
# cleanup happens in the same layer so the cache never reaches the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    poppler-utils \
    tesseract-ocr \
    tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies.
# Copying only requirements.txt (not the whole source tree) first keeps this
# expensive install layer cached until the dependency list itself changes;
# --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the sentence-transformers embedding model during build
# so it's cached in the image and doesn't need network at runtime.
# HF_HOME relocates the whole Hugging Face cache under /app so the cached
# model lives alongside the application files.
# NOTE(review): this RUN executes as root (USER is switched later), so
# /app/.cache is root-owned at this point — the later chown must cover it
# or the runtime user cannot touch the cache.
ENV HF_HOME=/app/.cache/huggingface
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"

# Create the non-root user before copying code so COPY --chown can reference
# it. HuggingFace Spaces runs containers as UID 1000, and PyTorch requires a
# named user in /etc/passwd, otherwise getpass.getuser() throws a KeyError.
RUN useradd -m -u 1000 appuser

# Copy application code already owned by appuser. Using --chown here avoids
# the follow-up `chown -R appuser:appuser /app` over the copied files, which
# would duplicate all of them (plus the pre-downloaded model cache) in an
# extra image layer.
COPY --chown=appuser:appuser . .

# Create writable runtime directories, then fix ownership of the paths that
# were created as root in earlier layers:
#   /app itself       - the startup script is written here AFTER `USER
#                       appuser`, so the directory must be writable by appuser
#   /app/.cache       - the pre-downloaded HF model cache (root-owned)
#   data/chroma_db/logs - created just above as root
RUN mkdir -p data chroma_db logs && \
    chown appuser:appuser /app && \
    chown -R appuser:appuser data chroma_db logs /app/.cache

# Specify user for HF Spaces
USER appuser

# Expose ports for backend (8000) and frontend (7860 = HF Spaces default).
# EXPOSE is documentation only — it does not publish anything by itself.
EXPOSE 7860 8000

# Runtime configuration, grouped into a single ENV instruction:
#   LLM_PROVIDER=huggingface - use Hugging Face as the LLM backend
#   TRANSFORMERS_OFFLINE=1   - MUST be 1, otherwise the embedder tries to
#                              write to the cache dir (which is owned by
#                              root) and throws a PermissionError, crashing
#                              Uvicorn at startup.
#   HF_HUB_OFFLINE=0         - MUST be 0, otherwise the Inference APIs (for
#                              generating lessons) are blocked from making
#                              network requests.
#   USE_SUPABASE=true        - enable Supabase for persistent storage
ENV LLM_PROVIDER=huggingface \
    TRANSFORMERS_OFFLINE=1 \
    HF_HUB_OFFLINE=0 \
    USE_SUPABASE=true

# Create production startup script with health checks.
#
# NOTE(review): the script is generated via `echo 'line\n\ ...'`. This relies
# on the image's /bin/sh (Debian dash) having an `echo` builtin that
# interprets backslash escapes like \n — `echo -e` semantics are NOT portable
# across shells, so be careful if the base image ever changes. The 4-space
# indent of each continuation line is also carried into the generated script;
# bash tolerates the leading whitespace, and the shebang itself starts the
# very first line, so the script still runs.
#
# What the generated script does:
#   - forces HF_HUB_OFFLINE=0 / TRANSFORMERS_OFFLINE=1, overriding any
#     Space-level settings (see the ENV comments above in this file),
#   - starts the FastAPI backend in the background on port 8000, logging to
#     logs/backend.log,
#   - polls http://localhost:8000/health once per second for up to 90 s,
#     dumping the backend log and exiting non-zero on timeout,
#   - finally exec's Streamlit on port 7860 so it replaces the shell as the
#     container's main process.
# NOTE(review): after the exec, the backgrounded uvicorn process is
# re-parented under Streamlit and will not receive a graceful SIGTERM on
# `docker stop` — two processes in one container is a known trade-off here.
RUN echo '#!/bin/bash\n\
    set -e\n\
    \n\
    # OVERRIDE ANY SPACE SETTINGS TO ENSURE INFERENCE API WORKS\n\
    export HF_HUB_OFFLINE=0\n\
    export TRANSFORMERS_OFFLINE=1\n\
    \n\
    echo "===== Application Startup at $(date) =====" \n\
    echo "=== FocusFlow Startup ===" \n\
    \n\
    # Wait for DNS/networking to be ready (HF Spaces can be slow)\n\
    echo "Waiting for network readiness..." \n\
    sleep 3\n\
    \n\
    echo "Starting backend on port 8000..." \n\
    \n\
    # Start FastAPI backend\n\
    uvicorn backend.main:app --host 0.0.0.0 --port 8000 > logs/backend.log 2>&1 &\n\
    BACKEND_PID=$!\n\
    echo "Backend started with PID $BACKEND_PID" \n\
    \n\
    # Wait for backend to be healthy (max 90 seconds)\n\
    echo "Waiting for backend health check..." \n\
    for i in {1..90}; do\n\
    if curl -sf http://localhost:8000/health > /dev/null 2>&1; then\n\
    echo "✅ Backend is healthy!" \n\
    break\n\
    fi\n\
    if [ $i -eq 90 ]; then\n\
    echo "❌ Backend failed to start. Logs:" \n\
    tail -50 logs/backend.log\n\
    exit 1\n\
    fi\n\
    echo "Attempt $i/90 - waiting..." \n\
    sleep 1\n\
    done\n\
    \n\
    # Start Streamlit frontend\n\
    echo "Starting frontend on port 7860..." \n\
    exec streamlit run app.py --server.port 7860 --server.address 0.0.0.0 --server.headless true 2>&1\n\
    ' > /app/start.sh && chmod +x /app/start.sh

# Run startup script (exec form: start.sh is the container's PID 1 until it
# exec's Streamlit).
CMD ["/app/start.sh"]