Data_eng_designer / Dockerfile
focustiki's picture
Update Dockerfile
4697172 verified
# ── Hugging Face Spaces / Docker deployment ───────────────────────────────────
# Port 7860 is required for HF Spaces
FROM python:3.11-slim

# Native build toolchain (gcc, g++) and the GNU OpenMP runtime (libgomp1),
# installed as system dependencies for chromadb and sentence-transformers.
# The apt package index is deleted in the same layer so it never ends up
# in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        libgomp1 \
 && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# The dependency manifest is copied on its own first, so the pip layer below
# is reused from cache until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Bring in the rest of the build context (the flat-uploaded files).
COPY . ./
# Reorganise the flat upload into the directory layout the app expects:
#   static/    → index.html, manifest.json, sw.js
#   knowledge/ → data_engineering_patterns.pdf
# Each file is moved only if it is present, so a missing file is still
# tolerated. `set -eu` makes a failing mkdir or a genuinely failing mv stop
# the build instead of being silently discarded.
RUN set -eu; \
    mkdir -p static knowledge; \
    for f in index.html manifest.json sw.js; do \
        if [ -e "$f" ]; then mv "$f" static/; fi; \
    done; \
    if [ -e data_engineering_patterns.pdf ]; then \
        mv data_engineering_patterns.pdf knowledge/; \
    fi
# HF Spaces runs the container as a non-root user with UID 1000.
# Everything under /app was copied and reorganised as root, so ownership is
# handed to the new user here; otherwise any runtime write below /app fails
# with "Permission denied".
# NOTE(review): whether app.py writes under /app is not visible from this
# file — confirm. The recursive chown duplicates the /app files in this
# layer; if image size matters, create the user before the COPY steps and
# use COPY --chown instead.
# `user:` (trailing colon) means "owner user, group = user's login group".
RUN useradd -m -u 1000 user && \
    chown -R user: /app
USER user
# HOME points at the new user's home directory; ~/.local/bin is put on PATH
# so console scripts from any user-level pip install are found.
ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
# ── UTF-8 locale ──────────────────────────────────────────────────────────────
# Text extracted from PDFs can contain Unicode structural characters
# (U+2028, U+00A0, etc). The locale and Python's stdio encoding are pinned to
# UTF-8 so that such text is never pushed through an ASCII codec; the symptom
# this guards against is httpx raising UnicodeEncodeError when forwarding the
# text upstream to Groq.
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV PYTHONIOENCODING=utf-8
# HF Spaces sets PORT to 7860 automatically; the same value is baked in here
# as the default so the variable is also defined when the image runs elsewhere.
ENV PORT=7860
# Documents the listening port (EXPOSE does not publish it).
EXPOSE 7860

# Exec form: python is started directly, without a wrapping shell.
CMD ["python", "app.py"]