# Dockerfile — Picarones
# Image Docker multi-étape avec Tesseract OCR pré-installé
#
# Usage :
# docker build -t picarones:latest .
# docker run -p 7860:7860 picarones:latest
# docker run -p 7860:7860 -v $(pwd)/corpus:/app/corpus picarones:latest
#
# Variables d'environnement supportées :
# OPENAI_API_KEY, ANTHROPIC_API_KEY, MISTRAL_API_KEY
# GOOGLE_APPLICATION_CREDENTIALS
# AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION
# AZURE_DOC_INTEL_ENDPOINT, AZURE_DOC_INTEL_KEY
# ──────────────────────────────────────────────────────────────────
# Étape 1 : builder — installe les dépendances Python dans un venv
# ──────────────────────────────────────────────────────────────────
FROM python:3.11-slim AS builder
WORKDIR /app

# Build-time system packages: a compiler toolchain for dependencies that have
# to be built from source, plus git.
# NOTE(review): git is presumably needed for VCS-pinned dependencies — confirm
# against pyproject.toml.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*

# Package metadata and sources consumed by the install step below.
COPY pyproject.toml README.md ./
COPY picarones/ picarones/

# Isolated virtualenv; putting it first on PATH makes every later `pip` and
# `python` call in this stage resolve inside the venv.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install Picarones with the `web` and `llm` extras.
# --no-cache-dir stops pip from writing a download cache in the first place,
# so no separate cache-purge step is required afterwards.
# The install is editable (-e): the venv refers back to the sources in /app,
# so any stage that reuses /opt/venv must also provide /app at the same path.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -e ".[web,llm]"
# ──────────────────────────────────────────────────────────────────
# Étape 2 : runtime — image finale légère avec Tesseract
# ──────────────────────────────────────────────────────────────────
FROM python:3.11-slim AS runtime

LABEL description="Picarones — Plateforme de comparaison de moteurs OCR pour documents patrimoniaux" \
      version="1.0.0" \
      org.opencontainers.image.source="https://github.com/maribakulj/Picarones" \
      org.opencontainers.image.licenses="Apache-2.0"

WORKDIR /app

# Runtime system packages (sorted alphabetically):
#   - tesseract-ocr and its language data (deu, eng, fra, ita, lat, spa)
#   - shared image libraries (JPEG, PNG, TIFF, WebP) for Pillow
#   - curl, used by the HEALTHCHECK below
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libjpeg62-turbo \
        libpng16-16 \
        libtiff6 \
        libwebp7 \
        tesseract-ocr \
        tesseract-ocr-deu \
        tesseract-ocr-eng \
        tesseract-ocr-fra \
        tesseract-ocr-ita \
        tesseract-ocr-lat \
        tesseract-ocr-spa \
    && rm -rf /var/lib/apt/lists/*

# Python virtualenv produced by the builder stage.
COPY --from=builder /opt/venv /opt/venv

# Create the unprivileged user (UID 1000) and the data directories BEFORE the
# application files are copied: at this point /app holds only empty
# directories, so the recursive chown costs almost nothing and /app itself
# ends up owned by the runtime user.
RUN useradd -m -u 1000 picarones && \
    mkdir -p /app/corpus /app/rapports /app/data && \
    chown -R picarones:picarones /app

# Application source from the builder stage. Ownership is set during the copy
# (--chown) rather than by a later `RUN chown -R`, which would rewrite every
# file into a second layer.
COPY --from=builder --chown=picarones:picarones /app /app

# Drop root privileges for everything that follows, including the server.
USER picarones

# Default environment: venv first on PATH, unbuffered UTF-8 Python I/O, and the
# tessdata location handed to Tesseract.
ENV PATH="/opt/venv/bin:$PATH" \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=utf-8 \
    TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

# Declared ports. The default command below listens on 7860; 8000 stays
# declared for runs that override --port.
EXPOSE 8000 7860

# Probe the /health endpoint on the port used by the default command.
HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Start the Picarones server on all interfaces, port 7860 (exec form, so the
# server process receives signals directly).
CMD ["picarones", "serve", "--host", "0.0.0.0", "--port", "7860"]
|