Spaces:

KJ24
/

chunking-intelligent-api

Runtime error

File size: 4,173 Bytes

e14f7dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51e4a18
 
 
15de6c6
51e4a18
42b080f
 
51e4a18
 
42b080f
e14f7dd
 
 
42b080f
c7d652f
 
e14f7dd
 
ad26762
598bc58
9a32138
 
 
 
e14f7dd
df35f26
e14f7dd
3ba134a
 
e14f7dd
 
 
 
3ba134a
60d87a4
9a32138
9f42b62
7af8233
09ded5e
1b8acd8
8fb95a6
 
0b09399
 
 
 
029812a
1a90bc4
09ded5e
9a32138
 
09ded5e
5f6abe3
78a4510
e34949a
9a8b92e
 
04b6df3
9a8b92e
1b8acd8
09ded5e
7525721
 
 
 
 
 
 
 
 
 
78a4510
 
7525721
 
 
 
 
46988f8
d58ce7f
78a4510
 
deaf4f1
7525721
 
e34949a
 
 
7bf3795
e34949a
cfc49da
e14f7dd
 
 
 
 
3ba134a
e14f7dd
 
3ba134a
e14f7dd
 
ad26762
43769f9
e204773
43769f9
e14f7dd
 
ad26762
390cd75
 
 
e14f7dd
3ba134a
ad26762
e14f7dd
da2d4a7

# ================================================================
# 🐳 Dockerfile - Hugging Face Space (SDK Docker)
# ================================================================
# Objectif : API FastAPI + Chunking sémantique intelligent
# Compatible : HF Space gratuit (2GB RAM, CPU only)
# Port requis : 7860 (imposé par Hugging Face)
# ================================================================

# ===== LIGHTWEIGHT BASE IMAGE =====
# "slim" variant of the official Python 3.10 image.
FROM python:3.10-slim

# ===== PYTHON / PIP SETTINGS =====
# PYTHONUNBUFFERED              : stdout/stderr are unbuffered, so logs show up immediately.
# PYTHONDONTWRITEBYTECODE       : the interpreter does not write .pyc files.
# PIP_NO_CACHE_DIR              : pip keeps no download/wheel cache.
# PIP_DISABLE_PIP_VERSION_CHECK : pip skips its "newer version available" lookup.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# ===== HUGGING FACE CACHE & TOKENIZER SETTINGS =====
# Every Hugging Face cache location configured here (HF home, hub cache,
# transformers cache, sentence-transformers home) points at the same
# directory: /tmp/hf.
# An earlier layout under /app/cache (huggingface/, transformers/, hub/) was
# tried and is no longer referenced by these variables.
# TOKENIZERS_PARALLELISM and HF_HUB_DISABLE_PROGRESS_BARS are read by the
# tokenizers / huggingface_hub libraries at runtime.
ENV HF_HOME=/tmp/hf \
    HF_HUB_CACHE=/tmp/hf \
    TRANSFORMERS_CACHE=/tmp/hf \
    SENTENCE_TRANSFORMERS_HOME=/tmp/hf \
    TOKENIZERS_PARALLELISM=false \
    HF_HUB_DISABLE_PROGRESS_BARS=1


# ===== WORKING DIRECTORY =====
# Relative paths in the instructions that follow resolve against /app.
WORKDIR /app

# Copy only the dependency manifest at this point, so the dependency install
# that follows does not depend on the rest of the project files.
COPY requirements.txt ./


# ===== MINIMAL SYSTEM DEPENDENCIES =====
# Installs build-essential, curl and git without recommended extras.
# The apt package lists and the temporary directories are emptied in the same
# layer so they do not add to the image size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf \
        /var/lib/apt/lists/* \
        /tmp/* \
        /var/tmp/*


# ===== PYTHON DEPENDENCIES (everything except Chonkie) =====
# pip is upgraded FIRST so that requirements.txt is resolved and installed by
# the upgraded pip rather than by the version shipped in the base image.
# Previously the upgrade ran only after the install had already completed, so
# it had no effect on this step.
# Both commands share one layer: they form a single logical step.
# An earlier experiment that force-installed tokenizers==0.19.0 with --no-deps
# ahead of this step remains disabled.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt



# ✅ Installer tokenizers MANUELLEMENT si besoin (déjà dans le fichier, mais sécurité)
# RUN pip install "tokenizers==0.15.2"


# ✅ Installation Chonkie compatible 'chunk_hierarchy'

# RUN pip install git+https://github.com/chonkie-inc/chonkie.git


# ✅ Installer Chonkie v1.0.10 avec patch automatique du paramètre chunk_hierarchy
# RUN git clone https://github.com/chonkie-inc/chonkie.git /tmp/chonkie && \
#     cd /tmp/chonkie && \
#     sed -i 's/__init__(/__init__(self, chunk_hierarchy=None, /' /tmp/chonkie/src/chonkie/chunker/recursive.py && \
#     echo "        self.chunk_hierarchy = chunk_hierarchy or [2048, 512, 128]" >> /tmp/chonkie/src/chonkie/chunker/recursive.py && \
#     pip install /tmp/chonkie




# ===== CHONKIE INSTALL WITH IN-PLACE SOURCE PATCH =====
# Clones the chonkie repository into /tmp/chonkie, edits
# src/chonkie/chunker/recursive.py with sed so that the RecursiveChunker
# constructor mentions a `chunk_hierarchy` parameter, then pip-installs the
# patched checkout.
#
# Steps, in order:
#   1. Between `class RecursiveChunker` and the next `def __init__`, on the
#      `def __init__` line, replace `self,` with `chunk_hierarchy=None, self,`.
#   2. In every range from a `def __init__` line to the next empty line, replace
#      the first `self.max_chunk_size`, `self.min_chunk_size`,
#      `self.chunk_overlap` and `self.chunking_strategy` on each line with the
#      bare name (no `self.` prefix).
#   3. Append a `self.chunk_hierarchy = chunk_hierarchy or [2048, 512, 128]`
#      line (8-space indent) to the END of the file.
#   4. pip install the modified source tree.
#
# NOTE(review): the clone specifies no branch or tag, so it takes the
#   repository's default branch as of build time; the "v1.0.10" mentioned in
#   the original comment is not enforced by this command.
# NOTE(review): step 1 places a defaulted parameter before `self`. Python
#   rejects a parameter without a default after one with a default, so if the
#   substitution matches, the patched module would likely fail to load.
#   TODO: confirm against the upstream signature and insert the parameter
#   after `self` instead.
# NOTE(review): step 3 appends at end of file, not inside __init__, so the
#   assignment ends up in whatever scope the file happens to end with — verify.
# NOTE(review): /tmp/chonkie is not removed after installation and therefore
#   remains in this layer.
RUN git clone https://github.com/chonkie-inc/chonkie.git /tmp/chonkie && \
    cd /tmp/chonkie && \
    sed -i '/class RecursiveChunker/,/def __init__/{/def __init__/ s/self,/chunk_hierarchy=None, self,/}' /tmp/chonkie/src/chonkie/chunker/recursive.py && \
    sed -i '/def __init__/,/^$/ s/self.max_chunk_size/max_chunk_size/' /tmp/chonkie/src/chonkie/chunker/recursive.py && \
    sed -i '/def __init__/,/^$/ s/self.min_chunk_size/min_chunk_size/' /tmp/chonkie/src/chonkie/chunker/recursive.py && \
    sed -i '/def __init__/,/^$/ s/self.chunk_overlap/chunk_overlap/' /tmp/chonkie/src/chonkie/chunker/recursive.py && \
    sed -i '/def __init__/,/^$/ s/self.chunking_strategy/chunking_strategy/' /tmp/chonkie/src/chonkie/chunker/recursive.py && \
    echo "        self.chunk_hierarchy = chunk_hierarchy or [2048, 512, 128]" >> /tmp/chonkie/src/chonkie/chunker/recursive.py && \
    pip install /tmp/chonkie





# ===== PIN sentence-transformers AND VERIFY THE ENVIRONMENT =====
# Remove whichever sentence-transformers is currently installed, install
# exactly 2.6.1, then run `pip check`, which fails the build when installed
# packages have missing or incompatible requirements.
RUN pip uninstall -y sentence-transformers \
    && pip install sentence-transformers==2.6.1 \
    && pip check


# ===== CACHE DIRECTORIES =====
# Creates the /app/cache tree (huggingface/, transformers/, hub/) with mode 755.
# NOTE(review): the Hugging Face cache variables defined earlier in this file
#   point to /tmp/hf, so these directories look unused — confirm before removing.
RUN mkdir -p \
        /app/cache/huggingface \
        /app/cache/transformers \
        /app/cache/hub \
    && chmod -R 755 /app/cache

# ===== FINAL DEPENDENCY PASS =====
# Placed BEFORE the project copy so that editing application files no longer
# invalidates and re-runs these pip layers; requirements.txt is already present
# in /app from the earlier dedicated COPY.
#   - uvicorn is installed explicitly because the container's CMD launches it.
#   - requirements.txt is applied once more after the Chonkie and
#     sentence-transformers steps.
# NOTE(review): this second pass can change versions installed by those steps
#   and runs after `pip check` — confirm that is intended.
# Dropped as redundant: a second `pip install --upgrade pip` (pip is already
# upgraded earlier in this file) and `rm -rf /root/.cache/pip` (with
# PIP_NO_CACHE_DIR=1 pip does not populate that directory).
RUN pip install --no-cache-dir uvicorn \
    && pip install --no-cache-dir -r requirements.txt

# ===== PROJECT FILES =====
# Copied last: this is the input that changes most often.
COPY . /app


# ===== EXPOSED PORT =====
# Documents 7860, the port uvicorn is told to bind to in the CMD below.
EXPOSE 7860

# ===== STARTUP COMMAND =====
# Exec-form CMD: uvicorn is started directly (no shell wrapper) and serves the
# `app` object from module `app` on all interfaces, port 7860.
# NOTE(review): no USER instruction appears in this file, so the process runs
#   as the image's default user — confirm this matches what the hosting
#   platform expects.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]