Spaces:
Runtime error
Runtime error
correction NLTK
Browse files
- Dockerfile +9 -14
- app.py +4 -2
- patches/llama_patch.py +20 -0
Dockerfile
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
-
# Image de base
|
| 2 |
FROM python:3.10-slim
|
| 3 |
|
| 4 |
-
# Dépendances système
|
| 5 |
RUN apt-get update && apt-get install -y \
|
| 6 |
build-essential \
|
| 7 |
cmake \
|
|
@@ -11,26 +11,21 @@ RUN apt-get update && apt-get install -y \
|
|
| 11 |
curl \
|
| 12 |
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
-
#
|
| 15 |
WORKDIR /code
|
| 16 |
|
| 17 |
-
# Copier les
|
| 18 |
COPY requirements.txt .
|
| 19 |
-
|
| 20 |
-
# Installer les dépendances Python
|
| 21 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
RUN mkdir -p /tmp/nltk_data
|
| 25 |
-
|
| 26 |
-
# ✅ Télécharger punkt AVANT le lancement de l'app
|
| 27 |
-
RUN python -m nltk.downloader -d /tmp/nltk_data punkt
|
| 28 |
|
| 29 |
-
# Copier le
|
| 30 |
COPY . .
|
| 31 |
|
| 32 |
-
# Exposer le port
|
| 33 |
EXPOSE 7860
|
| 34 |
|
| 35 |
-
#
|
| 36 |
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
|
|
|
| 1 |
+
# 🐍 Image de base
|
| 2 |
FROM python:3.10-slim
|
| 3 |
|
| 4 |
+
# 🧱 Dépendances système
|
| 5 |
RUN apt-get update && apt-get install -y \
|
| 6 |
build-essential \
|
| 7 |
cmake \
|
|
|
|
| 11 |
curl \
|
| 12 |
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
+
# 📁 Dossier de travail
|
| 15 |
WORKDIR /code
|
| 16 |
|
| 17 |
+
# 📝 Copier les requirements et installer les dépendances
|
| 18 |
COPY requirements.txt .
|
|
|
|
|
|
|
| 19 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 20 |
|
| 21 |
+
# 📦 Préparer le cache NLTK
|
| 22 |
+
RUN mkdir -p /tmp/nltk_data && python -m nltk.downloader -d /tmp/nltk_data punkt
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
# 📁 Copier tout le code
|
| 25 |
COPY . .
|
| 26 |
|
| 27 |
+
# 📤 Exposer le port Streamlit
|
| 28 |
EXPOSE 7860
|
| 29 |
|
| 30 |
+
# 🚀 Lancer l'application (le patch doit être dans app.py AVANT l'import llama_index)
|
| 31 |
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
app.py
CHANGED
|
@@ -7,10 +7,12 @@ from huggingface_hub import hf_hub_download
|
|
| 7 |
import time
|
| 8 |
|
| 9 |
|
| 10 |
-
|
| 11 |
os.environ["NLTK_DATA"] = "/tmp/nltk_data"
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
|
| 15 |
logger = logging.getLogger("Streamlit")
|
| 16 |
logger.setLevel(logging.INFO)
|
|
|
|
| 7 |
import time
|
| 8 |
|
| 9 |
|
| 10 |
+
import os
|
| 11 |
os.environ["NLTK_DATA"] = "/tmp/nltk_data"
|
| 12 |
|
| 13 |
+
# Appliquer le patch avant tout import de llama_index
|
| 14 |
+
from patches.llama_patch import patch_llamaindex_nltk
|
| 15 |
+
patch_llamaindex_nltk()
|
| 16 |
|
| 17 |
logger = logging.getLogger("Streamlit")
|
| 18 |
logger.setLevel(logging.INFO)
|
patches/llama_patch.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import nltk
|
| 3 |
+
|
| 4 |
+
# Forcer le cache dans /tmp
|
| 5 |
+
NLTK_CACHE_DIR = "/tmp/nltk_data"
|
| 6 |
+
|
| 7 |
+
def patch_llamaindex_nltk():
    """Redirect NLTK data used by llama_index to the cache in NLTK_CACHE_DIR.

    Steps performed:
      1. Create NLTK_CACHE_DIR if needed and register it on
         ``nltk.data.path`` so NLTK lookups search it.
      2. Download the ``punkt`` tokenizer into NLTK_CACHE_DIR when NLTK
         cannot find it.
      3. Replace ``llama_index.core.utils.GlobalsHelper`` (and the
         module-level ``globals_helper`` instance, when present) with a
         subclass whose ``_nltk_data_dir`` is NLTK_CACHE_DIR.

    The patch is best-effort: any exception is printed and swallowed so the
    caller is never interrupted by a failure here.

    Returns:
        None
    """
    try:
        # nltk.data.find() only searches the directories listed in
        # nltk.data.path, so the cache directory must be registered before
        # the lookup below can succeed.
        os.makedirs(NLTK_CACHE_DIR, exist_ok=True)
        if NLTK_CACHE_DIR not in nltk.data.path:
            nltk.data.path.append(NLTK_CACHE_DIR)

        # Download punkt if it is not available yet.
        try:
            nltk.data.find("tokenizers/punkt")
        except LookupError:
            nltk.download("punkt", download_dir=NLTK_CACHE_DIR)

        import llama_index.core.utils as llama_utils

        class PatchedGlobalsHelper(llama_utils.GlobalsHelper):
            def __init__(self):
                # Intentionally does not call the parent __init__: only the
                # data directory is set, redirected to the cache directory.
                # NOTE(review): assumes the parent class needs no other
                # per-instance state - confirm against the installed
                # llama_index version.
                self._nltk_data_dir = NLTK_CACHE_DIR

        # Previously the subclass was defined and then discarded, so nothing
        # was actually patched. Install it on the module so later lookups of
        # these names resolve to the redirected helper.
        llama_utils.GlobalsHelper = PatchedGlobalsHelper
        # NOTE(review): modules that already did
        # `from llama_index.core.utils import globals_helper` keep their own
        # reference to the old instance - verify this is sufficient.
        if hasattr(llama_utils, "globals_helper"):
            llama_utils.globals_helper = PatchedGlobalsHelper()
    except Exception as e:
        # Deliberate best-effort boundary: report and continue.
        print("[patch_llamaindex_nltk] Failed to patch:", e)
|