Spaces:
Sleeping
Sleeping
Commit · fcbf7d5
1 Parent(s): e6dab59
fix
Browse files
- Dockerfile +27 -2
- app.py +19 -0
- requirements.txt +4 -1
Dockerfile
CHANGED
|
@@ -12,8 +12,13 @@ WORKDIR /app
|
|
| 12 |
RUN apt-get update && apt-get install -y \
|
| 13 |
gcc \
|
| 14 |
g++ \
|
|
|
|
| 15 |
&& rm -rf /var/lib/apt/lists/*
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Copy requirements first for better Docker layer caching
|
| 18 |
COPY requirements.txt .
|
| 19 |
|
|
@@ -21,11 +26,31 @@ COPY requirements.txt .
|
|
| 21 |
RUN pip install --no-cache-dir --upgrade pip && \
|
| 22 |
pip install --no-cache-dir -r requirements.txt
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# Copy application code
|
| 25 |
COPY app.py .
|
| 26 |
|
| 27 |
-
# Create a non-root user for security
|
| 28 |
-
RUN useradd -m -u 1000 appuser &&
|
|
|
|
|
|
|
| 29 |
USER appuser
|
| 30 |
|
| 31 |
# Expose the port
|
|
|
|
| 12 |
RUN apt-get update && apt-get install -y \
|
| 13 |
gcc \
|
| 14 |
g++ \
|
| 15 |
+
curl \
|
| 16 |
&& rm -rf /var/lib/apt/lists/*
|
| 17 |
|
| 18 |
+
# Create NLTK data directory with proper permissions
|
| 19 |
+
RUN mkdir -p /usr/local/nltk_data && chmod 755 /usr/local/nltk_data
|
| 20 |
+
ENV NLTK_DATA=/usr/local/nltk_data
|
| 21 |
+
|
| 22 |
# Copy requirements first for better Docker layer caching
|
| 23 |
COPY requirements.txt .
|
| 24 |
|
|
|
|
| 26 |
RUN pip install --no-cache-dir --upgrade pip && \
|
| 27 |
pip install --no-cache-dir -r requirements.txt
|
| 28 |
|
| 29 |
+
# Download all potentially needed NLTK data during build
|
| 30 |
+
# This ensures we have permissions and avoids runtime download issues
|
| 31 |
+
RUN python -c "import nltk; \
|
| 32 |
+
nltk.download('punkt', download_dir='/usr/local/nltk_data', quiet=True); \
|
| 33 |
+
nltk.download('punkt_tab', download_dir='/usr/local/nltk_data', quiet=True); \
|
| 34 |
+
nltk.download('stopwords', download_dir='/usr/local/nltk_data', quiet=True); \
|
| 35 |
+
nltk.download('averaged_perceptron_tagger', download_dir='/usr/local/nltk_data', quiet=True); \
|
| 36 |
+
print('NLTK data download completed successfully')"
|
| 37 |
+
|
| 38 |
+
# Verify NLTK data was downloaded correctly
|
| 39 |
+
RUN python -c "import nltk; \
|
| 40 |
+
try: \
|
| 41 |
+
nltk.data.find('tokenizers/punkt'); \
|
| 42 |
+
print('NLTK punkt tokenizer found successfully'); \
|
| 43 |
+
except LookupError: \
|
| 44 |
+
print('Warning: NLTK punkt tokenizer not found'); \
|
| 45 |
+
exit(1)"
|
| 46 |
+
|
| 47 |
# Copy application code
|
| 48 |
COPY app.py .
|
| 49 |
|
| 50 |
+
# Create a non-root user for security but ensure they can access NLTK data
|
| 51 |
+
RUN useradd -m -u 1000 appuser && \
|
| 52 |
+
chown -R appuser:appuser /app && \
|
| 53 |
+
chmod -R 755 /usr/local/nltk_data
|
| 54 |
USER appuser
|
| 55 |
|
| 56 |
# Expose the port
|
app.py
CHANGED
|
@@ -1,5 +1,24 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
|
| 4 |
from llama_index.embeddings.cohere import CohereEmbedding
|
| 5 |
from llama_index.llms.groq import Groq
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
+
|
| 4 |
+
# Handle NLTK setup early with proper error handling
|
| 5 |
+
try:
|
| 6 |
+
import nltk
|
| 7 |
+
# Ensure NLTK data is available, try to download if missing
|
| 8 |
+
try:
|
| 9 |
+
nltk.data.find('tokenizers/punkt')
|
| 10 |
+
except LookupError:
|
| 11 |
+
print("NLTK punkt tokenizer not found, attempting to download...")
|
| 12 |
+
try:
|
| 13 |
+
nltk.download('punkt', quiet=True)
|
| 14 |
+
nltk.download('punkt_tab', quiet=True)
|
| 15 |
+
except Exception as e:
|
| 16 |
+
print(f"Warning: Could not download NLTK data: {e}")
|
| 17 |
+
print("This may cause issues with text processing")
|
| 18 |
+
except ImportError:
|
| 19 |
+
print("NLTK not available, continuing without it")
|
| 20 |
+
|
| 21 |
+
# Now import LlamaIndex components
|
| 22 |
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
|
| 23 |
from llama_index.embeddings.cohere import CohereEmbedding
|
| 24 |
from llama_index.llms.groq import Groq
|
requirements.txt
CHANGED
|
@@ -13,4 +13,7 @@ transformers>=4.30.0,<5.0.0
|
|
| 13 |
|
| 14 |
# Add explicit dependencies that might be causing issues
|
| 15 |
pydantic>=2.0.0,<3.0.0
|
| 16 |
-
fastapi>=0.100.0,<1.0.0
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Add explicit dependencies that might be causing issues
|
| 15 |
pydantic>=2.0.0,<3.0.0
|
| 16 |
+
fastapi>=0.100.0,<1.0.0
|
| 17 |
+
|
| 18 |
+
# Explicitly include NLTK with a compatible version
|
| 19 |
+
nltk>=3.8,<4.0
|