Spaces:
Build error
Build error
Commit
·
5daea2d
1
Parent(s):
3c83f33
Fix NLTK data download and path issues
Browse files
- Dockerfile +7 -0
- app.py +39 -3
Dockerfile
CHANGED
|
@@ -41,6 +41,13 @@ COPY --chown=user:user requirements.txt .
|
|
| 41 |
# Install Python dependencies
|
| 42 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
# Copy application files
|
| 45 |
COPY --chown=user:user . .
|
| 46 |
|
|
|
|
| 41 |
# Install Python dependencies
|
| 42 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 43 |
|
| 44 |
+
# Download NLTK data during build
|
| 45 |
+
RUN python -c "import nltk; nltk.download('punkt', download_dir='/home/user/app/nltk_data')"
|
| 46 |
+
RUN python -c "import nltk; nltk.download('stopwords', download_dir='/home/user/app/nltk_data')"
|
| 47 |
+
|
| 48 |
+
# Set NLTK_DATA environment variable
|
| 49 |
+
ENV NLTK_DATA=/home/user/app/nltk_data
|
| 50 |
+
|
| 51 |
# Copy application files
|
| 52 |
COPY --chown=user:user . .
|
| 53 |
|
app.py
CHANGED
|
@@ -78,16 +78,52 @@ try:
|
|
| 78 |
logger.info("NLTK data verified successfully")
|
| 79 |
except LookupError as e:
|
| 80 |
logger.warning(f"NLTK data missing: {e}")
|
| 81 |
-
# Attempt to download missing data
|
| 82 |
try:
|
| 83 |
nltk.download('punkt', download_dir=str(NLTK_DATA_DIR))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
nltk.download('stopwords', download_dir=str(NLTK_DATA_DIR))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
nltk.download('wordnet', download_dir=str(NLTK_DATA_DIR))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
nltk.download('averaged_perceptron_tagger', download_dir=str(NLTK_DATA_DIR))
|
| 87 |
-
|
|
|
|
| 88 |
except Exception as download_error:
|
| 89 |
logger.error(f"Failed to download NLTK data: {download_error}")
|
| 90 |
-
|
| 91 |
|
| 92 |
except ImportError as e:
|
| 93 |
logger.error(f"Failed to import required modules: {e}")
|
|
|
|
| 78 |
logger.info("NLTK data verified successfully")
|
| 79 |
except LookupError as e:
|
| 80 |
logger.warning(f"NLTK data missing: {e}")
|
|
|
|
| 81 |
try:
|
| 82 |
nltk.download('punkt', download_dir=str(NLTK_DATA_DIR))
|
| 83 |
+
nltk.data.path.append(str(NLTK_DATA_DIR))
|
| 84 |
+
logger.info("Successfully downloaded NLTK punkt data")
|
| 85 |
+
except Exception as download_error:
|
| 86 |
+
logger.error(f"Failed to download NLTK data: {download_error}")
|
| 87 |
+
# Continue without NLTK data if download fails
|
| 88 |
+
|
| 89 |
+
try:
|
| 90 |
+
nltk.data.find('corpora/stopwords')
|
| 91 |
+
logger.info("NLTK stopwords is available")
|
| 92 |
+
except LookupError as e:
|
| 93 |
+
logger.warning(f"NLTK data missing: {e}")
|
| 94 |
+
try:
|
| 95 |
nltk.download('stopwords', download_dir=str(NLTK_DATA_DIR))
|
| 96 |
+
nltk.data.path.append(str(NLTK_DATA_DIR))
|
| 97 |
+
logger.info("Successfully downloaded NLTK stopwords data")
|
| 98 |
+
except Exception as download_error:
|
| 99 |
+
logger.error(f"Failed to download NLTK data: {download_error}")
|
| 100 |
+
# Continue without NLTK data if download fails
|
| 101 |
+
|
| 102 |
+
try:
|
| 103 |
+
nltk.data.find('corpora/wordnet')
|
| 104 |
+
logger.info("NLTK wordnet is available")
|
| 105 |
+
except LookupError as e:
|
| 106 |
+
logger.warning(f"NLTK data missing: {e}")
|
| 107 |
+
try:
|
| 108 |
nltk.download('wordnet', download_dir=str(NLTK_DATA_DIR))
|
| 109 |
+
nltk.data.path.append(str(NLTK_DATA_DIR))
|
| 110 |
+
logger.info("Successfully downloaded NLTK wordnet data")
|
| 111 |
+
except Exception as download_error:
|
| 112 |
+
logger.error(f"Failed to download NLTK data: {download_error}")
|
| 113 |
+
# Continue without NLTK data if download fails
|
| 114 |
+
|
| 115 |
+
try:
|
| 116 |
+
nltk.data.find('taggers/averaged_perceptron_tagger')
|
| 117 |
+
logger.info("NLTK averaged_perceptron_tagger is available")
|
| 118 |
+
except LookupError as e:
|
| 119 |
+
logger.warning(f"NLTK data missing: {e}")
|
| 120 |
+
try:
|
| 121 |
nltk.download('averaged_perceptron_tagger', download_dir=str(NLTK_DATA_DIR))
|
| 122 |
+
nltk.data.path.append(str(NLTK_DATA_DIR))
|
| 123 |
+
logger.info("Successfully downloaded NLTK averaged_perceptron_tagger data")
|
| 124 |
except Exception as download_error:
|
| 125 |
logger.error(f"Failed to download NLTK data: {download_error}")
|
| 126 |
+
# Continue without NLTK data if download fails
|
| 127 |
|
| 128 |
except ImportError as e:
|
| 129 |
logger.error(f"Failed to import required modules: {e}")
|