bluewhale2025 committed on
Commit
5daea2d
·
1 Parent(s): 3c83f33

Fix NLTK data download and path issues

Browse files
Files changed (2) hide show
  1. Dockerfile +7 -0
  2. app.py +39 -3
Dockerfile CHANGED
@@ -41,6 +41,13 @@ COPY --chown=user:user requirements.txt .
41
  # Install Python dependencies
42
  RUN pip install --no-cache-dir -r requirements.txt
43
 
 
 
 
 
 
 
 
44
  # Copy application files
45
  COPY --chown=user:user . .
46
 
 
41
  # Install Python dependencies
42
  RUN pip install --no-cache-dir -r requirements.txt
43
 
44
+ # Download NLTK data during build
45
+ RUN python -c "import nltk; nltk.download('punkt', download_dir='/home/user/app/nltk_data')"
46
+ RUN python -c "import nltk; nltk.download('stopwords', download_dir='/home/user/app/nltk_data')"
47
+
48
+ # Set NLTK_DATA environment variable
49
+ ENV NLTK_DATA=/home/user/app/nltk_data
50
+
51
  # Copy application files
52
  COPY --chown=user:user . .
53
 
app.py CHANGED
@@ -78,16 +78,52 @@ try:
78
  logger.info("NLTK data verified successfully")
79
  except LookupError as e:
80
  logger.warning(f"NLTK data missing: {e}")
81
- # Attempt to download missing data
82
  try:
83
  nltk.download('punkt', download_dir=str(NLTK_DATA_DIR))
 
 
 
 
 
 
 
 
 
 
 
 
84
  nltk.download('stopwords', download_dir=str(NLTK_DATA_DIR))
 
 
 
 
 
 
 
 
 
 
 
 
85
  nltk.download('wordnet', download_dir=str(NLTK_DATA_DIR))
 
 
 
 
 
 
 
 
 
 
 
 
86
  nltk.download('averaged_perceptron_tagger', download_dir=str(NLTK_DATA_DIR))
87
- logger.info("Successfully downloaded NLTK data")
 
88
  except Exception as download_error:
89
  logger.error(f"Failed to download NLTK data: {download_error}")
90
- raise
91
 
92
  except ImportError as e:
93
  logger.error(f"Failed to import required modules: {e}")
 
78
  logger.info("NLTK data verified successfully")
79
  except LookupError as e:
80
  logger.warning(f"NLTK data missing: {e}")
 
81
  try:
82
  nltk.download('punkt', download_dir=str(NLTK_DATA_DIR))
83
+ nltk.data.path.append(str(NLTK_DATA_DIR))
84
+ logger.info("Successfully downloaded NLTK punkt data")
85
+ except Exception as download_error:
86
+ logger.error(f"Failed to download NLTK data: {download_error}")
87
+ # Continue without NLTK data if download fails
88
+
89
+ try:
90
+ nltk.data.find('corpora/stopwords')
91
+ logger.info("NLTK stopwords is available")
92
+ except LookupError as e:
93
+ logger.warning(f"NLTK data missing: {e}")
94
+ try:
95
  nltk.download('stopwords', download_dir=str(NLTK_DATA_DIR))
96
+ nltk.data.path.append(str(NLTK_DATA_DIR))
97
+ logger.info("Successfully downloaded NLTK stopwords data")
98
+ except Exception as download_error:
99
+ logger.error(f"Failed to download NLTK data: {download_error}")
100
+ # Continue without NLTK data if download fails
101
+
102
+ try:
103
+ nltk.data.find('corpora/wordnet')
104
+ logger.info("NLTK wordnet is available")
105
+ except LookupError as e:
106
+ logger.warning(f"NLTK data missing: {e}")
107
+ try:
108
  nltk.download('wordnet', download_dir=str(NLTK_DATA_DIR))
109
+ nltk.data.path.append(str(NLTK_DATA_DIR))
110
+ logger.info("Successfully downloaded NLTK wordnet data")
111
+ except Exception as download_error:
112
+ logger.error(f"Failed to download NLTK data: {download_error}")
113
+ # Continue without NLTK data if download fails
114
+
115
+ try:
116
+ nltk.data.find('taggers/averaged_perceptron_tagger')
117
+ logger.info("NLTK averaged_perceptron_tagger is available")
118
+ except LookupError as e:
119
+ logger.warning(f"NLTK data missing: {e}")
120
+ try:
121
  nltk.download('averaged_perceptron_tagger', download_dir=str(NLTK_DATA_DIR))
122
+ nltk.data.path.append(str(NLTK_DATA_DIR))
123
+ logger.info("Successfully downloaded NLTK averaged_perceptron_tagger data")
124
  except Exception as download_error:
125
  logger.error(f"Failed to download NLTK data: {download_error}")
126
+ # Continue without NLTK data if download fails
127
 
128
  except ImportError as e:
129
  logger.error(f"Failed to import required modules: {e}")