bluewhale2025 committed on
Commit
29ad632
·
1 Parent(s): 454f21d

Remove punkt_tab references and update NLTK data handling

Browse files
Files changed (2) hide show
  1. Dockerfile +0 -1
  2. app.py +1 -2
Dockerfile CHANGED
@@ -58,7 +58,6 @@ RUN python -c "import nltk; nltk.download('punkt', download_dir='/usr/local/shar
58
  && python -c "import nltk; nltk.download('stopwords', download_dir='/usr/local/share/nltk_data')" \
59
  && python -c "import nltk; nltk.download('wordnet', download_dir='/usr/local/share/nltk_data')" \
60
  && python -c "import nltk; nltk.download('averaged_perceptron_tagger', download_dir='/usr/local/share/nltk_data')" \
61
- && python -c "import nltk; nltk.download('punkt_tab', download_dir='/usr/local/share/nltk_data')" \
62
  && chmod -R 755 /usr/local/share/nltk_data
63
 
64
  # Set NLTK_DATA environment variable
 
58
  && python -c "import nltk; nltk.download('stopwords', download_dir='/usr/local/share/nltk_data')" \
59
  && python -c "import nltk; nltk.download('wordnet', download_dir='/usr/local/share/nltk_data')" \
60
  && python -c "import nltk; nltk.download('averaged_perceptron_tagger', download_dir='/usr/local/share/nltk_data')" \
 
61
  && chmod -R 755 /usr/local/share/nltk_data
62
 
63
  # Set NLTK_DATA environment variable
app.py CHANGED
@@ -93,8 +93,7 @@ try:
93
  'tokenizers/punkt',
94
  'corpora/stopwords',
95
  'corpora/wordnet',
96
- 'taggers/averaged_perceptron_tagger',
97
- 'tokenizers/punkt_tab/english'
98
  ]
99
 
100
  for resource in required_nltk_data:
 
93
  'tokenizers/punkt',
94
  'corpora/stopwords',
95
  'corpora/wordnet',
96
+ 'taggers/averaged_perceptron_tagger'
 
97
  ]
98
 
99
  for resource in required_nltk_data: