parseai-document-processor / download_nltk_data.py
bluewhale2025's picture
Initial commit: Add ParseAI document processor application
3022fd1
raw
history blame contribute delete
840 Bytes
#!/usr/bin/env python3
import nltk
import os
def download_nltk_data():
    """Download the NLTK resources used by the application.

    The target directory is read from the ``NLTK_DATA`` environment
    variable and defaults to ``/app/nltk_data``. The directory is created
    if it does not exist and is registered on ``nltk.data.path``.

    Raises:
        RuntimeError: If ``nltk.download`` reports failure for a package.
        Exception: Any other error raised while downloading is printed
            and then re-raised unchanged.
    """
    nltk_data_dir = os.getenv('NLTK_DATA', '/app/nltk_data')
    os.makedirs(nltk_data_dir, exist_ok=True)

    # Register the directory only once so repeated calls (or an NLTK_DATA
    # value that is already on the search path) do not add duplicates.
    if nltk_data_dir not in nltk.data.path:
        nltk.data.path.append(nltk_data_dir)

    packages = (
        'punkt',
        'stopwords',
        'wordnet',
        'averaged_perceptron_tagger',
    )

    print("Downloading NLTK data...")
    try:
        for package in packages:
            # nltk.download() signals most failures (network errors,
            # unknown package ids) by returning False instead of raising,
            # so the return value must be checked explicitly.
            if not nltk.download(package, download_dir=nltk_data_dir):
                raise RuntimeError(
                    f"nltk.download reported failure for package {package!r}"
                )
    except Exception as e:
        print(f"Error downloading NLTK data: {e}")
        raise
    print("NLTK data downloaded successfully!")
# Script entry point: fetch the NLTK resources only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    download_nltk_data()