Spaces:
Sleeping
Sleeping
| import os | |
| import nltk | |
| from config import Config | |
| from nltk.corpus import stopwords | |
| from nltk.data import path as nltk_path | |
class NLTK:
    """Provide NLTK's English stopwords from a project-specific data directory.

    Instantiating the class registers ``Config.NLTK_DIR`` with NLTK's data
    search path, downloads the stopwords corpus into that directory when it
    is not there yet, and then exposes:

    * ``nltk_data_path`` -- the directory taken from ``Config.NLTK_DIR``
    * ``stopwords`` -- ``set`` built from ``stopwords.words('english')``
    * ``punctuation`` -- per-instance ``set`` of punctuation characters
    """

    # The two dashes are written as escapes so that a wrong file encoding
    # cannot silently turn them into unrelated characters.
    _PUNCTUATION = frozenset({
        ".", ",", ";", ":", "'", '"', "~", "-",
        "\u2013",  # en dash
        "\u2014",  # em dash
        "(", ")", "[", "]", "{", "}", "!", "?", "`",
    })

    def __init__(self):
        # Custom location where NLTK data is stored and looked up.
        self.nltk_data_path = Config.NLTK_DIR

        # Tell NLTK to search the custom location. The search path is a
        # module-level list shared by every instance, so only add the
        # directory once instead of on each instantiation.
        if self.nltk_data_path not in nltk_path:
            nltk_path.append(self.nltk_data_path)

        self.download_stopwords()
        self.stopwords = set(stopwords.words('english'))

        # Hand every instance its own mutable copy, as callers may modify it.
        self.punctuation = set(self._PUNCTUATION)

    def download_stopwords(self):
        """Download the stopwords corpus unless the English list already exists.

        The check looks for ``corpora/stopwords/english`` below
        ``self.nltk_data_path``; the download is requested only when that
        path is missing, so repeated calls do not fetch the corpus again.
        """
        english_list = os.path.join(
            self.nltk_data_path, "corpora", "stopwords", "english"
        )
        if os.path.exists(english_list):
            return
        nltk.download("stopwords", download_dir=self.nltk_data_path)