shafiqul1357's picture
upload source code
633bb91 verified
raw
history blame
894 Bytes
import os
import nltk
from config import Config
from nltk.corpus import stopwords
from nltk.data import path as nltk_path
class NLTK:
    """Provide English stopwords and a punctuation set backed by a local NLTK data directory.

    On construction the instance registers ``Config.NLTK_DIR`` with NLTK's
    data search path, downloads the ``stopwords`` corpus into that directory
    if the English word list is not already there, and then exposes:

    Attributes:
        nltk_data_path: Directory taken from ``Config.NLTK_DIR``.
        stopwords: ``set`` of the words returned by
            ``stopwords.words('english')``.
        punctuation: ``set`` of single-character punctuation marks
            (a per-instance copy of ``PUNCTUATION``).
    """

    # Single-character punctuation marks. The two dashes are written as
    # escapes so the literal cannot be corrupted by a wrong file encoding
    # (the em dash had previously degraded into a 3-character mojibake
    # string that could never equal a one-character token).
    PUNCTUATION = frozenset({
        ".", ",", ";", ":", "'", '"', "~", "-",
        "\u2013",  # en dash
        "\u2014",  # em dash
        "(", ")", "[", "]", "{", "}", "!", "?", "`",
    })

    def __init__(self):
        """Register the custom data directory, fetch stopwords if needed, and load them."""
        # Custom NLTK data location supplied by the project configuration.
        self.nltk_data_path = Config.NLTK_DIR

        # Tell NLTK to look in the custom location; skip the append when the
        # entry is already present so repeated instantiation does not keep
        # growing the search path with duplicates.
        if self.nltk_data_path not in nltk_path:
            nltk_path.append(self.nltk_data_path)

        self.download_stopwords()
        self.stopwords = set(stopwords.words('english'))

        # Fresh mutable copy so callers that modify ``self.punctuation``
        # do not affect other instances.
        self.punctuation = set(self.PUNCTUATION)

    def download_stopwords(self):
        """Download the NLTK ``stopwords`` corpus unless the English list already exists.

        The check looks for ``<nltk_data_path>/corpora/stopwords/english``;
        when that path is present nothing is downloaded.
        """
        # Full path to the English stopwords file.
        stopwords_path = os.path.join(
            self.nltk_data_path, "corpora", "stopwords", "english"
        )
        if os.path.exists(stopwords_path):
            return
        nltk.download("stopwords", download_dir=self.nltk_data_path)