shafiqul1357's picture
upload source code
633bb91 verified
raw
history blame contribute delete
894 Bytes
import os
import nltk
from config import Config
from nltk.corpus import stopwords
from nltk.data import path as nltk_path
class NLTK:
    """English stopwords and punctuation backed by a project-local NLTK data dir.

    On construction the instance:
      * reads the data directory from ``Config.NLTK_DIR``,
      * registers that directory in NLTK's data search path,
      * downloads the ``stopwords`` corpus into it if the English list is
        not already on disk,
      * exposes ``stopwords`` (set of English stopwords) and ``punctuation``
        (set of single-character punctuation tokens).
    """

    # Single-character punctuation tokens. The en dash (U+2013) and em dash
    # (U+2014) are written as escapes: the previous literals had been
    # corrupted into multi-character mojibake that could never match a real
    # dash, and escapes survive any future re-encoding of this file.
    PUNCTUATION = frozenset({
        ".", ",", ";", ":", "'", '"', "~", "-", "\u2013", "\u2014",
        "(", ")", "[", "]", "{", "}", "!", "?", "`",
    })

    def __init__(self):
        # Custom location where NLTK data is stored / downloaded.
        self.nltk_data_path = Config.NLTK_DIR

        # Tell NLTK to look in the custom location. The search path is a
        # module-level list shared by the whole process, so only add the
        # directory once no matter how many instances are created.
        if self.nltk_data_path not in nltk_path:
            nltk_path.append(self.nltk_data_path)

        self.download_stopwords()
        self.stopwords = set(stopwords.words('english'))

        # Per-instance mutable copy so callers that add or remove entries do
        # not affect other instances or the class-level constant.
        self.punctuation = set(self.PUNCTUATION)

    def download_stopwords(self):
        """Download the NLTK ``stopwords`` corpus unless the English list exists.

        The check looks for ``<nltk_data_path>/corpora/stopwords/english``;
        when that path is present nothing is downloaded.
        """
        # Full path to the English stopwords file.
        stopwords_path = os.path.join(
            self.nltk_data_path, "corpora", "stopwords", "english"
        )
        if not os.path.exists(stopwords_path):
            nltk.download("stopwords", download_dir=self.nltk_data_path)