File size: 894 Bytes
633bb91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
import nltk
from config import Config
from nltk.corpus import stopwords
from nltk.data import path as nltk_path


class NLTK:
    """Provide English stopwords and a punctuation set backed by a custom NLTK data dir.

    On construction the directory given by ``Config.NLTK_DIR`` is registered
    with NLTK's data search path, the stopwords corpus is downloaded into it
    if the English stopwords file is not already there, and the word list is
    loaded.

    Attributes:
        nltk_data_path: Directory used for NLTK data (``Config.NLTK_DIR``).
        stopwords: Set built from ``stopwords.words('english')``.
        punctuation: Set of single punctuation characters.
    """

    def __init__(self) -> None:
        """Register the data directory, ensure the corpus exists, and load it."""
        # Custom location for NLTK data, taken from project configuration.
        self.nltk_data_path = Config.NLTK_DIR

        # ``nltk_path`` is the module-level search list imported from
        # ``nltk.data``, so it is shared by every instance. Only add the
        # directory when it is missing; otherwise each new instance would
        # append another duplicate entry.
        if self.nltk_data_path not in nltk_path:
            nltk_path.append(self.nltk_data_path)

        # Must run before the corpus is read below.
        self.download_stopwords()

        self.stopwords = set(stopwords.words('english'))
        self.punctuation = {
            ".", ",", ";", ":", "'", '"', "~", "-", "–", "—",
            "(", ")", "[", "]", "{", "}", "!", "?", "`",
        }

    def download_stopwords(self) -> None:
        """Download the NLTK ``stopwords`` corpus unless it is already present.

        Presence is decided by checking for the file
        ``<nltk_data_path>/corpora/stopwords/english``. When that path exists
        nothing is downloaded.
        """
        # Full path to the English stopwords file inside the custom data dir.
        english_file = os.path.join(
            self.nltk_data_path, "corpora", "stopwords", "english"
        )

        if os.path.exists(english_file):
            return

        # NOTE(review): the result of nltk.download is not checked here; a
        # failed download presumably surfaces later when the corpus is read.
        # Confirm whether callers want an explicit error instead.
        nltk.download("stopwords", download_dir=self.nltk_data_path)