"""
Stub implementation of NLTK to avoid dependencies in container environments.
"""
|
|
|
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Runs at import time: records that the stub implementation is the one loaded.
logger.info("Using stub NLTK implementation")
|
|
|
|
|
def download(*args, **kwargs):
    """Pretend to download an NLTK resource.

    Any positional or keyword arguments are accepted and ignored. Nothing is
    fetched; a warning is logged and ``True`` is returned.
    """
    notice = "NLTK download stub called - no actual download performed"
    logger.warning(notice)
    return True
|
|
|
|
|
class SimpleTokenizer:
    """Whitespace tokenizer provided by the NLTK stub."""

    def __init__(self):
        # Construction has no state to set up; it only leaves a log entry.
        logger.info("Stub SimpleTokenizer initialized")

    def tokenize(self, text):
        """Split *text* on runs of whitespace and return the list of pieces."""
        tokens = text.split()
        return tokens
|
|
|
|
|
class WordTokenizer:
    """Word tokenizer stub: tokens are the whitespace-separated pieces."""

    def tokenize(self, text):
        """Return the whitespace-separated pieces of *text* as a list."""
        pieces = text.split()
        return pieces
|
|
|
def word_tokenize(text, language="english", preserve_line=False):
    """Split *text* into tokens on runs of whitespace.

    Args:
        text: The string to tokenize.
        language: Accepted so calls written against NLTK's ``word_tokenize``
            signature do not raise ``TypeError``; ignored by this stub.
        preserve_line: Accepted for the same reason; ignored by this stub.

    Returns:
        A list of whitespace-separated tokens. Punctuation is not separated
        from the adjacent word.
    """
    return text.split()
|
|
|
class SentenceTokenizer:
    """Sentence tokenizer stub that splits text on ``.`` characters."""

    def tokenize(self, text):
        """Return the non-empty, whitespace-trimmed fragments of *text*.

        The text is split on every ``.``; each fragment is stripped of
        surrounding whitespace and empty fragments are dropped, so an empty
        string yields ``[]`` and a trailing period does not produce a blank
        sentence. The period itself is not kept, and any ``.`` (including one
        inside a number or abbreviation) is treated as a boundary.
        """
        trimmed = (fragment.strip() for fragment in text.split("."))
        return [sentence for sentence in trimmed if sentence]
|
|
|
def sent_tokenize(text, language="english"):
    """Split *text* into sentences on ``.`` characters.

    Args:
        text: The string to split.
        language: Accepted so calls written against NLTK's ``sent_tokenize``
            signature do not raise ``TypeError``; ignored by this stub.

    Returns:
        A list of the fragments between periods, each stripped of surrounding
        whitespace, with empty fragments removed. An empty string yields
        ``[]``. The period itself is not kept, and any ``.`` is treated as a
        sentence boundary.
    """
    trimmed = (fragment.strip() for fragment in text.split("."))
    return [sentence for sentence in trimmed if sentence]
|
|
|
|
|
class PorterStemmer:
    """Lightweight stand-in for NLTK's ``PorterStemmer``.

    Only three suffix rules are applied (``ing``, ``ed`` and a plural ``s``);
    this is not the full Porter algorithm.
    """

    @staticmethod
    def _has_vowel(fragment):
        """Return True if *fragment* contains a, e, i, o, u, or a non-initial y."""
        return any(
            ch in "aeiou" or (ch == "y" and idx > 0)
            for idx, ch in enumerate(fragment.lower())
        )

    def stem(self, word):
        """Return *word* with a trailing ``ing``, ``ed`` or plural ``s`` removed.

        Words of one or two characters are returned unchanged. ``ing`` and
        ``ed`` are removed only when the remaining stem contains a vowel, so
        inputs such as ``"sing"``, ``"red"`` or ``"ing"`` are not reduced to a
        consonant cluster or an empty string. A trailing ``s`` is removed
        unless the word ends in ``ss``. Matching is case-sensitive.
        """
        # Too short to carry a removable suffix ("is", "as", "s", "").
        if len(word) <= 2:
            return word

        for suffix in ("ing", "ed"):
            if word.endswith(suffix):
                base = word[: -len(suffix)]
                # A matching suffix ends rule processing either way, mirroring
                # the original if/elif chain: no fall-through to later rules.
                return base if self._has_vowel(base) else word

        if word.endswith("s") and not word.endswith("ss"):
            return word[:-1]
        return word
|
|
|
class LancasterStemmer:
    """Lancaster stemmer stub that reuses the stub ``PorterStemmer`` rules."""

    def stem(self, word):
        """Return the result of stemming *word* with a new ``PorterStemmer``."""
        delegate = PorterStemmer()
        return delegate.stem(word)
|
|
|
class SimpleStemmer:
    """Minimal suffix-stripping stemmer provided by the NLTK stub."""

    def __init__(self):
        # Construction has no state to set up; it only leaves a log entry.
        logger.info("SimpleStemmer stub initialized")

    @staticmethod
    def _has_vowel(fragment):
        """Return True if *fragment* contains a, e, i, o, u, or a non-initial y."""
        return any(
            ch in "aeiou" or (ch == "y" and idx > 0)
            for idx, ch in enumerate(fragment.lower())
        )

    def stem(self, word):
        """Return *word* with a trailing ``ing``, ``ed`` or plural ``s`` removed.

        Words of one or two characters are returned unchanged. ``ing`` and
        ``ed`` are removed only when the remaining stem contains a vowel, so
        inputs such as ``"sing"``, ``"red"`` or ``"ing"`` are not reduced to a
        consonant cluster or an empty string. A trailing ``s`` is removed
        unless the word ends in ``ss``. Matching is case-sensitive.
        """
        # Too short to carry a removable suffix ("is", "as", "s", "").
        if len(word) <= 2:
            return word

        for suffix in ("ing", "ed"):
            if word.endswith(suffix):
                base = word[: -len(suffix)]
                # A matching suffix ends rule processing either way, mirroring
                # the original if/elif chain: no fall-through to later rules.
                return base if self._has_vowel(base) else word

        if word.endswith("s") and not word.endswith("ss"):
            return word[:-1]
        return word
|
|
|
|
|
class WordNetLemmatizer:
    """Lemmatizer stub that performs no lemmatization."""

    def __init__(self):
        # Construction has no state to set up; it only leaves a log entry.
        logger.info("Stub WordNetLemmatizer initialized")

    def lemmatize(self, word, pos=None):
        """Return *word* unchanged; *pos* is accepted but not used."""
        lemma = word
        return lemma
|
|
|
|
|
class tokenize:
    """Namespace exposing the stub tokenizers as attributes of ``tokenize``."""

    # Function aliases: each right-hand side resolves to the module-level
    # definition of the same name.
    word_tokenize = word_tokenize
    sent_tokenize = sent_tokenize

    # Class aliases.
    WordTokenizer = WordTokenizer
    SentenceTokenizer = SentenceTokenizer
|
|
|
class stem:
    """Namespace exposing the stub stemmer classes as attributes of ``stem``."""

    # Each right-hand side resolves to the module-level class of the same name.
    SimpleStemmer = SimpleStemmer
    PorterStemmer = PorterStemmer
    LancasterStemmer = LancasterStemmer
|
|
|
|
|
class _CorpusModule:
    """Container whose ``stopwords`` attribute provides a built-in word list."""

    class stopwords:
        """Holder for the built-in stop-word list."""

        # The fixed word list served by words().
        _ENGLISH = (
            "i", "me", "my", "myself", "we", "our", "ours", "ourselves",
            "you", "your", "yours", "yourself", "yourselves", "he", "him",
            "his", "himself", "she", "her", "hers", "herself", "it", "its",
            "itself", "they", "them", "their", "theirs", "themselves",
            "what", "which", "who", "whom", "this", "that", "these",
            "those", "am", "is", "are", "was", "were", "be", "been",
            "being", "have", "has", "had", "having", "do", "does", "did",
            "doing", "a", "an", "the", "and", "but", "if", "or", "because",
            "as", "until", "while", "of", "at", "by", "for", "with",
            "about", "against", "between", "into", "through", "during",
            "before", "after", "above", "below", "to", "from", "up", "down",
            "in", "out", "on", "off", "over", "under", "again", "further",
            "then", "once", "here", "there", "when", "where", "why", "how",
            "all", "any", "both", "each", "few", "more", "most", "other",
            "some", "such", "no", "nor", "not", "only", "own", "same", "so",
            "than", "too", "very", "s", "t", "can", "will", "just", "don",
            "should", "now", "d", "ll", "m", "o", "re", "ve", "y", "ain",
            "aren", "couldn", "didn", "doesn", "hadn", "hasn", "haven",
        )

        @staticmethod
        def words(language="english"):
            """Return a new ``set`` of the built-in stop words.

            *language* is accepted but not consulted: the same word list is
            returned whatever value is passed. Each call builds a fresh set,
            so callers may mutate the result freely.
            """
            return set(_CorpusModule.stopwords._ENGLISH)


# Shared instance through which the stop-word list is reached as
# ``corpus.stopwords.words()``.
corpus = _CorpusModule()
|
|
|