Spaces:
Runtime error
Runtime error
| import re | |
| import string | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
# NLTK data packages fetched at import time, in this order.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)

# Shared, module-level helpers consumed by preprocess_text().
# The stop-word list is stored as a set so membership tests are O(1).
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
def preprocess_text(text: str) -> str:
    """Clean and normalize raw text into a space-separated string of lemmas.

    Processing steps, in order:

    1. Lowercase the input.
    2. Replace every character that is not an ASCII letter, a digit, or
       whitespace with a single space.
    3. Tokenize the result with ``nltk.word_tokenize``.
    4. Drop tokens found in the module-level ``stop_words`` set.
    5. Lemmatize each remaining token with the module-level ``lemmatizer``.

    Args:
        text: The raw input string.

    Returns:
        The surviving lemmas joined by single spaces. An empty string is
        returned when no token survives the filtering.
    """
    # Punctuation is removed here, before tokenization, so no token produced
    # below can contain a punctuation character and no separate punctuation
    # filter is required afterwards.
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', ' ', text.lower())

    # Filter stop words and lemmatize in a single pass over the tokens.
    return " ".join(
        lemmatizer.lemmatize(token)
        for token in nltk.word_tokenize(cleaned)
        if token not in stop_words
    )