Toro-Angel committed on
Commit
a06d116
·
verified ·
1 Parent(s): 345450e

Update analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +7 -4
analyzer.py CHANGED
@@ -1,24 +1,27 @@
1
- !python -m spacy download en_core_web_sm
2
  from flask import Flask, request, jsonify
3
  import os
4
  import re
5
  import json
 
 
6
  import joblib
7
  from sklearn.feature_extraction.text import CountVectorizer
8
  from sklearn.naive_bayes import MultinomialNB
9
  from sklearn.pipeline import Pipeline
10
- import spacy
11
 
12
- nlp = spacy.load('en_core_web_sm')
 
13
  app = Flask(__name__)
14
 
15
  # Function to preprocess text data
16
  def clean_text_with_lemmatization(texts):
 
 
17
  cleaned_texts = []
18
  for text in texts:
19
  text = text.lower()
20
  text = re.sub(r'[^\w\s]', ' ', text)
21
- words = [token.lemma_ for token in nlp(text) if not token.is_stop]
22
  cleaned_texts.append(' '.join(words))
23
  return cleaned_texts
24
 
 
 
1
  from flask import Flask, request, jsonify
2
  import os
3
  import re
4
  import json
5
+ from nltk.stem import WordNetLemmatizer
6
+ from nltk.corpus import stopwords
7
  import joblib
8
  from sklearn.feature_extraction.text import CountVectorizer
9
  from sklearn.naive_bayes import MultinomialNB
10
  from sklearn.pipeline import Pipeline
 
11
 
12
import nltk  # `from nltk.stem import ...` / `from nltk.corpus import ...` do not bind the name `nltk` used below

# NOTE(review): nltk.data.path is the list of local directories NLTK searches
# for its data packages. These Google Drive folder URLs are not filesystem
# paths, so they presumably cannot be resolved when the corpora are loaded --
# confirm, and point these at the directory that actually holds the data.
nltk.data.path.append('https://drive.google.com/drive/folders/1rc3yfpeKaQswBVe9wJKYWEmz1tHguDzF?usp=sharing')
nltk.data.path.append('https://drive.google.com/drive/folders/1qpWGiSd0slH0QvUhm8BXwgMx18PvFlW1?usp=sharing')
14
  app = Flask(__name__)
15
 
16
  # Function to preprocess text data
17
  def clean_text_with_lemmatization(texts):
18
+ stopwords_set = set(stopwords.words('english'))
19
+ lemmatizer = WordNetLemmatizer()
20
  cleaned_texts = []
21
  for text in texts:
22
  text = text.lower()
23
  text = re.sub(r'[^\w\s]', ' ', text)
24
+ words = [lemmatizer.lemmatize(word) for word in text.split() if word not in stopwords_set]
25
  cleaned_texts.append(' '.join(words))
26
  return cleaned_texts
27