Toro-Angel committed on
Commit
9e052f3
·
verified ·
1 Parent(s): aa98071

Update analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +7 -4
analyzer.py CHANGED
@@ -6,16 +6,19 @@ import joblib
6
  from sklearn.feature_extraction.text import CountVectorizer
7
  from sklearn.naive_bayes import MultinomialNB
8
  from sklearn.pipeline import Pipeline
 
9
 
 
10
  app = Flask(__name__)
11
 
12
  # Function to preprocess text data
13
- def clean_text(texts):
14
  cleaned_texts = []
15
  for text in texts:
16
  text = text.lower()
17
  text = re.sub(r'[^\w\s]', ' ', text)
18
- cleaned_texts.append(text)
 
19
  return cleaned_texts
20
 
21
  # Function to determine sentiment label based on probability
@@ -54,12 +57,12 @@ def train_model(json_file_path):
54
  return pipeline
55
 
56
  # Endpoint to process new reviews
57
- @app.route('/', methods=['POST'])
58
  def predict_sentiment():
59
  pipeline = load_model()
60
  new_reviews_json = request.json
61
  new_reviews = [review['CUSTOMERREVIEWS'] for review in new_reviews_json['reviewsModel']]
62
- cleaned_new_reviews = clean_text(new_reviews)
63
  predicted_probabilities = pipeline.predict_proba(cleaned_new_reviews)
64
  results = []
65
  for i, review_info in enumerate(new_reviews_json['reviewsModel']):
 
6
  from sklearn.feature_extraction.text import CountVectorizer
7
  from sklearn.naive_bayes import MultinomialNB
8
  from sklearn.pipeline import Pipeline
9
+ import spacy
10
 
11
+ nlp = spacy.load('en_core_web_sm')
12
  app = Flask(__name__)
13
 
14
  # Function to preprocess text data
15
+ def clean_text_with_lemmatization(texts):
16
  cleaned_texts = []
17
  for text in texts:
18
  text = text.lower()
19
  text = re.sub(r'[^\w\s]', ' ', text)
20
+ words = [token.lemma_ for token in nlp(text) if not token.is_stop]
21
+ cleaned_texts.append(' '.join(words))
22
  return cleaned_texts
23
 
24
  # Function to determine sentiment label based on probability
 
57
  return pipeline
58
 
59
  # Endpoint to process new reviews
60
+ @app.route('/predict', methods=['POST'])
61
  def predict_sentiment():
62
  pipeline = load_model()
63
  new_reviews_json = request.json
64
  new_reviews = [review['CUSTOMERREVIEWS'] for review in new_reviews_json['reviewsModel']]
65
+ cleaned_new_reviews = clean_text_with_lemmatization(new_reviews)
66
  predicted_probabilities = pipeline.predict_proba(cleaned_new_reviews)
67
  results = []
68
  for i, review_info in enumerate(new_reviews_json['reviewsModel']):