Toro-Angel committed on
Commit
3d37937
·
verified ·
1 Parent(s): 70d0f17

Update analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +13 -33
analyzer.py CHANGED
@@ -2,41 +2,21 @@ from flask import Flask, request, jsonify
2
  import os
3
  import re
4
  import json
 
5
  from sklearn.feature_extraction.text import CountVectorizer
6
  from sklearn.naive_bayes import MultinomialNB
7
  from sklearn.pipeline import Pipeline
8
- from sklearn.base import BaseEstimator, TransformerMixin
9
- import string
10
 
11
  app = Flask(__name__)
12
 
13
- # Custom transformer for text cleaning
14
- class TextPreprocessor(BaseEstimator, TransformerMixin):
15
- def __init__(self):
16
- self.stopwords = set(open('stopwords.txt').read().splitlines())
17
- self.lemmatizer = Lemmatizer()
18
-
19
- def fit(self, X, y=None):
20
- return self
21
-
22
- def transform(self, X, y=None):
23
- cleaned_texts = []
24
- for text in X:
25
- text = text.lower()
26
- text = re.sub(r'[^\w\s]', ' ', text)
27
- words = [self.lemmatizer.lemmatize(word) for word in text.split() if word not in self.stopwords]
28
- cleaned_texts.append(' '.join(words))
29
- return cleaned_texts
30
-
31
- # Custom lemmatizer
32
- class Lemmatizer:
33
- def __init__(self):
34
- # Add your lemmatization logic here
35
- pass
36
-
37
- def lemmatize(self, word):
38
- # Add your lemmatization logic here
39
- return word
40
 
41
  # Function to determine sentiment label based on probability
42
  def get_sentiment_label(prob):
@@ -66,7 +46,6 @@ def train_model(json_file_path):
66
  X = [entry['text'] for entry in data]
67
  y = [entry['label'] for entry in data]
68
  pipeline = Pipeline([
69
- ('preprocessor', TextPreprocessor()),
70
  ('vectorizer', CountVectorizer()),
71
  ('classifier', MultinomialNB())
72
  ])
@@ -75,12 +54,13 @@ def train_model(json_file_path):
75
  return pipeline
76
 
77
  # Endpoint to process new reviews
78
- @app.route('/predict', methods=['POST'])
79
  def predict_sentiment():
80
  pipeline = load_model()
81
  new_reviews_json = request.json
82
  new_reviews = [review['CUSTOMERREVIEWS'] for review in new_reviews_json['reviewsModel']]
83
- predicted_probabilities = pipeline.predict_proba(new_reviews)
 
84
  results = []
85
  for i, review_info in enumerate(new_reviews_json['reviewsModel']):
86
  original_review = review_info['CUSTOMERREVIEWS']
@@ -106,4 +86,4 @@ def predict_sentiment():
106
  return jsonify(results)
107
 
108
  if __name__ == '__main__':
109
- app.run(debug=True)
 
2
  import os
3
  import re
4
  import json
5
+ import joblib
6
  from sklearn.feature_extraction.text import CountVectorizer
7
  from sklearn.naive_bayes import MultinomialNB
8
  from sklearn.pipeline import Pipeline
 
 
9
 
10
  app = Flask(__name__)
11
 
12
+ # Function to preprocess text data
13
+ def clean_text(texts):
14
+ cleaned_texts = []
15
+ for text in texts:
16
+ text = text.lower()
17
+ text = re.sub(r'[^\w\s]', ' ', text)
18
+ cleaned_texts.append(text)
19
+ return cleaned_texts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  # Function to determine sentiment label based on probability
22
  def get_sentiment_label(prob):
 
46
  X = [entry['text'] for entry in data]
47
  y = [entry['label'] for entry in data]
48
  pipeline = Pipeline([
 
49
  ('vectorizer', CountVectorizer()),
50
  ('classifier', MultinomialNB())
51
  ])
 
54
  return pipeline
55
 
56
  # Endpoint to process new reviews
57
+ @app.route('/', methods=['POST'])
58
  def predict_sentiment():
59
  pipeline = load_model()
60
  new_reviews_json = request.json
61
  new_reviews = [review['CUSTOMERREVIEWS'] for review in new_reviews_json['reviewsModel']]
62
+ cleaned_new_reviews = clean_text(new_reviews)
63
+ predicted_probabilities = pipeline.predict_proba(cleaned_new_reviews)
64
  results = []
65
  for i, review_info in enumerate(new_reviews_json['reviewsModel']):
66
  original_review = review_info['CUSTOMERREVIEWS']
 
86
  return jsonify(results)
87
 
88
  if __name__ == '__main__':
89
+ app.run(debug=True)