Spaces:

Toro-Angel
/

CustomerReviewsSentimentAnalysis

Runtime error

Toro-Angel committed on Mar 7, 2024

Commit

9e052f3

verified ·

1 Parent(s): aa98071

Update analyzer.py

Files changed (1) hide show

analyzer.py CHANGED Viewed

@@ -6,16 +6,19 @@ import joblib
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.naive_bayes import MultinomialNB
 from sklearn.pipeline import Pipeline
 app = Flask(__name__)
 # Function to preprocess text data
-def clean_text(texts):
     cleaned_texts = []
     for text in texts:
         text = text.lower()
         text = re.sub(r'[^\w\s]', ' ', text)
-        cleaned_texts.append(text)
     return cleaned_texts
 # Function to determine sentiment label based on probability
@@ -54,12 +57,12 @@ def train_model(json_file_path):
     return pipeline
 # Endpoint to process new reviews
-@app.route('/', methods=['POST'])
 def predict_sentiment():
     pipeline = load_model()
     new_reviews_json = request.json
     new_reviews = [review['CUSTOMERREVIEWS'] for review in new_reviews_json['reviewsModel']]
-    cleaned_new_reviews = clean_text(new_reviews)
     predicted_probabilities = pipeline.predict_proba(cleaned_new_reviews)
     results = []
     for i, review_info in enumerate(new_reviews_json['reviewsModel']):

 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.naive_bayes import MultinomialNB
 from sklearn.pipeline import Pipeline
+import spacy
+nlp = spacy.load('en_core_web_sm')
 app = Flask(__name__)
 # Function to preprocess text data
+def clean_text_with_lemmatization(texts):
     cleaned_texts = []
     for text in texts:
         text = text.lower()
         text = re.sub(r'[^\w\s]', ' ', text)
+        words = [token.lemma_ for token in nlp(text) if not token.is_stop]
+        cleaned_texts.append(' '.join(words))
     return cleaned_texts
 # Function to determine sentiment label based on probability
     return pipeline
 # Endpoint to process new reviews
+@app.route('/predict', methods=['POST'])
 def predict_sentiment():
     pipeline = load_model()
     new_reviews_json = request.json
     new_reviews = [review['CUSTOMERREVIEWS'] for review in new_reviews_json['reviewsModel']]
+    cleaned_new_reviews = clean_text_with_lemmatization(new_reviews)
     predicted_probabilities = pipeline.predict_proba(cleaned_new_reviews)
     results = []
     for i, review_info in enumerate(new_reviews_json['reviewsModel']):