from flask import Flask, request, jsonify
import os
import re
import json
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
# Download the NLTK resources into /tmp/nltk_data, then add that directory
# to NLTK's data search path.
for _resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_resource, download_dir='/tmp/nltk_data')
nltk.data.path.append('/tmp/nltk_data')

# Flask application object that the route decorator below registers on.
app = Flask(__name__)
# Function to preprocess text data
def clean_text_with_lemmatization(texts):
    """Normalize a batch of texts for the sentiment model.

    Each text is lowercased, every character that is neither a word
    character nor whitespace is replaced by a space, English stopwords are
    dropped, and the remaining tokens are lemmatized with WordNet.

    Args:
        texts: Iterable of strings to clean.

    Returns:
        A list with one cleaned string per input text; tokens are joined by
        single spaces.
    """
    english_stopwords = set(stopwords.words('english'))
    wordnet = WordNetLemmatizer()

    def _normalize(raw):
        # Punctuation becomes a space so adjacent words are not glued together.
        without_punctuation = re.sub(r'[^\w\s]', ' ', raw.lower())
        kept_tokens = (
            token
            for token in without_punctuation.split()
            if token not in english_stopwords
        )
        return ' '.join(wordnet.lemmatize(token) for token in kept_tokens)

    return [_normalize(raw) for raw in texts]
# Function to determine sentiment label based on probability
def get_sentiment_label(prob):
    """Map a probability to a five-level sentiment label.

    The bands are contiguous:

        prob < 0.30          -> "Negative"
        0.30 <= prob < 0.5   -> "Slightly Negative"
        prob == 0.5          -> "Neutral"
        0.5 < prob < 0.70    -> "Slightly Positive"
        otherwise            -> "Positive"

    Args:
        prob: Probability of the positive class, as a float.

    Returns:
        The sentiment label as a string.
    """
    if prob < 0.30:
        return "Negative"
    # The lower bound is 0.30 (not 0.35) so that values in [0.30, 0.35) no
    # longer fall through every branch and get reported as "Positive".
    if prob < 0.5:
        return "Slightly Negative"
    if prob == 0.5:
        return "Neutral"
    if prob < 0.70:
        return "Slightly Positive"
    return "Positive"
# Function to load or train the model
def load_model():
    """Return the sentiment pipeline, training it first if none is saved.

    Returns:
        The object stored in 'sentiment_pipeline.pkl' when that file exists
        in the current working directory; otherwise the result of
        train_model('trainData.json').
    """
    model_path = 'sentiment_pipeline.pkl'
    if not os.path.exists(model_path):
        # No saved pipeline yet: train one from the bundled training data.
        return train_model('trainData.json')
    # NOTE(review): joblib.load unpickles the file, so it must only ever be
    # pointed at a trusted artifact.
    return joblib.load(model_path)
# Function to train the model
def train_model(json_file_path):
    """Train the sentiment pipeline from a JSON file and save it to disk.

    The file must contain a JSON array of objects, each with a 'text' and a
    'label' key. A CountVectorizer + MultinomialNB pipeline is fitted on
    those pairs and dumped to 'sentiment_pipeline.pkl'.

    Args:
        json_file_path: Path to the training data JSON file.

    Returns:
        The fitted sklearn Pipeline.

    Raises:
        OSError: If the training file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an entry lacks 'text' or 'label'.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # NOTE(review): predict_sentiment runs clean_text_with_lemmatization on
    # reviews before predicting, but the raw 'text' values are used for
    # fitting here - confirm whether this difference is intended.
    X = [entry['text'] for entry in data]
    y = [entry['label'] for entry in data]

    pipeline = Pipeline([
        ('vectorizer', CountVectorizer()),
        ('classifier', MultinomialNB()),
    ])
    pipeline.fit(X, y)

    joblib.dump(pipeline, 'sentiment_pipeline.pkl')
    return pipeline
# Endpoint to process new reviews
@app.route('/', methods=['POST'])
def predict_sentiment():
    """Score the reviews in the POSTed JSON body and return their sentiment.

    Expected request body:
        {"reviewsModel": [{"CUSTOMERREVIEWS": "<review text>"}, ...]}

    Returns:
        A JSON array with one object per review ("review_text",
        "negative_probability", "positive_probability", "sentiment"),
        followed by one final object holding the mean probabilities and the
        overall sentiment. Probabilities are strings with four decimals.
        A malformed or empty payload yields a JSON error object with HTTP
        status 400 instead of an unhandled exception.
    """
    payload = request.json
    reviews = payload.get('reviewsModel') if isinstance(payload, dict) else None
    if not isinstance(reviews, list) or not reviews:
        return jsonify({
            "error": "Request body must be a JSON object with a non-empty 'reviewsModel' list."
        }), 400

    try:
        review_texts = [review['CUSTOMERREVIEWS'] for review in reviews]
    except (KeyError, TypeError):
        return jsonify({
            "error": "Each item in 'reviewsModel' must be an object with a 'CUSTOMERREVIEWS' field."
        }), 400
    if not all(isinstance(text, str) for text in review_texts):
        return jsonify({"error": "'CUSTOMERREVIEWS' must be a string."}), 400

    # Load the model only once the payload is known to be usable.
    pipeline = load_model()
    cleaned_reviews = clean_text_with_lemmatization(review_texts)
    predicted_probabilities = pipeline.predict_proba(cleaned_reviews)

    # NOTE(review): column 0 is treated as the negative class and column 1
    # as the positive class; this depends on the label ordering of the
    # trained classifier - confirm against the training labels.
    results = []
    for original_review, row in zip(review_texts, predicted_probabilities):
        negative_probability = row[0]
        positive_probability = row[1]
        results.append({
            "review_text": original_review,
            "negative_probability": f"{negative_probability:.4f}",
            "positive_probability": f"{positive_probability:.4f}",
            "sentiment": get_sentiment_label(positive_probability)
        })

    # Compute the per-class mean once and reuse it for both columns.
    mean_probabilities = predicted_probabilities.mean(axis=0)
    overall_negative_probability = mean_probabilities[0]
    overall_positive_probability = mean_probabilities[1]
    results.append({
        "overall_negative_probability": f"{overall_negative_probability:.4f}",
        "overall_positive_probability": f"{overall_positive_probability:.4f}",
        "overall_sentiment": get_sentiment_label(overall_positive_probability)
    })
    return jsonify(results)
#if __name__ == '__main__':
#    app.run(debug=True)