Spaces:
Runtime error
Runtime error
| from flask import Flask, request, jsonify | |
| import os | |
| import re | |
| import json | |
| import nltk | |
| from nltk.stem import WordNetLemmatizer | |
| from nltk.corpus import stopwords | |
| import joblib | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.pipeline import Pipeline | |
# Download the NLTK resources this module requests into /tmp/nltk_data, then
# add that directory to NLTK's resource search path.
_NLTK_DATA_DIR = '/tmp/nltk_data'
for _resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_resource, download_dir=_NLTK_DATA_DIR)
nltk.data.path.append(_NLTK_DATA_DIR)

# Flask application object for this module.
app = Flask(__name__)
def clean_text_with_lemmatization(texts):
    """Normalise each text for the classifier.

    For every string in ``texts``: lower-case it, replace every character
    that is neither a word character nor whitespace with a space, drop
    English stop words, lemmatize the remaining words with WordNet, and
    re-join them with single spaces.

    Args:
        texts: Iterable of strings.

    Returns:
        A list of cleaned strings, one per input text, in input order.
    """
    english_stopwords = set(stopwords.words('english'))
    wordnet = WordNetLemmatizer()
    punctuation = re.compile(r'[^\w\s]')

    def _clean_one(raw):
        # Punctuation becomes a space (not an empty string) so that words
        # joined by punctuation are split apart by the later .split().
        tokens = punctuation.sub(' ', raw.lower()).split()
        kept = [wordnet.lemmatize(tok) for tok in tokens
                if tok not in english_stopwords]
        return ' '.join(kept)

    return [_clean_one(raw) for raw in texts]
def get_sentiment_label(prob):
    """Map a positive-class probability to a five-level sentiment label.

    Bands:
        prob < 0.30         -> "Negative"
        0.30 <= prob < 0.5  -> "Slightly Negative"
        prob == 0.5         -> "Neutral"
        0.5 < prob < 0.70   -> "Slightly Positive"
        anything else       -> "Positive"

    Args:
        prob: Probability of the positive class.

    Returns:
        The label string for the band containing ``prob``.
    """
    if prob < 0.30:
        return "Negative"
    # The lower bound of this band is 0.30 so the bands are contiguous and
    # mirror the 0.70 bound on the positive side. With a 0.35 lower bound,
    # values in [0.30, 0.35) matched no negative band and fell through to
    # "Positive".
    if prob < 0.5:
        return "Slightly Negative"
    if prob == 0.5:
        return "Neutral"
    if prob < 0.70:
        return "Slightly Positive"
    return "Positive"
def load_model():
    """Return the sentiment pipeline, preferring a saved copy on disk.

    If 'sentiment_pipeline.pkl' exists (path relative to the current
    working directory) it is loaded with joblib; otherwise a new pipeline
    is trained from 'trainData.json' via ``train_model``.

    Returns:
        The loaded or newly trained pipeline object.
    """
    saved_pipeline = 'sentiment_pipeline.pkl'
    if not os.path.exists(saved_pipeline):
        return train_model('trainData.json')
    # joblib.load unpickles the file, so the file must come from a trusted
    # source.
    return joblib.load(saved_pipeline)
def train_model(json_file_path):
    """Train the sentiment pipeline from a JSON file and save it to disk.

    Args:
        json_file_path: Path to a JSON file holding a list of objects, each
            with a 'text' entry and a 'label' entry.

    Returns:
        The fitted Pipeline (CountVectorizer followed by MultinomialNB).

    Side effects:
        Writes the fitted pipeline to 'sentiment_pipeline.pkl'.
    """
    # Read as UTF-8 explicitly: the platform default encoding is
    # locale-dependent and can mis-decode (or fail on) non-ASCII text.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    X = [entry['text'] for entry in data]
    y = [entry['label'] for entry in data]

    # NOTE(review): the pipeline is fitted on the raw 'text' values, while
    # predict_sentiment passes text through clean_text_with_lemmatization
    # before predict_proba. Confirm whether the training data is already
    # cleaned or whether the same cleaning should be applied here.
    pipeline = Pipeline([
        ('vectorizer', CountVectorizer()),
        ('classifier', MultinomialNB()),
    ])
    pipeline.fit(X, y)

    joblib.dump(pipeline, 'sentiment_pipeline.pkl')
    return pipeline
def predict_sentiment():
    """Score the reviews carried in the current request's JSON body.

    Expected body::

        {"reviewsModel": [{"CUSTOMERREVIEWS": "<review text>"}, ...]}

    Returns:
        On success, a JSON list with one object per review
        ("review_text", "negative_probability", "positive_probability",
        "sentiment") followed by one summary object
        ("overall_negative_probability", "overall_positive_probability",
        "overall_sentiment"). Probabilities are strings formatted to four
        decimal places.

        If the body is not a JSON object, "reviewsModel" is not a non-empty
        list, or an entry lacks a string "CUSTOMERREVIEWS", a JSON error
        object with HTTP status 400.
    """
    # NOTE(review): no @app.route decorator is visible on this function in
    # this view of the file; confirm it is registered as a route.

    # Validate before any model work so malformed input yields a 400
    # instead of an unhandled KeyError/TypeError.
    payload = request.get_json(silent=True)
    reviews = payload.get('reviewsModel') if isinstance(payload, dict) else None
    if not isinstance(reviews, list) or not reviews:
        return jsonify(
            {"error": "Request body must contain a non-empty 'reviewsModel' list."}
        ), 400

    review_texts = []
    for review in reviews:
        text = review.get('CUSTOMERREVIEWS') if isinstance(review, dict) else None
        if not isinstance(text, str):
            return jsonify(
                {"error": "Every review must contain a string 'CUSTOMERREVIEWS' value."}
            ), 400
        review_texts.append(text)

    pipeline = load_model()
    cleaned_reviews = clean_text_with_lemmatization(review_texts)
    predicted_probabilities = pipeline.predict_proba(cleaned_reviews)

    # assumes column 0 is the negative class and column 1 the positive
    # class; the order follows the classifier's class ordering -- TODO
    # confirm against the training labels.
    results = []
    for text, row in zip(review_texts, predicted_probabilities):
        negative_probability = row[0]
        positive_probability = row[1]
        results.append({
            "review_text": text,
            "negative_probability": f"{negative_probability:.4f}",
            "positive_probability": f"{positive_probability:.4f}",
            "sentiment": get_sentiment_label(positive_probability),
        })

    # Column-wise mean over all reviews, computed once and reused.
    overall = predicted_probabilities.mean(axis=0)
    results.append({
        "overall_negative_probability": f"{overall[0]:.4f}",
        "overall_positive_probability": f"{overall[1]:.4f}",
        "overall_sentiment": get_sentiment_label(overall[1]),
    })
    return jsonify(results)
| #if __name__ == '__main__': | |
| # app.run(debug=True) |