Spaces:
Sleeping
Sleeping
import streamlit as st
import joblib
import re
import nltk
from nltk.corpus import stopwords

# Download the stopword corpus only when it is missing: Streamlit re-executes
# this whole script on every user interaction, so an unconditional
# nltk.download() would hit the network on each rerun.
try:
    nltk.data.find("corpora/stopwords")
except LookupError:
    nltk.download('stopwords')

# English stopwords used by clean_text() to match the training preprocessing.
stop_words = set(stopwords.words('english'))
# LOAD MODELS AND VECTORIZER
# Maps the human-readable model name shown in the sidebar to its pickle file.
models = {
    "Random Forest": "RFCsentimentAnalysis_model.pkl",
    "Logistic Regression": "LRsentimentAnalysis_model.pkl",
    "Multinomial Naïve Bayes": "MNBsentimentAnalysis_model.pkl"
}

# joblib.load accepts a file path directly, so no explicit open() is needed.
vectorizer = joblib.load("vectorizer.pkl")

# Two top-level tabs: the interactive app and the evaluation write-up.
app, model_eval = st.tabs(["Application", "Model Evaluation"])
# STREAMLIT APP TAB 1
with app:
    # Sidebar for model selection
    st.sidebar.header("Select Model")
    model_choice = st.sidebar.selectbox("Choose a model:", list(models.keys()))

    # Load selected model (joblib.load takes the path directly).
    model = joblib.load(models[model_choice])

    # MAPPING RESULTS: integer class labels -> human-readable sentiment.
    sentiment_mapping = {0: "Neutral", 1: "Positive", 2: "Negative"}

    # FUNCTION TO REDUCE TEXT TO ITS MOST BASIC FORM
    def clean_text(text):
        """Normalize *text* for the vectorizer.

        Lower-cases, strips every non-letter/non-space character, and drops
        English stopwords so the input matches the preprocessing presumably
        used when the vectorizer and models were trained.
        """
        text = text.lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        text = ' '.join([word for word in text.split() if word not in stop_words])
        return text

    # STREAMLIT UI
    st.title("Sentiment Analysis App")
    st.write("Enter text below to analyze its sentiment.")
    user_input = st.text_area("Enter text:")

    if st.button("Analyze Sentiment"):
        # strip() so whitespace-only input is treated the same as empty input.
        if user_input.strip():
            cleaned_input = clean_text(user_input)
            transformed_input = vectorizer.transform([cleaned_input])
            # int() normalizes a numpy integer label before the dict lookup;
            # .get guards against an unexpected label instead of raising.
            prediction = int(model.predict(transformed_input)[0])
            sentiment = sentiment_mapping.get(prediction, "Unknown")
            st.write(f"Predicted Sentiment: **{sentiment}**")
        else:
            st.write("Please enter some text to analyze.")
# STREAMLIT APP TAB 2: static evaluation write-up (text and pre-rendered images).
with model_eval:
    st.header("Model Evaluation")
    st.write("The Sentiment Analysis model was trained in order to detect if a text is positive, negative, or neutral. The dataset was taken from kaggle.")
    st.write("Dataset by Ismiel Hossen Abir. Link: https://www.kaggle.com/datasets/mdismielhossenabir/sentiment-analysis")

    # SENTIMENT DISTRIBUTION
    st.header("Sentiment Distribution")
    st.write("The model was trained using a dataset with the total amount of text equivalent to the following labels")
    st.image("sentiment_distribution.png")

    # Confusion Matrix
    # NOTE: the original bullet text described a Spam/Not-Spam classifier
    # (copied from a different app); reworded for this sentiment task.
    st.title("Confusion Matrix")
    st.write("The confusion matrix displays actual vs. predicted labels. Consider the following when interpreting it:")
    st.write("- **True Positives (TP):** Texts of a sentiment class correctly predicted as that class")
    st.write("- **True Negatives (TN):** Texts correctly predicted as not belonging to a given class")
    st.write("- **False Positives (FP):** Texts predicted as a class they do not actually belong to (Type I error)")
    st.write("- **False Negatives (FN):** Texts of a class that were predicted as a different class (Type II error)")

    st.header("Naive Bayes Confusion Matrix")
    st.write("The image below represents the Confusion Matrix of the Naive Bayes model.")
    st.image("MNBConfusion Matrix.png")

    st.header("Logistic Regression Confusion Matrix")
    st.write("The image below represents the Confusion Matrix of the Logistic Regression model.")
    st.image("LRconfusion_matrix.png")

    st.header("Random Forest Confusion Matrix")
    st.write("The image below represents the Confusion Matrix of the Random Forest model.")
    st.image("RFCConfusion Matrix.png")

    # Evaluation Metrics
    st.title("Evaluation Metrics")
    st.write("Evaluation metrics help assess the performance of the sentiment analysis.")

    st.header("Naive Bayes Evaluation Metrics")
    st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Naive Bayes model.")
    st.image("MNBclassification_report.png")

    st.header("Logistic Regression Evaluation Metrics")
    st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Logistic Regression model.")
    st.image("LRclassification_report.png")

    st.header("Random Forest Evaluation Metrics")
    st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Random Forest Classifier model.")
    st.image("RFCclassification_report.png")

    # COMPARISON
    st.header("Comparison")
    st.write("Based on the confusion matrix and evaluation metrics, we can assume that out of the three classification algorithms chosen, Logistic Regression and Random Forest perform better than Naive Bayes.")