|
|
|
|
|
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
|
|
|
|
|
@st.cache_resource
def load_model():
    """Load the serialized sentiment model from ``SA_model.pkl``.

    Decorated with ``st.cache_resource`` so the unpickled object is shared
    across script reruns instead of being read from disk each time.

    Returns:
        The object stored in ``SA_model.pkl`` as deserialized by joblib.
    """
    model_path = "SA_model.pkl"
    loaded_model = joblib.load(model_path)
    return loaded_model
|
|
|
|
|
@st.cache_data
def load_dataset(path="Tweets.csv"):
    """Read the tweets CSV and cache the resulting DataFrame.

    Streamlit re-executes the whole script on every widget interaction;
    caching avoids re-parsing the CSV on each rerun. The cached frame is
    reused until the cache is cleared or the app restarts, so changes to the
    file on disk are not picked up automatically.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(path, encoding="utf-8")


# Shared, cached resources used by the rest of the page.
model = load_model()
df = load_dataset()

# Tweet text is the model input; ``airline_sentiment`` is the target column.
X = df["text"]
y = df["airline_sentiment"]

# Hold out 33% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
|
|
|
|
|
|
|
|
|
|
|
# Hard-coded evaluation scores, one row per sentiment class:
# (class label, precision, recall, F1-score).
_per_class_scores = [
    ("Positive", 0.67, 0.73, 0.70),
    ("Neutral", 0.51, 0.64, 0.57),
    ("Negative", 0.88, 0.79, 0.83),
]

# Transpose the rows into one list per column.
_labels, _precision, _recall, _f1 = (
    list(column) for column in zip(*_per_class_scores)
)

# Column-oriented view of the same scores, keyed by metric name.
class_report_data = {
    "Precision": _precision,
    "Recall": _recall,
    "F1-score": _f1,
}

# Table shown in the "Model Performance" tab, indexed by class label.
class_report_df = pd.DataFrame(class_report_data, index=_labels)
|
|
|
|
|
|
|
|
def predict_sentiment(text):
    """Classify a piece of text with the loaded model.

    Args:
        text: A single string, or a sequence of strings. A single string is
            wrapped in a list before being passed to ``model.predict``; for a
            sequence only the first prediction is reported.

    Returns:
        ``"Negative"``, ``"Neutral"`` or ``"Positive"``, or ``"Unknown"`` when
        the predicted label is not recognised.
    """
    if isinstance(text, str):
        text = [text]

    prediction = model.predict(text)
    label = prediction[0]

    sentiment_mapping = {0: "Negative", 1: "Neutral", 2: "Positive"}

    # NOTE(review): the training target in this file is the raw
    # ``airline_sentiment`` column; if the model was fitted on it directly it
    # emits string labels rather than 0/1/2 -- confirm against the training
    # script. String labels are normalised here so they are not reported as
    # "Unknown".
    if isinstance(label, str):
        display_name = label.strip().capitalize()
        if display_name in sentiment_mapping.values():
            return display_name
        return "Unknown"

    return sentiment_mapping.get(label, "Unknown")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page header and introduction. The emoji in these strings were mis-encoded
# (shown as stray Greek letters) and have been restored as real Unicode.
st.title("📊 Sentiment Analysis Web Application")

st.markdown(
    """
## 📌 Introduction
Welcome to the **Sentiment Analysis Web Application**! This tool is designed to analyze the sentiment of text messages
using a **Support Vector Machine (SVM) model**. The model has been trained on the **Airline Tweets dataset from Kaggle**
and classifies text into three sentiment categories:
- ✅ **Positive**
- ❌ **Negative**
- ➖ **Neutral**
"""
)
|
|
|
|
|
|
|
|
|
|
|
# One tab per page section; the emoji in the labels were mis-encoded and have
# been restored as real Unicode.
tab1, tab2, tab3 = st.tabs(
    ["📂 Dataset Preview", "📈 Model Performance", "📝 Sentiment Prediction"]
)

with tab1:
    st.markdown(
        """
        ## 📂 Dataset Preview
        The dataset used for training this model consists of tweets related to airline services. Each tweet is labeled
        with one of the three sentiment categories (**Positive, Negative, or Neutral**). Below is a sample of the dataset:
        """
    )
    # Render the loaded DataFrame as a table.
    st.write(df)
|
|
|
|
|
|
|
|
with tab2:
    st.markdown(
        """
        ## 📈 Model Performance
        Below are the key performance metrics of the trained **Support Vector Machine (SVM)** model:

        - **Model Accuracy**: The percentage of correctly classified instances.
        - **Classification Report**: Includes precision, recall, and F1-score for each sentiment class.
        - **Confusion Matrix**: A visual representation comparing actual versus predicted classifications.
        """
    )

    # The accuracy figure is a fixed value, not computed at runtime.
    st.write("**📊 Model Accuracy:** 75%")

    st.markdown("### 📋 Classification Report")
    st.dataframe(class_report_df)

    st.markdown("### 🔢 Confusion Matrix")

    # Check for the image up front: for a missing path st.image raises a
    # Streamlit-specific error rather than FileNotFoundError, so catching
    # FileNotFoundError would never show the warning below.
    confusion_matrix_path = Path("cmap.png")
    if confusion_matrix_path.is_file():
        st.image(
            str(confusion_matrix_path),
            caption="Confusion Matrix",
            use_container_width=True,
        )
    else:
        st.warning("⚠️ Confusion matrix image not found. Please check the file path.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with tab3:
    st.markdown(
        """
        ## 📝 Sentiment Prediction
        Type a sentence in the text box below, and the model will classify it as **Positive, Neutral, or Negative**.
        """
    )

    user_input = st.text_area("Type your sentence here:", "")

    if st.button("🔍 Analyze Sentiment"):
        # Whitespace-only input is treated as empty and never reaches the model.
        if user_input.strip():
            sentiment_result = predict_sentiment(user_input)
            st.success(f"### 🎯 Prediction: **{sentiment_result}**")
        else:
            st.warning("⚠️ Please enter a valid text input.")
|
|
|