|
|
import streamlit as st
|
|
|
import pandas as pd
|
|
|
import matplotlib.pyplot as plt
|
|
|
import seaborn as sns
|
|
|
import plotly.express as px
|
|
|
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data
def load_data():
    """Read the Singapore Airlines review CSV into a DataFrame.

    The function is wrapped in ``st.cache_data``; the path is relative, so it
    is resolved against the working directory the app is launched from.

    Returns:
        The parsed contents of ``./src/singapore_airlines_reviews.csv``.
    """
    return pd.read_csv('./src/singapore_airlines_reviews.csv')
|
|
|
|
|
|
|
|
|
# Load the reviews at import time; run() below reads this module-level frame.
df = load_data()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _show_image_pair(left, right):
    """Render two captioned images side by side in a two-column row.

    Args:
        left: ``(path, caption)`` for the image shown in the first column.
        right: ``(path, caption)`` for the image shown in the second column.
    """
    for column, (path, caption) in zip(st.columns(2), (left, right)):
        with column:
            st.image(path, caption=caption)


def run():
    """Render the Exploratory Data Analysis page.

    Sections, in order: page title and introduction, a preview of the
    module-level ``df``, a count plot of the ``rating`` column, a histogram of
    review length in words, and three rows of pre-rendered topic-modeling
    images loaded from ``./src``.

    Side effect: adds a ``text_length`` column (whitespace-separated word
    count of ``text``) to the module-level ``df``.
    """
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Exploratory Data Analysis (EDA)")

    st.markdown(
        """
        This section provides an exploratory data analysis (EDA) of Singapore Airlines (SQ) customer reviews.
        We aim to understand the distribution of ratings, textual review characteristics, and topic modeling results.
        These insights serve as the foundation for building automated models that classify sentiment and uncover key themes
        in customer feedback.
        """
    )

    st.write("### Dataset Preview")
    st.dataframe(df.head())

    st.write("### Distribution of Ratings")
    rating_order = sorted(df['rating'].unique())
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(x='rating', data=df, palette='viridis', ax=ax,
                  order=rating_order)
    for bar in ax.patches:
        count = bar.get_height()
        # Bar heights are usually floats; ``,.0f`` shows "1,234" rather
        # than "1,234.0" while still accepting integer heights.
        ax.annotate(
            f'{count:,.0f}',
            (bar.get_x() + bar.get_width() / 2, count),
            ha='center', va='bottom', fontsize=10, fontweight='bold',
        )
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # without this each rerun of the page would leave one more figure alive.
    plt.close(fig)

    st.markdown(
        """
        **Note:** Ratings are explored here only as descriptive information about passenger experiences.
        In the inference page, actual sentiment will be predicted automatically from the review text using NLP techniques.
        """
    )

    st.write("### Distribution of Review Length")
    # Word count per review; non-string values are stringified first.
    df['text_length'] = df['text'].apply(lambda x: len(str(x).split()))
    length_fig = px.histogram(df, x='text_length', nbins=50,
                              title='Review Length Distribution')
    st.plotly_chart(length_fig, use_container_width=True)

    st.write("## Topic Modeling Results")

    _show_image_pair(
        ("./src/Negative - Top Words Distributions.png",
         "Negative - Top Words Distributions"),
        ("./src/Positive - Top Words Distributions.png",
         "Positive - Top Words Distributions"),
    )
    st.write("Lorem ipsum explanation for Top Words Distributions.")

    _show_image_pair(
        ("./src/Negative - Topic Activities Over Time.png",
         "Negative - Topic Activities Over Time"),
        ("./src/Positive - Topic Activities Over Time.png",
         "Positive - Topic Activities Over Time"),
    )
    st.write("Lorem ipsum explanation for Topic Activities Over Time.")

    # File names say "Topics Weights" while the captions say "Topic Weights";
    # both are kept exactly as they were.
    _show_image_pair(
        ("./src/Negative - Topics Weights.png", "Negative - Topic Weights"),
        ("./src/Positive - Topics Weights.png", "Positive - Topic Weights"),
    )
    st.write("Lorem ipsum explanation for Topics Weights.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Render the page only when this file is executed as the entry script.
if __name__ == '__main__':
    run()