|
|
import streamlit as st
|
|
|
import pandas as pd
|
|
|
import matplotlib.pyplot as plt
|
|
|
import seaborn as sns
|
|
|
import plotly.express as px
|
|
|
from PIL import Image
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Directory containing this module; the reviews CSV and the pre-rendered
# image assets used below are resolved relative to it.
BASE_DIR = os.path.dirname(__file__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data
def load_data():
    """Read the Singapore Airlines reviews CSV located in ``BASE_DIR``.

    Returns:
        The parsed contents of ``singapore_airlines_reviews.csv`` as a
        pandas DataFrame. The function is wrapped in ``st.cache_data``.
    """
    return pd.read_csv(os.path.join(BASE_DIR, 'singapore_airlines_reviews.csv'))
|
|
|
|
|
|
|
|
|
# Load the reviews at import time; run() reads this module-level frame.
df = load_data()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _show_image_pair(title, file_suffix=""):
    """Render the Negative/Positive images for *title* in two columns.

    Args:
        title: Shared part of the caption, e.g. ``"Wordcloud"``. The caption
            shown under each image is ``"<Sentiment> - <title>"``.
        file_suffix: Text inserted between the caption and ``.png`` to form
            the file name (e.g. ``" 10"`` for the topic-model exports).
    """
    for column, sentiment in zip(st.columns(2), ("Negative", "Positive")):
        caption = f"{sentiment} - {title}"
        with column:
            st.image(
                os.path.join(BASE_DIR, f"{caption}{file_suffix}.png"),
                caption=caption,
            )


def run():
    """Render the exploratory data analysis (EDA) page.

    Shows, in order: a preview of the module-level ``df``, a count plot of
    the ``rating`` column, a histogram of per-review word counts computed
    from the ``text`` column, and the pre-rendered word-cloud and
    topic-modeling images stored in ``BASE_DIR``.

    The module-level ``df`` is only read, never modified, so repeated calls
    render the same dataset preview.
    """
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Exploratory Data Analysis (EDA)")

    st.markdown(
        """
        This section provides an exploratory data analysis (EDA) of Singapore Airlines (SQ) customer reviews.
        We aim to understand the distribution of ratings, textual review characteristics, and topic modeling results.
        """
    )

    st.write("### Dataset Preview")
    st.dataframe(df.head())

    st.write("### Distribution of Ratings")
    fig, ax = plt.subplots(figsize=(8, 5))
    # NOTE(review): recent seaborn releases warn when `palette` is passed
    # without `hue`; confirm against the pinned seaborn version.
    sns.countplot(
        x='rating',
        data=df,
        palette='viridis',
        ax=ax,
        # Drop missing ratings first: sorting a sequence containing NaN is
        # not well defined.
        order=sorted(df['rating'].dropna().unique()),
    )
    for bar in ax.patches:
        height = bar.get_height()
        if pd.isna(height):
            continue
        # Bar heights are floats; cast so the label reads "1,234" rather
        # than "1,234.0".
        ax.annotate(
            f'{int(height):,}',
            (bar.get_x() + bar.get_width() / 2, height),
            ha='center', va='bottom', fontsize=10, fontweight='bold',
        )
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until closed; release it so
    # figures do not accumulate across Streamlit reruns.
    plt.close(fig)

    st.write("### Distribution of Review Length")
    # Build a separate one-column frame instead of adding a column to the
    # shared module-level df.
    text_lengths = (
        df['text']
        .apply(lambda x: len(str(x).split()))
        .to_frame('text_length')
    )
    length_fig = px.histogram(
        text_lengths, x='text_length', nbins=50, title='Review Length Distribution'
    )
    st.plotly_chart(length_fig, use_container_width=True)

    _show_image_pair("Wordcloud")

    st.write("## Topic Modeling Results")
    _show_image_pair("Top Words Distributions", " 10")
    _show_image_pair("Topic Activities Over Time", " 10")
    _show_image_pair("Topics Weights", " 10")
|
|
|
|