| | import streamlit as st
|
| | import pandas as pd
|
| | import matplotlib.pyplot as plt
|
| | import seaborn as sns
|
| | import plotly.express as px
|
| | from PIL import Image
|
| |
|
| |
|
| |
|
| |
|
@st.cache_data
def load_data(path: str = './src/singapore_airlines_reviews.csv') -> pd.DataFrame:
    """Read the customer-review CSV into a DataFrame.

    The function is wrapped in ``st.cache_data``; ``path`` is an ordinary
    argument, so each distinct path is cached separately.

    Args:
        path: Location of the CSV file. Defaults to the review file this
            page has always loaded, so existing ``load_data()`` calls
            behave as before.

    Returns:
        The parsed CSV as returned by ``pandas.read_csv``.
    """
    return pd.read_csv(path)
|
| |
|
| |
|
# Loaded once at import time; run() reads this module-level frame.
df: pd.DataFrame = load_data()
|
| |
|
| |
|
| |
|
| |
|
def _show_figure_pair(file_stem, caption_stem, note):
    """Render the Negative/Positive images of one figure side by side.

    Args:
        file_stem: Part of the image file name after ``"<Sentiment> - "``
            (without the ``.png`` extension); files are read from ``./src``.
        caption_stem: Part of the caption after ``"<Sentiment> - "``.
        note: Text written underneath the two images.
    """
    negative_col, positive_col = st.columns(2)
    for column, sentiment in ((negative_col, "Negative"), (positive_col, "Positive")):
        with column:
            st.image(
                f"./src/{sentiment} - {file_stem}.png",
                caption=f"{sentiment} - {caption_stem}",
            )
    st.write(note)


def run():
    """Render the Exploratory Data Analysis page.

    Reads the module-level ``df`` and draws, in order: the page title and
    introduction, a preview of the first rows, a count plot of the
    ``rating`` column, a histogram of per-review word counts computed from
    the ``text`` column, and three pairs of pre-rendered topic-modeling
    images loaded from ``./src``.

    The module-level ``df`` is not modified: word counts are computed into
    a local frame, so the dataset preview looks the same on every call.
    """
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Exploratory Data Analysis (EDA)")

    st.markdown(
        """
        This section provides an exploratory data analysis (EDA) of Singapore Airlines (SQ) customer reviews.
        We aim to understand the distribution of ratings, textual review characteristics, and topic modeling results.
        These insights serve as the foundation for building automated models that classify sentiment and uncover key themes
        in customer feedback.
        """
    )

    st.write("### Dataset Preview")
    st.dataframe(df.head())

    st.write("### Distribution of Ratings")
    rating_fig, ax = plt.subplots(figsize=(8, 5))
    # NOTE(review): seaborn >= 0.13 emits a FutureWarning for `palette`
    # without `hue`. The suggested replacement (hue='rating', legend=False)
    # needs seaborn >= 0.13, and the installed version is not visible here,
    # so the call is left in its original form.
    sns.countplot(
        x='rating',
        data=df,
        palette='viridis',
        ax=ax,
        # Missing ratings cannot be ordered meaningfully; keep them out of
        # the explicit bar order.
        order=sorted(df['rating'].dropna().unique()),
    )
    for bar in ax.patches:
        height = bar.get_height()
        # get_height() returns a float; ',.0f' labels the bar "1,234"
        # rather than "1,234.0".
        ax.annotate(
            f'{height:,.0f}',
            (bar.get_x() + bar.get_width() / 2, height),
            ha='center', va='bottom', fontsize=10, fontweight='bold',
        )
    st.pyplot(rating_fig)
    # Figures created through pyplot stay registered (and in memory) until
    # they are closed; release this one once it has been rendered.
    plt.close(rating_fig)

    st.markdown(
        """
        **Note:** Ratings are explored here only as descriptive information about passenger experiences.
        In the inference page, actual sentiment will be predicted automatically from the review text using NLP techniques.
        """
    )

    st.write("### Distribution of Review Length")
    # Whitespace-separated word count per review, kept in a local frame so
    # the shared `df` does not gain a column as a side effect of rendering.
    review_lengths = pd.DataFrame(
        {'text_length': df['text'].apply(lambda review: len(str(review).split()))}
    )
    length_fig = px.histogram(
        review_lengths, x='text_length', nbins=50, title='Review Length Distribution'
    )
    st.plotly_chart(length_fig, use_container_width=True)

    st.write("## Topic Modeling Results")

    _show_figure_pair(
        "Top Words Distributions",
        "Top Words Distributions",
        "Lorem ipsum explanation for Top Words Distributions.",
    )
    _show_figure_pair(
        "Topic Activities Over Time",
        "Topic Activities Over Time",
        "Lorem ipsum explanation for Topic Activities Over Time.",
    )
    # The file names say "Topics Weights" while the captions say
    # "Topic Weights"; both spellings are kept exactly as they were.
    _show_figure_pair(
        "Topics Weights",
        "Topic Weights",
        "Lorem ipsum explanation for Topics Weights.",
    )
|
| |
|
| |
|
| |
|
| |
|
# Render the page only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    run()