| | import streamlit as st
|
| | import pandas as pd
|
| | import matplotlib.pyplot as plt
|
| | import seaborn as sns
|
| | import plotly.express as px
|
| | from PIL import Image
|
| | import os
|
| |
|
| |
|
| |
|
| |
|
# Directory containing this module; the CSV and image assets are resolved
# relative to it so the page does not depend on the current working directory.
BASE_DIR = os.path.split(__file__)[0]
|
| |
|
| |
|
| |
|
| |
|
@st.cache_data
def load_data():
    """Read the Singapore Airlines reviews CSV that sits next to this module.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    parsed result instead of re-reading the file on every call.

    Returns:
        The contents of ``singapore_airlines_reviews.csv`` as a DataFrame.
    """
    return pd.read_csv(os.path.join(BASE_DIR, 'singapore_airlines_reviews.csv'))
|
| |
|
| |
|
# Module-level review dataset, loaded once at import time and read by run().
df: pd.DataFrame = load_data()
|
| |
|
| |
|
| |
|
| |
|
def _render_rating_distribution():
    """Draw a count plot of the ``rating`` column with a label above each bar."""
    fig, ax = plt.subplots(figsize=(8, 5))
    # NOTE(review): newer seaborn releases reportedly warn when `palette` is
    # passed without `hue`; left unchanged to stay compatible with whatever
    # version is installed -- confirm before changing.
    sns.countplot(
        x='rating',
        data=df,
        palette='viridis',
        ax=ax,
        # Drop missing ratings first: NaN does not sort reliably.
        order=sorted(df['rating'].dropna().unique()),
    )
    for patch in ax.patches:
        height = patch.get_height()
        if pd.isna(height):
            # Nothing to label for an empty bar.
            continue
        # Bar heights come back as floats; cast so the label reads "1,234"
        # rather than "1,234.0".
        ax.annotate(
            f'{int(height):,}',
            (patch.get_x() + patch.get_width() / 2, height),
            ha='center', va='bottom', fontsize=10, fontweight='bold',
        )
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; release
    # this one so repeated reruns do not accumulate open figures.
    plt.close(fig)


def _render_review_length_distribution():
    """Draw a histogram of the number of whitespace-separated words per review."""
    # Build a separate frame instead of adding a column to the shared
    # module-level `df`, so the dataset preview is the same on every run.
    word_counts = df['text'].apply(lambda text: len(str(text).split()))
    lengths = pd.DataFrame({'text_length': word_counts})
    fig = px.histogram(lengths, x='text_length', nbins=50, title='Review Length Distribution')
    st.plotly_chart(fig, use_container_width=True)


def _show_sentiment_images(title, file_suffix=""):
    """Show the Negative and Positive images for *title* in two columns.

    The file name is ``"<Sentiment> - <title><file_suffix>.png"`` inside
    ``BASE_DIR`` and the caption is ``"<Sentiment> - <title>"``. A missing
    file produces a warning in its column instead of aborting the page.
    """
    columns = st.columns(2)
    for column, sentiment in zip(columns, ("Negative", "Positive")):
        caption = f"{sentiment} - {title}"
        image_path = os.path.join(BASE_DIR, f"{caption}{file_suffix}.png")
        with column:
            if os.path.exists(image_path):
                st.image(image_path, caption=caption)
            else:
                st.warning(f"Image not found: {os.path.basename(image_path)}")


def run():
    """Render the Exploratory Data Analysis page.

    Shows, in order: a preview of the dataset, the rating distribution, the
    review-length distribution, the negative/positive word clouds and the
    pre-rendered topic-modeling images. Reads the module-level ``df`` without
    modifying it.
    """
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Exploratory Data Analysis (EDA)")

    st.markdown(
        """
        This section provides an exploratory data analysis (EDA) of Singapore Airlines (SQ) customer reviews.
        We aim to understand the distribution of ratings, textual review characteristics, and topic modeling results.
        """
    )

    st.write("### Dataset Preview")
    st.dataframe(df.head())

    st.write("### Distribution of Ratings")
    _render_rating_distribution()

    st.write("### Distribution of Review Length")
    _render_review_length_distribution()

    _show_sentiment_images("Wordcloud")

    st.write("## Topic Modeling Results")
    # The topic-modeling image files carry a " 10" suffix in their names that
    # is not part of the caption.
    for title in (
        "Top Words Distributions",
        "Topic Activities Over Time",
        "Topics Weights",
    ):
        _show_sentiment_images(title, file_suffix=" 10")
|
| |
|