import os

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
from PIL import Image

# =============================================
# Base directory (works in container)
# =============================================
# Resolved to an absolute path at import time so that the CSV and image
# lookups below do not depend on the process's current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


# =============================================
# Cache dataset to avoid reload every time
# =============================================
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read ``singapore_airlines_reviews.csv`` from ``BASE_DIR``.

    Returns:
        The CSV contents as a DataFrame. ``run()`` reads the ``rating`` and
        ``text`` columns from it.
    """
    csv_path = os.path.join(BASE_DIR, 'singapore_airlines_reviews.csv')
    return pd.read_csv(csv_path)


# Load dataset
df = load_data()


# =============================================
# Rendering helpers
# =============================================
def _render_rating_distribution(reviews: pd.DataFrame) -> None:
    """Draw a count plot of the ``rating`` column with a label on each bar."""
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(
        x='rating',
        data=reviews,
        palette='viridis',
        ax=ax,
        order=sorted(reviews['rating'].unique()),
    )
    for bar in ax.patches:
        height = bar.get_height()
        # get_height() returns a float; ``.0f`` keeps the label an integer
        # count ("1,234") instead of "1,234.0".
        ax.annotate(
            f'{height:,.0f}',
            (bar.get_x() + bar.get_width() / 2, height),
            ha='center',
            va='bottom',
            fontsize=10,
            fontweight='bold',
        )
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates until it is closed;
    # close it so repeated calls do not accumulate figures in memory.
    plt.close(fig)


def _render_review_length_distribution(reviews: pd.DataFrame) -> None:
    """Draw a histogram of the whitespace-separated word count of ``text``."""
    word_counts = reviews['text'].apply(lambda text: len(str(text).split()))
    # Plot from a separate frame instead of adding a column to ``reviews``:
    # the caller passes the shared module-level ``df``, and writing to it
    # would make the extra column show up in the dataset preview on later
    # calls to run().
    length_frame = word_counts.to_frame('text_length')
    fig = px.histogram(
        length_frame,
        x='text_length',
        nbins=50,
        title='Review Length Distribution',
    )
    st.plotly_chart(fig, use_container_width=True)


def _show_image_pair(title: str, file_suffix: str = "") -> None:
    """Show the Negative and Positive image for ``title`` side by side.

    The image file is ``"<Sentiment> - <title><file_suffix>.png"`` inside
    ``BASE_DIR``; the caption is ``"<Sentiment> - <title>"``.
    """
    columns = st.columns(2)
    for column, sentiment in zip(columns, ("Negative", "Positive")):
        label = f"{sentiment} - {title}"
        with column:
            st.image(
                os.path.join(BASE_DIR, f"{label}{file_suffix}.png"),
                caption=label,
            )


# =============================================
# Main EDA function
# =============================================
def run() -> None:
    """Render the EDA page.

    Shows, in order: the page header and introduction, a preview of the
    dataset, the rating distribution, the review-length distribution, the
    word-cloud images and the topic-modeling images.
    """
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Exploratory Data Analysis (EDA)")
    st.markdown(
        """
        This section provides an exploratory data analysis (EDA) of Singapore Airlines (SQ) customer reviews.
        We aim to understand the distribution of ratings, textual review characteristics, and topic modeling results.
        """
    )

    # Dataset preview
    st.write("### Dataset Preview")
    st.dataframe(df.head())

    # Distribution of ratings
    st.write("### Distribution of Ratings")
    _render_rating_distribution(df)

    # Distribution of review length
    st.write("### Distribution of Review Length")
    _render_review_length_distribution(df)

    # Wordclouds
    _show_image_pair("Wordcloud")

    # Topic Modeling Results
    st.write("## Topic Modeling Results")
    # The topic-modeling image files carry a " 10" suffix that is not part
    # of their captions.
    _show_image_pair("Top Words Distributions", file_suffix=" 10")
    _show_image_pair("Topic Activities Over Time", file_suffix=" 10")
    _show_image_pair("Topics Weights", file_suffix=" 10")