Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import plotly.express as px | |
| from PIL import Image | |
| import os | |
| # ============================================= | |
| # Base directory (works in container) | |
| # ============================================= | |
# Directory containing this module; the CSV and image assets are looked up
# relative to it. Resolved to an absolute path so the lookups keep working
# even if __file__ is relative or the working directory differs.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
| # ============================================= | |
| # Cache dataset to avoid reload every time | |
| # ============================================= | |
@st.cache_data
def load_data():
    """Read the Singapore Airlines reviews CSV into a DataFrame.

    The file ``singapore_airlines_reviews.csv`` is looked up in ``BASE_DIR``.
    The result is memoised with ``st.cache_data`` so the CSV is parsed once
    and reused instead of being re-read on every call; each caller receives
    its own copy of the cached frame.

    Returns:
        pandas.DataFrame: the parsed contents of the CSV.

    Raises:
        FileNotFoundError: if the CSV is not present in ``BASE_DIR``.
    """
    csv_path = os.path.join(BASE_DIR, 'singapore_airlines_reviews.csv')
    return pd.read_csv(csv_path)


# Load dataset once at import time; run() reads this module-level frame.
df = load_data()
| # ============================================= | |
| # Main EDA function | |
| # ============================================= | |
def _show_sentiment_pair(title, file_suffix=""):
    """Render the Negative and Positive images for *title* in two columns.

    Each image is read from ``BASE_DIR`` as
    ``"<Sentiment> - <title><file_suffix>.png"`` and captioned
    ``"<Sentiment> - <title>"``.
    """
    columns = st.columns(2)
    for column, sentiment in zip(columns, ("Negative", "Positive")):
        caption = f"{sentiment} - {title}"
        image_path = os.path.join(BASE_DIR, f"{caption}{file_suffix}.png")
        with column:
            st.image(image_path, caption=caption)


def run():
    """Render the Exploratory Data Analysis page.

    Draws, in order: the page title and introduction, a preview of the
    module-level ``df``, a count plot of the ``rating`` column, a histogram
    of per-review word counts taken from the ``text`` column, and the
    pre-rendered word-cloud and topic-modeling images stored in ``BASE_DIR``.

    The module-level ``df`` is only read, never modified, so the dataset
    preview looks the same on every rerun.
    """
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Exploratory Data Analysis (EDA)")
    st.markdown(
        """
        This section provides an exploratory data analysis (EDA) of Singapore Airlines (SQ) customer reviews.
        We aim to understand the distribution of ratings, textual review characteristics, and topic modeling results.
        """
    )

    # Dataset preview
    st.write("### Dataset Preview")
    st.dataframe(df.head())

    # Distribution of ratings
    st.write("### Distribution of Ratings")
    rating_fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(
        x='rating',
        data=df,
        palette='viridis',
        ax=ax,
        # Drop missing ratings first: sorting a sequence that contains NaN
        # gives an ill-defined order.
        order=sorted(df['rating'].dropna().unique()),
    )
    for bar in ax.patches:
        count = bar.get_height()
        if pd.isna(count):
            continue
        # Bar heights are floats; cast so the label reads "1,234" rather
        # than "1,234.0".
        ax.annotate(
            f'{int(count):,}',
            (bar.get_x() + bar.get_width() / 2, count),
            ha='center', va='bottom', fontsize=10, fontweight='bold',
        )
    st.pyplot(rating_fig)
    # pyplot keeps every figure alive until it is closed; release it so
    # figures do not pile up across reruns.
    plt.close(rating_fig)

    # Distribution of review length (whitespace-separated word count)
    st.write("### Distribution of Review Length")
    # Built as a separate frame instead of a new column on the shared df.
    review_lengths = (
        df['text']
        .apply(lambda review: len(str(review).split()))
        .rename('text_length')
        .to_frame()
    )
    length_fig = px.histogram(
        review_lengths,
        x='text_length',
        nbins=50,
        title='Review Length Distribution',
    )
    st.plotly_chart(length_fig, use_container_width=True)

    # Wordclouds
    _show_sentiment_pair("Wordcloud")

    # Topic Modeling Results
    st.write("## Topic Modeling Results")
    # The topic-modeling image files carry a " 10" suffix that is not part
    # of the caption.
    for chart_title in (
        "Top Words Distributions",
        "Topic Activities Over Time",
        "Topics Weights",
    ):
        _show_sentiment_pair(chart_title, file_suffix=" 10")