File size: 4,189 Bytes
b0f281e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f313894
b0f281e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7b15fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from PIL import Image

# =============================================
# Cache the dataset so it is not reloaded on every rerun
# =============================================
@st.cache_data
def load_data():
    """Read the Singapore Airlines reviews CSV into a DataFrame.

    The function is wrapped in ``st.cache_data`` so that Streamlit reuses the
    parsed result across script reruns instead of reading the file from disk
    every time it is called.

    Returns:
        pandas.DataFrame: contents of ``./src/singapore_airlines_reviews.csv``.
        The path is relative to the process's working directory.
    """
    return pd.read_csv('./src/singapore_airlines_reviews.csv')

# Load the dataset once at import time; run() reads this module-level
# DataFrame for the preview table and the charts.
df = load_data()

# =============================================
# Main app
# =============================================
def _show_image_pair(name):
    """Show the Negative and Positive images for *name* in two columns.

    Each image is read from ``./src/<Sentiment> - <name>.png`` and captioned
    ``<Sentiment> - <name>``.
    """
    left, right = st.columns(2)
    for column, sentiment in ((left, "Negative"), (right, "Positive")):
        label = f"{sentiment} - {name}"
        with column:
            st.image(f"./src/{label}.png", caption=label)


def run():
    """Render the Exploratory Data Analysis page.

    Sections, in order: an introduction, a preview of the module-level
    ``df``, a count plot of the ``rating`` column, a histogram of the number
    of whitespace-separated words in the ``text`` column, the word-cloud
    images, and the topic-modeling result images.

    The function only reads ``df``; it does not add columns to it.
    """
    # Page heading
    st.subheader("Exploratory Data Analysis (EDA)")

    st.markdown(
        """
        This section provides an exploratory data analysis (EDA) of Singapore Airlines (SQ) customer reviews.  
        We aim to understand the distribution of ratings, textual review characteristics, and topic modeling results.  
        These insights serve as the foundation for building automated models that classify sentiment and uncover key themes 
        in customer feedback.
        """
    )

    # ===============================
    # Dataset Preview
    # ===============================
    st.write("### Dataset Preview")
    st.dataframe(df.head())

    # ===============================
    # Rating Distribution
    # ===============================
    st.write("### Distribution of Ratings")
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(x='rating', data=df, palette='viridis', ax=ax,
                  order=sorted(df['rating'].unique()))
    for bar in ax.patches:
        height = bar.get_height()
        # Bar heights are floats; format without decimals so a count reads
        # "1,234" rather than "1,234.0".
        ax.annotate(f'{height:,.0f}',
                    (bar.get_x() + bar.get_width() / 2, height),
                    ha='center', va='bottom', fontsize=10, fontweight='bold')
    st.pyplot(fig)
    # Figures made through pyplot stay registered until closed; release this
    # one so repeated reruns do not accumulate open figures.
    plt.close(fig)

    st.markdown(
        """
        **Note:** Ratings are explored here only as descriptive information about passenger experiences. 
        In the inference page, actual sentiment will be predicted automatically from the review text using NLP techniques.
        """
    )

    # ===============================
    # Review Length Analysis
    # ===============================
    st.write("### Distribution of Review Length")
    # Word counts are kept in a separate frame so the shared module-level
    # DataFrame is not modified just by rendering the page.
    word_counts = df['text'].apply(lambda text: len(str(text).split()))
    length_frame = word_counts.to_frame(name='text_length')
    fig = px.histogram(length_frame, x='text_length', nbins=50,
                       title='Review Length Distribution')
    st.plotly_chart(fig, use_container_width=True)

    # Word clouds
    _show_image_pair("Wordcloud")

    # ===============================
    # Topic Modeling Results (Images)
    # ===============================
    st.write("## Topic Modeling Results")

    # 1. Top Words Distributions
    _show_image_pair("Top Words Distributions")

    # 2. Topic Activities Over Time
    _show_image_pair("Topic Activities Over Time")

    # 3. Topics Weights
    _show_image_pair("Topics Weights")

# =============================================
# Run Script
# =============================================
# Render the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()