Sentiment_Analysis_And_Topic_Modelling

Sleeping

App Files Files Community

Sentiment_Analysis_And_Topic_Modelling / src /eda.py

hanantonio

Upload 10 files

4a9a3c2 verified 5 months ago

raw

history blame contribute delete

3.45 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import plotly.express as px
	from PIL import Image
	import os

	# =============================================
	# Base directory (works in container): the folder that contains this module.
	# The CSV and the image files used in this file are joined onto this path.
	# =============================================
	BASE_DIR = os.path.dirname(__file__)

	# =============================================
	# Cache dataset to avoid reload every time
	# =============================================
	@st.cache_data
	def load_data():
	    """Read the Singapore Airlines reviews CSV stored under ``BASE_DIR``.

	    The function is wrapped in ``st.cache_data`` so the file is not re-read
	    on every call.

	    Returns:
	        pandas.DataFrame: parsed contents of ``singapore_airlines_reviews.csv``.
	    """
	    return pd.read_csv(os.path.join(BASE_DIR, 'singapore_airlines_reviews.csv'))

	# Load the dataset once when this module is imported; run() reads this
	# module-level DataFrame.
	df = load_data()

	# =============================================
	# Main EDA function
	# =============================================
	def run():
	    """Render the Exploratory Data Analysis (EDA) page.

	    Sections, in order: title and intro text, a preview of the module-level
	    ``df``, a count plot of the ``rating`` column, a histogram of review
	    length (whitespace-separated word count of the ``text`` column), the
	    word-cloud images, and the topic-modelling result images.

	    The module-level ``df`` is only read, never modified, so repeated calls
	    render the same dataset preview.
	    """
	    st.title("ACRE - Automated Customer Review Analysis")
	    st.subheader("Exploratory Data Analysis (EDA)")

	    st.markdown(
	        """
	This section provides an exploratory data analysis (EDA) of Singapore Airlines (SQ) customer reviews.
	We aim to understand the distribution of ratings, textual review characteristics, and topic modeling results.
	"""
	    )

	    # Dataset preview
	    st.write("### Dataset Preview")
	    st.dataframe(df.head())

	    # Distribution of ratings
	    st.write("### Distribution of Ratings")
	    fig, ax = plt.subplots(figsize=(8, 5))
	    sns.countplot(
	        x='rating',
	        data=df,
	        palette='viridis',
	        ax=ax,
	        # Drop missing ratings first: NaN does not sort predictably.
	        order=sorted(df['rating'].dropna().unique()),
	    )
	    for bar in ax.patches:
	        height = bar.get_height()
	        if pd.isna(height):
	            # Nothing to label on a bar without a height.
	            continue
	        # Bar heights come back as floats; show them as whole counts
	        # ("1,234" rather than "1,234.0").
	        ax.annotate(
	            f'{int(round(height)):,}',
	            (bar.get_x() + bar.get_width() / 2, height),
	            ha='center', va='bottom', fontsize=10, fontweight='bold',
	        )
	    st.pyplot(fig)
	    # pyplot keeps every figure it created until it is closed; release this
	    # one so repeated calls do not accumulate open figures.
	    plt.close(fig)

	    # Distribution of review length
	    st.write("### Distribution of Review Length")
	    # Build the lengths in a separate frame instead of adding a column to the
	    # shared module-level df, which would leak into the preview above on the
	    # next call.
	    review_lengths = pd.DataFrame(
	        {'text_length': df['text'].apply(lambda x: len(str(x).split()))}
	    )
	    length_fig = px.histogram(
	        review_lengths, x='text_length', nbins=50, title='Review Length Distribution'
	    )
	    st.plotly_chart(length_fig, use_container_width=True)

	    # Wordclouds
	    _show_image_pair("Wordcloud")

	    # Topic Modeling Results
	    st.write("## Topic Modeling Results")
	    _show_image_pair("Top Words Distributions", file_suffix=" 10")
	    _show_image_pair("Topic Activities Over Time", file_suffix=" 10")
	    _show_image_pair("Topics Weights", file_suffix=" 10")


	def _show_image_pair(title, file_suffix=""):
	    """Show the Negative and Positive images for ``title`` side by side.

	    Args:
	        title: Text following ``"Negative - "`` / ``"Positive - "`` in both
	            the file name and the caption.
	        file_suffix: Extra text that appears in the file name only, between
	            ``title`` and ``".png"`` (for example ``" 10"``).
	    """
	    # Negative goes in the left column, Positive in the right.
	    for column, sentiment in zip(st.columns(2), ("Negative", "Positive")):
	        with column:
	            st.image(
	                os.path.join(BASE_DIR, f"{sentiment} - {title}{file_suffix}.png"),
	                caption=f"{sentiment} - {title}",
	            )