Spaces:

NTDuy
/

vietnamese_ecommerce_topic_classification

Paused

App Files Files Community

vietnamese_ecommerce_topic_classification / graphs.py

NTDuy

format text

697875a verified over 1 year ago

raw

history blame contribute delete

10.6 kB


	import streamlit as st
	import plotly.express as px
	import plotly.graph_objects as go
	import pandas as pd
	from plotly.subplots import make_subplots
	import plotly.graph_objects as go
	from wordcloud import WordCloud
	from underthesea import word_tokenize
	import matplotlib.pyplot as plt


	def sentence_topic_plot(result):
	    """Plot one review's per-topic scores and announce the matching topics.

	    Args:
	        result: Object exposing ``.detach().numpy()`` (presumably a torch
	            tensor of topic probabilities -- confirm against the caller).
	            Only the first row is used; its entries are read in the order
	            Quality, Serve, Pack, Shipping, Price, Other.

	    Side effects:
	        Writes a header listing every topic whose score is at least 0.5,
	        followed by a horizontal bar chart of all scores sorted ascending,
	        to the current Streamlit page.
	    """
	    threshold = 0.5
	    # Both sequences are reversed together, so each name stays paired with
	    # its own score while the header lists topics in reversed order.
	    topic_names = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"][::-1]
	    scores = result.detach().numpy()[0][::-1]
	    topic_scores = {name: scores[i] for i, name in enumerate(topic_names)}

	    ranked = sorted(topic_scores.items(), key=lambda item: item[1])
	    selected = [name for name, score in topic_scores.items() if score >= threshold]

	    fig = go.Figure()
	    fig.add_trace(go.Bar(
	        y=[name for name, _ in ranked],
	        x=[score for _, score in ranked],
	        orientation='h',
	    ))
	    fig.update_layout(xaxis_title="Probability", yaxis_title="Topics")

	    if selected:
	        message = ", ".join(selected)
	        st.header(f"Your review is related to :blue[{message}]")
	    else:
	        # Without this branch the header would contain an empty ":blue[]".
	        st.header("Your review is not clearly related to any topic")
	    st.plotly_chart(fig, use_container_width=True)

	def KPI_card(name="Total Reviews", value=1000, box_color=(123, 167, 212), font_color=(0, 0, 0), icon="fa-list"):
	    """Build the HTML snippet for a coloured KPI card.

	    Args:
	        name: Caption shown in the smaller font above the value.
	        value: KPI value; interpolated into the markup as-is.
	        box_color: Background colour; its first three items are used as
	            the R, G, B components.
	        font_color: Text colour; its first three items are used as the
	            R, G, B components.
	        icon: Font Awesome icon class placed on the ``<i>`` element.

	    Returns:
	        str: A ``<p>`` element (with the Font Awesome stylesheet link
	        inside it) to be rendered as raw HTML by the caller.

	    Note:
	        ``name``, ``value`` and ``icon`` are inserted without HTML
	        escaping, so callers must not pass untrusted text.
	    """
	    fontsize = 20  # caption size in px; the value is drawn 10px larger

	    # The original markup ended with a stray "</style>" that had no
	    # opening tag; it is dropped so the snippet is well-formed.
	    return f"""<p style='background-color:rgb({box_color[0]}, {box_color[1]}, {box_color[2]});
	color: rgb({font_color[0]}, {font_color[1]}, {font_color[2]});
	font-size: {fontsize + 10}px;
	font-family: "Source Sans Pro", sans-serif;
	font-weight: 600;
	border-radius: 20px;
	padding-left: 20px;
	padding-top: 30px;
	padding-bottom: 30px;
	line-height:30px; text-align: center;'>
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
	<i class="fa-solid fas {icon} fa-2x fa-pull-left "></i>
	<span style='font-size: {fontsize}px;
	margin-top: 0'>{name}</span><BR>
	{value}
	</p>"""

	def rating_distribution_pie_chart(classification_df):
	    """Draw a pie chart of how many reviews fall on each rating value.

	    Args:
	        classification_df: DataFrame with a ``rating`` column.

	    Side effects:
	        Renders the chart on the current Streamlit page.
	    """
	    # Counts per rating, ordered by the rating value itself.
	    rating_counts = classification_df["rating"].value_counts().sort_index()

	    fig_pie = px.pie(
	        rating_counts,
	        height=150,
	        values=rating_counts.values,
	        names=rating_counts.index,
	        color=rating_counts.values,
	        color_discrete_sequence=px.colors.sequential.Blues,
	    )
	    # sort=False keeps the slices in rating order instead of by size.
	    fig_pie.update_traces(sort=False)
	    fig_pie.update_layout(margin={"t": 0, "b": 0, "l": 0, "r": 0})

	    st.plotly_chart(fig_pie, use_container_width=True, height=100)

	def kpi_total_reviews(classification_df):
	    """Render a KPI card showing the number of rows in ``classification_df``."""
	    review_count = len(classification_df)
	    card_html = KPI_card(name="Total Reviews", value=str(review_count), icon="fa-list")
	    # The card is raw HTML, so Streamlit must be told not to escape it.
	    st.markdown(card_html, unsafe_allow_html=True)

	def kpi_average_rating(classification_df):
	    """Render a KPI card with the mean of the ``rating`` column, to one decimal."""
	    mean_rating = classification_df["rating"].mean()
	    card_html = KPI_card(name="Average Rating", value=round(mean_rating, 1), icon="fa-star")
	    # The card is raw HTML, so Streamlit must be told not to escape it.
	    st.markdown(card_html, unsafe_allow_html=True)


	def time_series_comments(classification_df, freq="D", metric="Count Reviews"):
	    """Plot a per-topic time series of review counts or average ratings.

	    Args:
	        classification_df: DataFrame with a ``time`` column usable by
	            ``pd.Grouper``, the topic columns Quality, Serve, Pack,
	            Shipping and Price, plus ``comment`` (used for counting) and
	            ``rating`` (used for averaging). Topic columns are compared
	            with ``== 1`` and summed, so they are presumably 0/1 flags --
	            confirm against the caller.
	        freq: Resampling frequency passed to ``pd.Grouper``.
	        metric: ``"Count Reviews"`` or ``"Average Rating"``.

	    Raises:
	        ValueError: If ``metric`` is not one of the two supported names.

	    Side effects:
	        Renders a Plotly line chart on the current Streamlit page.
	    """
	    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
	    grouped = classification_df.groupby(pd.Grouper(key='time', freq=freq))

	    if metric == "Count Reviews":
	        ts_plot_data = grouped[labels].sum()
	        ts_plot_data["Total"] = grouped["comment"].count()
	    elif metric == "Average Rating":
	        per_series = {
	            label: classification_df[classification_df[label] == 1]
	            .groupby(pd.Grouper(key='time', freq=freq))["rating"]
	            .mean()
	            for label in labels
	        }
	        per_series["Total"] = grouped["rating"].mean()
	        # Building the frame from all series at once uses the union of
	        # their date ranges. Assigning them one by one into an empty frame
	        # would lock the index to the first topic's range and silently
	        # drop periods that only other topics (or the total) cover.
	        ts_plot_data = pd.DataFrame(per_series)
	    else:
	        raise ValueError(
	            f"Unsupported metric {metric!r}; expected 'Count Reviews' or 'Average Rating'"
	        )

	    # Periods without data carry the previous value forward.
	    ts_plot_data = ts_plot_data.ffill()
	    # Drop the index name so it cannot clash with the "time" column below.
	    ts_plot_data = ts_plot_data.rename_axis(None)
	    ts_plot_data["time"] = pd.to_datetime(ts_plot_data.index)

	    # Plot only the data series; the helper "time" column is the x axis
	    # and must not be passed as a y series as well.
	    series_columns = labels + ["Total"]
	    fig = px.line(ts_plot_data, x="time", y=series_columns, height=300, title=f"{metric} on time")
	    fig.update_layout(
	        title=dict(font=dict(size=20), y=1, x=0),
	        legend=dict(
	            orientation="h",
	            entrywidth=90,
	            yanchor="bottom",
	            y=0,
	            xanchor="right",
	            x=1,
	            title=None,
	            traceorder="normal",
	            yref="container",
	        ),
	        yaxis_title=metric,
	        xaxis=dict(showgrid=False),
	        yaxis=dict(showgrid=False),
	        margin=dict(r=5, l=5, t=50, b=5),
	    )

	    st.plotly_chart(fig, use_container_width=True)


	def hor_barchart(classification_df, metric="Count Reviews"):
	    """Draw a horizontal bar chart with one bar per topic.

	    Args:
	        classification_df: DataFrame with the topic columns Quality, Serve,
	            Pack, Shipping and Price, and a ``rating`` column.
	        metric: ``"Count Reviews"`` plots the column sums sorted ascending;
	            ``"Average Rating"`` plots the mean rating of the rows where
	            the topic column equals 1, in the fixed topic order. Any other
	            value leaves the data empty.

	    Side effects:
	        Renders the chart on the current Streamlit page.
	    """
	    topics = ["Quality", "Serve", "Pack", "Shipping", "Price"]

	    data = {}
	    if metric == "Count Reviews":
	        topic_totals = classification_df[topics].sum()
	        data = topic_totals.sort_values(ascending=True)
	    elif metric == "Average Rating":
	        data = pd.Series({
	            topic: classification_df[classification_df[topic] == 1]["rating"].mean()
	            for topic in topics
	        })

	    fig = px.bar(data, orientation="h", width=350, title="Most commented topic")
	    fig.update_layout(
	        yaxis_title=None,
	        height=400,
	        xaxis_visible=False,
	        showlegend=False,
	        title=dict(font=dict(size=20), y=0.85, x=0),
	    )
	    st.plotly_chart(fig, use_container_width=True)


	def print_reviews(classification_df):
	    """Render the review browser: filter controls plus a list or word cloud.

	    The left column holds the controls (viewing method, topic filter and,
	    for the list view, rating range and number of reviews). The right
	    column shows either the most recent matching reviews or a word cloud
	    built from the matching comments.

	    Args:
	        classification_df: DataFrame with ``comment``, ``time``, ``rating``
	            and ``username`` columns plus the topic columns Quality, Serve,
	            Pack, Shipping and Price. Topic columns are tested for
	            truthiness and compared with ``== 1``, so they are presumably
	            0/1 flags -- confirm against the caller.

	    Side effects:
	        Draws widgets and output on the current Streamlit page.
	    """
	    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
	    no_match_message = "No comment satisfy the condition"
	    col_1, col_2 = st.columns([1, 3], gap="large")

	    with col_1:
	        # Raw strings keep the LaTeX "\:" spacing commands byte-identical
	        # without relying on invalid escape sequences.
	        viewing_method = st.selectbox(
	            r"$$ \bold{Viewing \: method: } $$", ["Individual Comments", "Wordcloud"]
	        )
	        filter_by = st.multiselect(r"$$ \bold{Select \: reviews \: related \: to: } $$", labels)

	    selected_labels = ", ".join(filter_by) if filter_by else "All topics"

	    # Boolean indexing and sort_values return new frames, so the caller's
	    # DataFrame is never modified.
	    df_filter = classification_df
	    if filter_by:
	        # Keep only reviews tagged with every selected topic.
	        df_filter = df_filter[df_filter[filter_by].all(axis=1)]

	    if viewing_method == "Individual Comments":
	        with col_1:
	            low, high = st.slider(
	                r"$$ \bold{Rating \: range: \:} $$", min_value=1, max_value=5, value=(1, 5), step=1
	            )
	            df_filter = df_filter[(df_filter["rating"] >= low) & (df_filter["rating"] <= high)]
	            df_filter = df_filter.sort_values("time", ascending=False)

	            if df_filter.empty:
	                top_n = 0
	            else:
	                # Default to a tenth of the matches, but never below the
	                # slider minimum: fewer than 10 matches would otherwise
	                # yield a default of 0, outside the allowed range.
	                top_n = st.slider(
	                    r"$$ \bold{Print \: top: \:} $$",
	                    min_value=1,
	                    max_value=len(df_filter),
	                    value=max(1, len(df_filter) // 10),
	                    step=1,
	                )

	        with col_2:
	            st.header(f"Displaying {top_n} most recent reviews related to :red[{selected_labels}]")
	            if df_filter.empty:
	                st.markdown(no_match_message)
	            else:
	                with st.container(height=300):
	                    recent_reviews = df_filter.head(top_n).to_dict("records")
	                    for number, review in enumerate(recent_reviews, start=1):
	                        st.markdown(f"Reviews {number}:")
	                        rating = ":star:" * review["rating"]
	                        topics_str = ", ".join(topic for topic in labels if review[topic] == 1)
	                        # Separate names so the outer layout columns are
	                        # not rebound inside the loop.
	                        meta_col, text_col = st.columns([1, 2])
	                        meta_col.markdown(
	                            f"From: {review['username']} \n Time: {review['time']} \n Rating: {rating}"
	                        )
	                        meta_col.markdown(f"Topics: {topics_str}")
	                        text_col.markdown(review["comment"])
	                        st.markdown("---")
	    else:
	        with col_2:
	            st.header(f"Wordcloud for reviews related to :red[{selected_labels}]")
	            # Missing comments are skipped; joining them would fail.
	            comments = df_filter["comment"].dropna().astype(str).str.lower()
	            text = " ".join(comments)
	            if not text.strip():
	                st.markdown(no_match_message)
	                return

	            text = word_tokenize(text, format="text")
	            try:
	                word_cloud = WordCloud(collocations=False, background_color='white').generate(text)
	            except ValueError:
	                # Raised by wordcloud when no usable word is left.
	                st.markdown(no_match_message)
	                return

	            fig, ax = plt.subplots()
	            ax.imshow(word_cloud)
	            ax.axis("off")
	            st.pyplot(fig)
	            # Release the figure; pyplot otherwise keeps every figure
	            # created across reruns alive.
	            plt.close(fig)
	def tornado_chart(df):
	    """Draw a two-panel "tornado" chart: review count and average rating per topic.

	    Args:
	        df: DataFrame with the topic columns Quality, Serve, Pack, Shipping
	            and Price, and a ``rating`` column.

	    Side effects:
	        Renders the figure on the current Streamlit page. The left panel
	        (reversed x axis) shows the per-topic column sums sorted ascending;
	        the right panel shows the mean rating of rows where the topic
	        column equals 1, in the same topic order.
	    """
	    topics = ["Quality", "Serve", "Pack", "Shipping", "Price"]

	    review_counts = df[topics].sum().sort_values(ascending=True)
	    mean_ratings = pd.Series(
	        {topic: df[df[topic] == 1]["rating"].mean() for topic in topics}
	    ).reindex(index=review_counts.index)

	    fig = make_subplots(rows=1, cols=2, vertical_spacing=0)

	    # One horizontal trace per panel; with one row per topic, histfunc='sum'
	    # simply reproduces each value as the bar length.
	    panels = (("Count Reviews", review_counts), ("Average Rating", mean_ratings))
	    for panel_col, (trace_name, series) in enumerate(panels, start=1):
	        fig.add_trace(
	            go.Histogram(
	                x=series.values,
	                y=series.index,
	                histfunc='sum',
	                orientation='h',
	                opacity=0.6,
	                name=trace_name,
	            ),
	            row=1,
	            col=panel_col,
	        )

	    # Left panel grows leftwards, right panel gets integer ticks and no
	    # duplicate topic axis.
	    fig.update_xaxes(autorange="reversed", row=1, col=1)
	    fig.update_xaxes(tickmode='linear', dtick=1, row=1, col=2)
	    fig.update_yaxes(visible=False, row=1, col=2)

	    fig.update_layout(title="Review Count and Average Rating by Topic")
	    fig.update_layout(
	        xaxis=dict(domain=[0.0, 0.45]),
	        xaxis2=dict(domain=[0.45, 0.90]),
	        width=500,
	        height=300,
	        title=dict(font=dict(size=18), y=1, x=0),
	        legend=dict(
	            orientation="h",
	            entrywidth=90,
	            yanchor="bottom",
	            y=0,
	            xanchor="right",
	            x=1,
	            title=None,
	            traceorder="normal",
	            yref="container",
	        ),
	        margin=dict(r=5, l=5, t=50, b=0),
	    )
	    st.plotly_chart(fig, use_container_width=True)