| |
|
| | import streamlit as st |
| | import plotly.express as px |
| | import plotly.graph_objects as go |
| | import pandas as pd |
| | from plotly.subplots import make_subplots |
| | import plotly.graph_objects as go |
| | from wordcloud import WordCloud |
| | from underthesea import word_tokenize |
| | import matplotlib.pyplot as plt |
| |
|
| |
|
def sentence_topic_plot(result, threshold=0.5):
    """Show the topic probabilities of one review as a horizontal bar chart.

    Writes a Streamlit header naming every topic whose probability reaches
    ``threshold``, followed by a Plotly bar chart of all topic probabilities
    sorted in ascending order.

    Args:
        result: Object exposing ``detach()`` whose first row holds one
            probability per topic, in the order Quality, Serve, Pack,
            Shipping, Price, Other.  Presumably a torch tensor of shape
            ``(1, 6)`` -- confirm against the caller.
        threshold: Minimum probability for a topic to be named in the header.
    """
    topic_names = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"]
    # .cpu() is a no-op for CPU tensors and lets accelerator outputs convert.
    probabilities = result.detach().cpu().numpy()[0]

    # Topics are walked back to front; the stable sort keeps that order among
    # equal probabilities, and the header lists matches in the same order.
    scored = list(zip(topic_names[::-1], probabilities[::-1]))
    ranked = sorted(scored, key=lambda pair: pair[1])
    relevant = [name for name, score in scored if score >= threshold]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=[name for name, _ in ranked],
        x=[score for _, score in ranked],
        orientation="h",
    ))
    fig.update_layout(xaxis_title="Probability", yaxis_title="Topics")

    if relevant:
        message = ", ".join(relevant)
        st.header(f"Your review is related to :blue[{message}]")
    else:
        # Without this branch the header would end in an empty ":blue[]".
        st.header("Your review is not clearly related to any topic")
    st.plotly_chart(fig, use_container_width=True)
| |
|
def KPI_card(name="Total Reviews", value=1000, box_color=(123, 167, 212), font_color=(0, 0, 0), icon="fa-list"):
    """Build the HTML snippet for a rounded KPI card.

    Args:
        name: Caption shown on the first line of the card.
        value: Figure shown below the caption; interpolated with ``str()``.
        box_color: Background colour; indices 0-2 are used as R, G, B.
        font_color: Text colour; indices 0-2 are used as R, G, B.
        icon: Font Awesome icon class placed to the left of the text.

    Returns:
        An HTML string.  ``name``, ``value`` and ``icon`` are inserted
        verbatim (no escaping), so they must come from trusted code.
    """
    caption_px = 20  # the value line is rendered 10px larger than the caption

    # The markup deliberately contains no blank line and no closing tag
    # without a matching opener.
    return f"""<p style='background-color:rgb({box_color[0]}, {box_color[1]}, {box_color[2]});
        color: rgb({font_color[0]}, {font_color[1]}, {font_color[2]});
        font-size: {caption_px + 10}px;
        font-family: "Source Sans Pro", sans-serif;
        font-weight: 600;
        border-radius: 20px;
        padding-left: 20px;
        padding-top: 30px;
        padding-bottom: 30px;
        line-height:30px; text-align: center;'>
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
        <i class="fa-solid fas {icon} fa-2x fa-pull-left "></i>
        <span style='font-size: {caption_px}px;
        margin-top: 0'>{name}</span><BR>
        {value}
        </p>"""
| |
|
def rating_distribution_pie_chart(classification_df):
    """Draw a compact pie chart of the number of reviews per rating value.

    Counts the values of the ``rating`` column of ``classification_df`` and
    renders one slice per distinct rating on the Streamlit page.
    """
    rating_counts = classification_df["rating"].value_counts().sort_index()
    pie_options = dict(
        height=150,
        values=rating_counts.values,
        names=rating_counts.index,
        color=rating_counts.values,
        color_discrete_sequence=px.colors.sequential.Blues,
    )
    chart = px.pie(rating_counts, **pie_options)

    # sort=False keeps the slices in rating order instead of ordering by size.
    chart.update_traces(sort=False)
    chart.update_layout(margin=dict(t=0, b=0, l=0, r=0))
    st.plotly_chart(chart, use_container_width=True, height=100)
| | |
def kpi_total_reviews(classification_df):
    """Render a KPI card showing the number of rows in ``classification_df``."""
    review_count = len(classification_df)
    card_html = KPI_card(name="Total Reviews", value=str(review_count), icon="fa-list")
    # The card is raw HTML, so Streamlit must be told not to escape it.
    st.markdown(card_html, unsafe_allow_html=True)
| |
|
def kpi_average_rating(classification_df):
    """Render a KPI card with the mean of the ``rating`` column, to one decimal."""
    mean_rating = classification_df["rating"].mean()
    card_html = KPI_card(name="Average Rating", value=round(mean_rating, 1), icon="fa-star")
    # The card is raw HTML, so Streamlit must be told not to escape it.
    st.markdown(card_html, unsafe_allow_html=True)
| |
|
| |
|
def time_series_comments(classification_df, freq="D", metric="Count Reviews"):
    """Plot one line per topic, plus a "Total" line, over time.

    Args:
        classification_df: Frame with a ``time`` column usable by
            ``pd.Grouper``, a ``rating`` column, a ``comment`` column and one
            column per topic (Quality, Serve, Pack, Shipping, Price) that is
            summed for counts and compared with 1 to select a topic's reviews.
        freq: Resampling frequency passed to ``pd.Grouper``.
        metric: ``"Count Reviews"`` plots per-period sums of the topic columns
            and the number of comments; ``"Average Rating"`` plots the mean
            rating of the reviews flagged for each topic and of all reviews.

    Raises:
        ValueError: If ``metric`` is not one of the two supported names.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]

    def by_period(frame):
        # A fresh Grouper per call, exactly as each groupby needs its own.
        return frame.groupby(pd.Grouper(key="time", freq=freq))

    if metric == "Count Reviews":
        grouped = by_period(classification_df)
        ts_plot_data = grouped[labels].sum()
        ts_plot_data["Total"] = grouped["comment"].count()
    elif metric == "Average Rating":
        series_by_name = {
            label: by_period(classification_df[classification_df[label] == 1])["rating"].mean()
            for label in labels
        }
        series_by_name["Total"] = by_period(classification_df)["rating"].mean()
        # Building the frame from a dict takes the union of all periods.
        # Assigning the columns one by one to an empty frame would pin the
        # index to the first topic and clip every other series to its range.
        ts_plot_data = pd.DataFrame(series_by_name)
    else:
        raise ValueError(
            f"Unsupported metric {metric!r}; expected 'Count Reviews' or 'Average Rating'"
        )

    value_columns = labels + ["Total"]
    ts_plot_data["time"] = pd.to_datetime(ts_plot_data.index)
    # Periods with no data inherit the previous period's value.
    ts_plot_data = ts_plot_data.ffill()

    fig = px.line(ts_plot_data, x="time", y=value_columns, height=300, title=f"{metric} on time")
    fig.update_layout(
        title=dict(font=dict(size=20), y=1, x=0),
        legend=dict(
            orientation="h",
            entrywidth=90,
            yanchor="bottom",
            y=0,
            xanchor="right",
            x=1,
            title=None,
            traceorder="normal",
            yref="container",
        ),
        yaxis_title=metric,
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        margin=dict(r=5, l=5, t=50, b=5),
    )

    st.plotly_chart(fig, use_container_width=True)
| |
|
| |
|
def hor_barchart(classification_df, metric="Count Reviews"):
    """Draw a horizontal bar chart with one bar per topic.

    Args:
        classification_df: Frame with a ``rating`` column and one column per
            topic (Quality, Serve, Pack, Shipping, Price) that is summed for
            counts and compared with 1 to select a topic's reviews.
        metric: ``"Count Reviews"`` plots the sum of each topic column;
            ``"Average Rating"`` plots the mean rating of the reviews flagged
            for each topic.

    Raises:
        ValueError: If ``metric`` is not one of the two supported names.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]

    if metric == "Average Rating":
        data = pd.Series({
            label: classification_df[classification_df[label] == 1]["rating"].mean()
            for label in labels
        })
        # Sorted like the count view; topics without reviews (NaN) go first
        # so they end up at the bottom of the chart.
        data = data.sort_values(ascending=True, na_position="first")
        title = "Highest rated topic"
    elif metric == "Count Reviews":
        data = classification_df[labels].sum().sort_values(ascending=True)
        title = "Most commented topic"
    else:
        raise ValueError(
            f"Unsupported metric {metric!r}; expected 'Count Reviews' or 'Average Rating'"
        )

    fig = px.bar(data, orientation="h", width=350, title=title)
    fig.update_layout(
        yaxis_title=None,
        height=400,
        xaxis_visible=False,
        showlegend=False,
        title=dict(font=dict(size=20), y=0.85, x=0),
    )
    st.plotly_chart(fig, use_container_width=True)
| |
|
| |
|
def print_reviews(classification_df):
    """Render the review browser: filter controls on the left, content on the right.

    The left column offers a viewing method and a topic filter.  Depending on
    the viewing method, the right column shows either the most recent matching
    reviews or a word cloud built from their text.

    Args:
        classification_df: Frame with ``comment``, ``time``, ``rating`` and
            ``username`` columns plus one column per topic (Quality, Serve,
            Pack, Shipping, Price).  A review matches a topic filter when all
            selected topic columns are truthy for its row.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
    controls_col, content_col = st.columns([1, 3], gap="large")

    with controls_col:
        # Backslashes are doubled so the LaTeX "\:" spacing command reaches
        # Streamlit without relying on an invalid Python escape sequence.
        viewing_method = st.selectbox(
            "$$ \\bold{Viewing \\: method: } $$", ["Individual Comments", "Wordcloud"]
        )
        filter_by = st.multiselect("$$ \\bold{Select \\: reviews \\: related \\: to: } $$", labels)

    selected_labels = ", ".join(filter_by) if filter_by else "All topics"
    df_filter = classification_df.copy()
    if filter_by:
        df_filter = df_filter[df_filter[filter_by].all(axis=1)]

    if viewing_method == "Individual Comments":
        _render_recent_reviews(df_filter, labels, selected_labels, controls_col, content_col)
    else:
        _render_wordcloud(df_filter, selected_labels, content_col)


def _render_recent_reviews(df_filter, labels, selected_labels, controls_col, content_col):
    """List the newest reviews within a user-chosen rating range."""
    with controls_col:
        low, high = st.slider(
            "$$ \\bold{Rating \\: range: \\:} $$", min_value=1, max_value=5, value=(1, 5), step=1
        )
        df_filter = df_filter[(df_filter["rating"] >= low) & (df_filter["rating"] <= high)]
        df_filter = df_filter.sort_values("time", ascending=False)

        total = len(df_filter)
        if total > 1:
            # Default to a tenth of the matches, but never to zero reviews.
            top_n = st.slider(
                "$$ \\bold{Print \\: top: \\:} $$",
                min_value=1,
                max_value=total,
                value=max(1, total // 10),
                step=1,
            )
        else:
            # With zero or one match there is nothing to choose.
            top_n = total

    with content_col:
        st.header(f"Displaying {top_n} most recent reviews related to :red[{selected_labels}]")
        if df_filter.empty:
            st.markdown("No comment satisfy the condition")
            return

        with st.container(height=300):
            for i in range(top_n):
                st.markdown(f"**Reviews** **{i + 1}:**")
                comment = df_filter["comment"].iloc[i]
                posted_at = df_filter["time"].iloc[i]
                # int() lets float-typed whole ratings repeat the star too.
                rating = ":star:" * int(df_filter["rating"].iloc[i])
                sender = df_filter["username"].iloc[i]
                topics_str = ", ".join(
                    topic for topic in labels if df_filter[topic].iloc[i] == 1
                )
                # Distinct names: the page-level columns are not rebound here.
                meta_col, text_col = st.columns([1, 2])
                meta_col.markdown(f"From: {sender} \n Time: {posted_at} \n Rating: {rating}")
                meta_col.markdown(f"Topics: {topics_str}")
                text_col.markdown(comment)
                st.markdown("---")


def _render_wordcloud(df_filter, selected_labels, content_col):
    """Draw a word cloud from the lower-cased text of the filtered reviews."""
    with content_col:
        st.header(f"Wordcloud for reviews related to :red[{selected_labels}]")
        # Missing comments are skipped so the join only ever sees strings.
        text = " ".join(df_filter["comment"].dropna().str.lower().values)
        if not text.strip():
            # WordCloud.generate() cannot build an image from empty text.
            st.markdown("No comment satisfy the condition")
            return

        text = word_tokenize(text, format="text")
        word_cloud = WordCloud(collocations=False, background_color="white").generate(text)
        fig, ax = plt.subplots()
        ax.imshow(word_cloud)
        ax.axis("off")
        st.pyplot(fig)
        # pyplot keeps every figure alive until closed; release it per rerun.
        plt.close(fig)
def tornado_chart(df):
    """Draw a two-panel "tornado" chart of review count and average rating per topic.

    The left panel shows the sum of each topic column on a reversed x axis;
    the right panel shows the mean ``rating`` of the rows whose topic column
    equals 1.  Topics are ordered by ascending review count in both panels.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]

    count_reviews = df[labels].sum().sort_values(ascending=True)
    rating_by_topic = {topic: df[df[topic] == 1]["rating"].mean() for topic in labels}
    # Reindex so both panels list the topics in the same order.
    avg_rating = pd.Series(rating_by_topic).reindex(index=count_reviews.index)

    fig = make_subplots(rows=1, cols=2, vertical_spacing=0)

    panels = (
        (1, "Count Reviews", count_reviews),
        (2, "Average Rating", avg_rating),
    )
    for column, trace_name, series in panels:
        fig.add_trace(
            go.Histogram(
                x=series.values,
                y=series.index,
                histfunc="sum",
                orientation="h",
                opacity=0.6,
                name=trace_name,
            ),
            row=1,
            col=column,
        )

    # Left panel grows leftwards; right panel gets unit ticks and no y axis.
    fig.update_xaxes(autorange="reversed", row=1, col=1)
    fig.update_xaxes(tickmode="linear", dtick=1, row=1, col=2)
    fig.update_yaxes(visible=False, row=1, col=2)

    fig.update_layout(
        title=dict(
            text="Review Count and Average Rating by Topic",
            font=dict(size=18),
            y=1,
            x=0,
        ),
        xaxis=dict(domain=[0.0, 0.45]),
        xaxis2=dict(domain=[0.45, 0.90]),
        width=500,
        height=300,
        legend=dict(
            orientation="h",
            entrywidth=90,
            yanchor="bottom",
            y=0,
            xanchor="right",
            x=1,
            title=None,
            traceorder="normal",
            yref="container",
        ),
        margin=dict(r=5, l=5, t=50, b=0),
    )
    st.plotly_chart(fig, use_container_width=True)