NTDuy committed on
Commit
c75af98
·
verified ·
1 Parent(s): 72f3273

Upload graphs.py

Browse files
Files changed (1) hide show
  1. src/graphs.py +261 -0
src/graphs.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import pandas as pd
6
+ from plotly.subplots import make_subplots
7
+ import plotly.graph_objects as go
8
+ from wordcloud import WordCloud
9
+ from underthesea import word_tokenize
10
+ import matplotlib.pyplot as plt
11
+
12
+
13
def sentence_topic_plot(result):
    """Plot the per-topic scores of one review as a horizontal bar chart.

    ``result`` must support ``.detach().numpy()``; only its first row is
    read (presumably a torch tensor with one row of six scores, ordered
    Quality, Serve, Pack, Shipping, Price, Other -- confirm with the caller).

    Bars are sorted by ascending score. Every topic whose score is at least
    0.5 is named in the header shown above the chart.
    """
    topic_names = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"]
    reversed_names = topic_names[::-1]
    reversed_scores = result.detach().numpy()[0][::-1]

    # Pair each topic with its score, walking both sequences back to front.
    score_by_topic = {
        topic: reversed_scores[position]
        for position, topic in enumerate(reversed_names)
    }

    # Ascending by score; ties keep the reversed topic order (stable sort).
    ordered_topics = sorted(score_by_topic, key=score_by_topic.get)
    ordered_scores = [score_by_topic[topic] for topic in ordered_topics]

    related_topics = [
        topic for topic, score in score_by_topic.items() if score >= 0.5
    ]
    message = ", ".join(related_topics)

    fig = go.Figure()
    bar_trace = go.Bar(y=ordered_topics, x=ordered_scores, orientation='h')
    fig.add_trace(bar_trace)
    fig.update_layout(xaxis_title="Probability", yaxis_title="Topics")

    st.header(f"Your review is related to :blue[{message}]")
    st.plotly_chart(fig, use_container_width=True)
31
+
32
def KPI_card(name="Total Reviews", value=1000, box_color=(123, 167, 212), font_color=(0, 0, 0), icon="fa-list"):
    """Build the HTML snippet for a rounded KPI card.

    Args:
        name: Caption shown in the smaller font on the first line.
        value: KPI value shown below the caption in the larger font.
        box_color: Background colour as an (R, G, B) sequence.
        font_color: Text colour as an (R, G, B) sequence.
        icon: Font Awesome icon class, e.g. ``"fa-list"``.

    Returns:
        An HTML string consisting of a single styled ``<p>`` element.

    ``name`` and ``value`` are interpolated without HTML escaping, so pass
    trusted content only.
    """
    caption_size = 20
    # The KPI value is rendered 10px larger than its caption.
    value_size = caption_size + 10

    background = f"rgb({box_color[0]}, {box_color[1]}, {box_color[2]})"
    foreground = f"rgb({font_color[0]}, {font_color[1]}, {font_color[2]})"

    # The original markup ended with a stray </style> tag that had no opening
    # counterpart; it is dropped so the snippet is well-formed.
    return f"""<p style='background-color:{background};
                color: {foreground};
                font-size: {value_size}px;
                font-family: "Source Sans Pro", sans-serif;
                font-weight: 600;
                border-radius: 20px;
                padding-left: 20px;
                padding-top: 30px;
                padding-bottom: 30px;
                line-height:30px; text-align: center;'>
                <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
                <i class="fa-solid fas {icon} fa-2x fa-pull-left "></i>
                <span style='font-size: {caption_size}px;
                margin-top: 0'>{name}</span><BR>
                {value}
                </p>"""
60
+
61
def rating_distribution_pie_chart(classification_df):
    """Draw a pie chart of how many reviews received each rating.

    Reads the ``rating`` column of ``classification_df``; slices follow the
    rating order because the counts are sorted by index and trace sorting
    is switched off.
    """
    rating_counts = classification_df["rating"].value_counts().sort_index()

    fig_pie = px.pie(
        rating_counts,
        values=rating_counts.values,
        names=rating_counts.index,
        color=rating_counts.values,
        color_discrete_sequence=px.colors.sequential.Blues,
        height=150,
    )
    # Keep slices in rating order rather than Plotly's default size order.
    fig_pie.update_traces(sort=False)
    fig_pie.update_layout(margin={"t": 0, "b": 0, "l": 0, "r": 0})

    st.plotly_chart(fig_pie, use_container_width=True, height=100)
68
+
69
def kpi_total_reviews(classification_df):
    """Show a KPI card with the number of rows in ``classification_df``."""
    review_count = str(len(classification_df))
    card_html = KPI_card(name="Total Reviews", value=review_count, icon="fa-list")
    # The card is raw HTML, so Streamlit must be told to render it as such.
    st.markdown(card_html, unsafe_allow_html=True)
73
+
74
def kpi_average_rating(classification_df):
    """Show a KPI card with the mean of the ``rating`` column, to 1 decimal."""
    mean_rating = classification_df["rating"].mean()
    card_html = KPI_card(
        name="Average Rating",
        value=round(mean_rating, 1),
        icon="fa-star",
    )
    # The card is raw HTML, so Streamlit must be told to render it as such.
    st.markdown(card_html, unsafe_allow_html=True)
78
+
79
+
80
def time_series_comments(classification_df, freq="D", metric="Count Reviews"):
    """Plot one line per topic, plus a "Total" line, over time.

    Args:
        classification_df: DataFrame with a ``time`` column usable by
            ``pd.Grouper``, a ``rating`` column, a ``comment`` column and one
            column per topic (Quality, Serve, Pack, Shipping, Price) that
            equals 1 when the review mentions the topic.
        freq: Resampling frequency passed to ``pd.Grouper``.
        metric: ``"Count Reviews"`` (per-period sums of the topic columns and
            a count of comments) or ``"Average Rating"`` (per-period mean
            rating of the reviews mentioning each topic, and overall).

    Raises:
        ValueError: If ``metric`` is not one of the two supported values.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]

    if metric == "Count Reviews":
        grouped = classification_df.groupby(pd.Grouper(key="time", freq=freq))
        ts_plot_data = grouped[labels].sum()
        ts_plot_data["Total"] = grouped["comment"].count()
    elif metric == "Average Rating":
        # Mask the rating with NaN wherever a review does not mention the
        # topic, then group every column with the SAME grouper. Grouping each
        # topic's subset separately and assigning the results column by
        # column aligned everything to the first topic's date range and
        # silently dropped periods outside of it (including for "Total").
        ratings = classification_df[["time"]].copy()
        for label in labels:
            ratings[label] = classification_df["rating"].where(
                classification_df[label] == 1
            )
        ratings["Total"] = classification_df["rating"]
        ts_plot_data = ratings.groupby(pd.Grouper(key="time", freq=freq)).mean()
    else:
        raise ValueError(
            f"Unsupported metric {metric!r}; "
            "expected 'Count Reviews' or 'Average Rating'"
        )

    ts_plot_data["time"] = pd.to_datetime(ts_plot_data.index)
    # Carry the last known value forward over periods without data.
    ts_plot_data = ts_plot_data.ffill()

    fig = px.line(
        ts_plot_data,
        x="time",
        y=ts_plot_data.columns,
        height=300,
        title=f"{metric} on time",
    )
    fig.update_layout(
        title=dict(font=dict(size=20), y=1, x=0),
        legend=dict(
            orientation="h",
            entrywidth=90,
            yanchor="bottom",
            y=0,
            xanchor="right",
            x=1,
            title=None,
            traceorder="normal",
            yref="container",
        ),
        yaxis_title=metric,
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        margin=dict(r=5, l=5, t=50, b=5),
    )

    st.plotly_chart(fig, use_container_width=True)
110
+
111
+
112
def hor_barchart(classification_df, metric="Count Reviews"):
    """Draw a horizontal bar chart with one bar per topic.

    With ``metric == "Average Rating"`` each bar is the mean ``rating`` of the
    rows whose topic column equals 1; with ``metric == "Count Reviews"`` each
    bar is the column sum, sorted ascending. Any other ``metric`` leaves the
    data as an empty dict.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]

    if metric == "Average Rating":
        data = pd.Series({
            topic: classification_df[classification_df[topic] == 1]["rating"].mean()
            for topic in labels
        })
    elif metric == "Count Reviews":
        topic_totals = classification_df[labels].sum()
        data = topic_totals.sort_values(ascending=True)
    else:
        data = {}

    fig = px.bar(data, orientation="h", width=350, title="Most commented topic")
    title_style = dict(font=dict(size=20), y=0.85, x=0)
    fig.update_layout(
        yaxis_title=None,
        height=400,
        xaxis_visible=False,
        showlegend=False,
        title=title_style,
    )
    st.plotly_chart(fig, use_container_width=True)
127
+
128
+
129
def _show_individual_comments(df_filter, labels, selected_labels, controls_col, display_col):
    """Render the rating / top-N controls and the scrollable list of reviews."""
    with controls_col:
        rating_low, rating_high = st.slider(
            "$$ \\bold{Rating \\: range: \\:} $$",
            min_value=1, max_value=5, value=(1, 5), step=1,
        )
    in_range = (df_filter["rating"] >= rating_low) & (df_filter["rating"] <= rating_high)
    df_filter = df_filter[in_range].sort_values("time", ascending=False)
    n_reviews = len(df_filter)

    if n_reviews > 1:
        # NOTE(review): the slider is placed in the controls column; confirm
        # this matches the intended layout.
        with controls_col:
            top_n = st.slider(
                "$$ \\bold{Print \\: top: \\:} $$",
                min_value=1,
                max_value=n_reviews,
                # Clamp the default: n // 10 is 0 for fewer than 10 reviews,
                # which is below min_value.
                value=max(1, n_reviews // 10),
                step=1,
            )
    else:
        # Zero or one review: a slider would have no valid range to offer.
        top_n = n_reviews

    with display_col:
        st.header(f"Displaying {top_n} most recent reviews related to :red[{selected_labels}]")
        if df_filter.empty:
            st.markdown("No comment satisfy the condition")
            return

        shown = df_filter.head(top_n)
        topic_flags = (shown[labels] == 1).to_numpy()
        rows = zip(
            shown["comment"], shown["time"], shown["rating"],
            shown["username"], topic_flags,
        )
        with st.container(height=300):
            for number, (comment, posted_at, rating, sender, flags) in enumerate(rows, start=1):
                st.markdown(f"**Reviews** **{number}:**")
                # int() so string repetition also works for float-typed ratings.
                stars = ":star:" * int(rating)
                topics_str = ", ".join(
                    label for label, flagged in zip(labels, flags) if flagged
                )
                meta_col, text_col = st.columns([1, 2])
                meta_col.markdown(f"From: {sender} \n Time: {posted_at} \n Rating: {stars}")
                meta_col.markdown(f"Topics: {topics_str}")
                text_col.markdown(comment)
                st.markdown("---")


def _show_wordcloud(df_filter, selected_labels, display_col):
    """Render a word cloud built from the lower-cased comments."""
    with display_col:
        st.header(f"Wordcloud for reviews related to :red[{selected_labels}]")
        # Missing comments are dropped so the join cannot hit a non-string.
        comments = df_filter["comment"].str.lower().dropna()
        text = " ".join(comments)
        if not text.strip():
            # WordCloud.generate() raises when it is given no words at all.
            st.markdown("No comment satisfy the condition")
            return

        text = word_tokenize(text, format="text")
        word_cloud = WordCloud(collocations=False, background_color='white').generate(text)
        fig, ax = plt.subplots()
        ax.imshow(word_cloud)
        ax.axis("off")
        st.pyplot(fig)
        # pyplot keeps a reference to every open figure; close it so figures
        # do not accumulate across Streamlit reruns.
        plt.close(fig)


def print_reviews(classification_df):
    """Show the reviews as a filtered list or as a word cloud.

    The left column holds the controls (viewing method, topic filter and,
    for the list view, rating range and number of reviews); the right column
    holds the output. ``classification_df`` needs the columns ``comment``,
    ``time``, ``rating``, ``username`` and one column per topic (Quality,
    Serve, Pack, Shipping, Price). The frame is copied before filtering, so
    the caller's data is not modified.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
    controls_col, display_col = st.columns([1, 3], gap="large")

    with controls_col:
        viewing_method = st.selectbox(
            "$$ \\bold{Viewing \\: method: } $$",
            ["Individual Comments", "Wordcloud"],
        )
        filter_by = st.multiselect(
            "$$ \\bold{Select \\: reviews \\: related \\: to: } $$", labels
        )

    selected_labels = ", ".join(filter_by) if filter_by else "All topics"

    df_filter = classification_df.copy()
    if filter_by:
        # Keep only the reviews flagged for every selected topic.
        df_filter = df_filter[df_filter[filter_by].all(axis=1)]

    if viewing_method == "Individual Comments":
        _show_individual_comments(df_filter, labels, selected_labels, controls_col, display_col)
    else:
        _show_wordcloud(df_filter, selected_labels, display_col)
185
def tornado_chart(df):
    """Draw review counts and average ratings per topic side by side.

    The left panel shows the per-topic column sums on a reversed x axis; the
    right panel shows the mean ``rating`` of the rows whose topic column
    equals 1. Both panels list the topics in ascending order of review count.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]

    count_reviews = df[labels].sum().sort_values(ascending=True)
    mean_by_topic = {topic: df[df[topic] == 1]["rating"].mean() for topic in labels}
    # Order the ratings like the counts so the rows of both panels line up.
    avg_rating = pd.Series(mean_by_topic).reindex(index=count_reviews.index)

    fig = make_subplots(rows=1, cols=2, vertical_spacing=0)

    panels = [(count_reviews, "Count Reviews"), (avg_rating, "Average Rating")]
    for column, (series, trace_name) in enumerate(panels, start=1):
        fig.add_trace(
            go.Histogram(
                x=series.values,
                y=series.index,
                histfunc='sum',
                orientation='h',
                opacity=0.6,
                name=trace_name,
            ),
            row=1,
            col=column,
        )

    # Left panel grows leftwards; right panel gets integer ticks and no y axis.
    fig.update_xaxes(autorange="reversed", row=1, col=1)
    fig.update_xaxes(tickmode='linear', dtick=1, row=1, col=2)
    fig.update_yaxes(visible=False, row=1, col=2)

    fig.update_layout(
        title=dict(
            text="Review Count and Average Rating by Topic",
            font=dict(size=18),
            y=1,
            x=0,
        ),
        xaxis=dict(domain=[0.0, 0.45]),
        xaxis2=dict(domain=[0.45, 0.90]),
        legend=dict(
            orientation="h",
            entrywidth=90,
            yanchor="bottom",
            y=0,
            xanchor="right",
            x=1,
            title=None,
            traceorder="normal",
            yref="container",
        ),
        width=500,
        height=300,
        margin=dict(r=5, l=5, t=50, b=0),
    )
    st.plotly_chart(fig, use_container_width=True)