NTDuy committed on
Commit
cfb4c58
·
verified ·
1 Parent(s): 48cee12

Update graphs.py

Browse files
Files changed (1) hide show
  1. graphs.py +253 -253
graphs.py CHANGED
@@ -1,254 +1,254 @@
1
-
2
- import streamlit as st
3
- import plotly.express as px
4
- import plotly.graph_objects as go
5
- import pandas as pd
6
- from plotly.subplots import make_subplots
7
- import plotly.graph_objects as go
8
- from wordcloud import WordCloud
9
- from underthesea import word_tokenize
10
- import matplotlib.pyplot as plt
11
-
12
- def sentence_topic_plot(result):
13
- labels = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"][::-1]
14
- values = result.detach().numpy()[0][::-1]
15
- fig = go.Figure()
16
- fig.add_trace(go.Bar(
17
- y=labels,
18
- x=values,
19
- orientation='h'
20
- ))
21
- fig.update_layout(xaxis_title="Probability", yaxis_title="Topics")
22
- st.plotly_chart(fig, use_container_width=True)
23
-
24
- def KPI_card(name = "Total Reviews", value = 1000, box_color = (123,167,212), font_color = (0, 0, 0), icon = "fa-list"):
25
- wch_colour_box = box_color
26
- wch_colour_font = font_color
27
- fontsize = 20
28
- valign = "left"
29
- iconname = icon
30
- sline = name # kpi name
31
-
32
- i = value # kpi value
33
-
34
- htmlstr = f"""<p style='background-color:rgb({wch_colour_box[0]}, {wch_colour_box[1]}, {wch_colour_box[2]});
35
- color: rgb({wch_colour_font[0]}, {wch_colour_font[1]}, {wch_colour_font[2]});
36
- font-size: {fontsize + 10}px;
37
- font-family: "Source Sans Pro", sans-serif;
38
- font-weight: 600;
39
- border-radius: 20px;
40
- padding-left: 20px;
41
- padding-top: 30px;
42
- padding-bottom: 30px;
43
- line-height:30px; text-align: center;'>
44
- <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
45
- <i class="fa-solid fas {iconname} fa-2x fa-pull-left "></i>
46
- <span style='font-size: {fontsize}px;
47
- margin-top: 0'>{sline}</span><BR>
48
- {i}
49
- </style>
50
- </p>"""
51
- return htmlstr
52
-
53
- def rating_distribution_pie_chart(classification_df):
54
- pie_data = classification_df["rating"].value_counts().sort_index()
55
- fig_pie = px.pie(pie_data,height=150, values=pie_data.values, names=pie_data.index, color=pie_data.values, color_discrete_sequence=px.colors.sequential.Blues)
56
-
57
- fig_pie.update_traces(sort=False)
58
- fig_pie.update_layout(margin=dict(t=0, b=0, l=0, r=0))
59
- st.plotly_chart(fig_pie, use_container_width=True,height=100)
60
-
61
- def kpi_total_reviews(classification_df):
62
- total = str(len(classification_df))
63
- total_reviews_card = KPI_card(name = "Total Reviews", value = total, icon = "fa-list")
64
- st.markdown(total_reviews_card, unsafe_allow_html=True)
65
-
66
- def kpi_average_rating(classification_df):
67
- average_rating = round(classification_df["rating"].mean(), 1)
68
- avg_rating_card = KPI_card(name = "Average Rating", value = average_rating, icon = "fa-star")
69
- st.markdown(avg_rating_card, unsafe_allow_html=True)
70
-
71
-
72
- def time_series_comments(classification_df, freq = "D", metric = "Count Reviews"):
73
- labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
74
- ts_plot_data = pd.DataFrame()
75
- grouped_by_day = classification_df.groupby(pd.Grouper(key='time', freq = freq))
76
- if metric == "Count Reviews":
77
- ts_plot_data[labels] = grouped_by_day[labels].sum()
78
- ts_plot_data["Total"] = grouped_by_day["itemid"].count()
79
- ts_plot_data["time"]= pd.to_datetime(ts_plot_data.index)
80
-
81
- elif metric == "Average Rating":
82
- for i in labels:
83
- ts_plot_data[i] = classification_df[classification_df[i] == 1].groupby(pd.Grouper(key='time', freq = freq))["rating"].mean()
84
- ts_plot_data["Total"] = grouped_by_day["rating"].mean()
85
- ts_plot_data["time"]= pd.to_datetime(ts_plot_data.index)
86
-
87
- ts_plot_data = ts_plot_data.ffill()
88
- fig = px.line(ts_plot_data, x = "time", y = ts_plot_data.columns, height=300, title = f"{metric} on time")
89
- fig.update_layout(
90
- title=dict(font=dict(size=20), y=1, x=0),
91
- legend=dict(
92
- orientation="h",
93
- entrywidth=90,
94
- yanchor="bottom",
95
- y= 0,
96
- xanchor="right",
97
- x=1, title = None, traceorder = "normal", yref = "container"
98
- ), yaxis_title = metric, xaxis = dict(showgrid = False), yaxis = dict(showgrid = False), margin=dict(r=5, l=5, t=50, b=5))
99
-
100
-
101
- st.plotly_chart(fig, use_container_width = True)
102
-
103
-
104
- def hor_barchart(classification_df, metric = "Count Reviews"):
105
- labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
106
- data = {}
107
- if metric == "Average Rating":
108
- for i in labels:
109
- data[i] = classification_df[classification_df[i] == 1]["rating"].mean()
110
- data = pd.Series(data)
111
-
112
- elif metric == "Count Reviews":
113
- data = classification_df[labels].sum().sort_values(ascending = True)
114
-
115
- fig = px.bar(data, orientation = "h", width = 350, title = "Most commented topic")
116
- fig.update_layout(yaxis_title=None, height=400, xaxis_visible = False, showlegend = False, title=dict(font=dict(size=20), y=0.85,
117
- x=0))
118
- st.plotly_chart(fig, use_container_width=True)
119
-
120
-
121
- def print_reviews(classification_df):
122
- col_1, col_2 = st.columns([1, 3], gap="large")
123
-
124
- with col_1:
125
- labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
126
- viewing_method = st.selectbox("$$ \\bold{Viewing \: method: } $$", ["Individual Comments", "Wordcloud"])
127
- filter_by = st.multiselect("$$ \\bold{Select \: reviews \: related \: to: } $$", labels)
128
- if filter_by:
129
- selected_labels = ", ".join(filter_by)
130
- else:
131
- selected_labels = "All topics"
132
- df_filter = classification_df.copy()
133
- if filter_by:
134
- other = list(set(labels) - set(filter_by))
135
- df_filter = df_filter[df_filter[filter_by].all(axis=1) & ~df_filter[other].any(axis=1)]
136
-
137
- if viewing_method == "Individual Comments":
138
- with col_1:
139
- filter_rating = st.slider("$$ \\bold{Rating \: range: \:} $$", min_value = 1, max_value= 5, value = (1, 5), step= 1)
140
- df_filter = df_filter[(df_filter["rating"] >= filter_rating[0]) & (df_filter["rating"] <= filter_rating[1])]
141
- df_filter = df_filter.sort_values("time", ascending = False)
142
-
143
- if not df_filter.empty:
144
- top_n = st.slider("$$ \\bold{Print \: top: \:} $$", min_value = 1, max_value= len(df_filter), value = int(len(df_filter) / 10), step= 1)
145
- else:
146
- top_n = 0
147
- with col_2:
148
- st.header(f"Displaying {top_n} most recent reviews related to :red[{selected_labels}]")
149
- if not df_filter.empty:
150
- comment_container = st.container(height=300)
151
- with comment_container:
152
- for i in range(top_n):
153
- st.markdown(f"**Reviews** **{i + 1}:**")
154
- comment = df_filter["comment"].iloc[i]
155
- time = df_filter["time"].iloc[i]
156
- rating = ":star:" * df_filter["rating"].iloc[i]
157
- sender = df_filter["username"].iloc[i]
158
- topics = [topic for topic in labels if df_filter[topic].iloc[i] == 1]
159
- topics_str = ", ".join(topics)
160
- col_1, col_2 = st.columns([1, 2])
161
- col_1.markdown(f"From: {sender} \n Time: {time} \n Rating: {rating}")
162
- col_1.markdown(f"Topics: {topics_str}")
163
- col_2.markdown(comment)
164
- st.markdown("---")
165
- else:
166
- st.markdown("No comment satisfy the condition")
167
- else:
168
- with col_2:
169
- st.header(f"Wordcloud for reviews related to :red[{selected_labels}]")
170
- text = " ".join(comment for comment in df_filter["comment"].str.lower().values)
171
- text = word_tokenize(text, format = "text")
172
- word_cloud = WordCloud(collocations = False, background_color = 'white').generate(text)
173
- fig, ax = plt.subplots()
174
- # Plot the word cloud on the axes
175
- ax.imshow(word_cloud)
176
- ax.axis("off")
177
- st.pyplot(fig)
178
- def tornado_chart(df):
179
- labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
180
-
181
- avg_rating = {}
182
- for i in labels:
183
- avg_rating[i] = df[df[i] == 1]["rating"].mean()
184
-
185
- count_reviews = df[labels].sum().sort_values(ascending = True)
186
- avg_rating = pd.Series(avg_rating).reindex(index = count_reviews.index)
187
-
188
- fig = make_subplots(
189
- rows=1
190
- ,cols=2
191
- ,vertical_spacing=0
192
- )
193
-
194
- fig_add = fig.add_trace(
195
- go.Histogram(
196
- x= count_reviews.values
197
- ,y= count_reviews.index
198
- ,histfunc='sum'
199
- ,orientation='h'
200
- ,opacity=0.6, name='Count Reviews')
201
- ,row=1
202
- ,col=1
203
- )
204
-
205
- fig_add = fig.add_trace(
206
- go.Histogram(
207
- x= avg_rating.values
208
- ,y= avg_rating.index
209
- ,histfunc='sum'
210
- ,orientation='h'
211
- ,opacity=0.6, name='Average Rating')
212
- ,row=1
213
- ,col=2
214
- )
215
-
216
-
217
- fig_add = fig.update_xaxes(
218
- autorange="reversed"
219
- ,row=1
220
- ,col=1)
221
-
222
- fig_add = fig.update_xaxes(
223
- tickmode='linear'
224
- ,dtick=1
225
- ,row=1
226
- ,col=2)
227
-
228
- fig_add = fig.update_layout(
229
- title="Review Count and Average Rating by Topic",
230
- )
231
- fig_add = fig.update_yaxes(
232
- visible=False
233
- ,row=1
234
- ,col=2)
235
-
236
- fig.update_layout(xaxis=dict(domain=[0.0, 0.45]), xaxis2=dict(domain=[0.45, 0.90]))
237
- fig.update_layout(legend=dict(orientation='h', xanchor='center', x=0.45))
238
- fig.update_layout(
239
- width=500,
240
- height=300
241
- )
242
-
243
- fig.update_layout(
244
- title=dict(font=dict(size=18), y=1, x=0),
245
- legend=dict(
246
- orientation="h",
247
- entrywidth=90,
248
- yanchor="bottom",
249
- y= 0,
250
- xanchor="right",
251
- x=1, title = None, traceorder = "normal", yref = "container"
252
- ))
253
- fig.update_layout(margin=dict(r=5, l=5, t=50, b=0))
254
  st.plotly_chart(fig, use_container_width = True)
 
1
+
2
+ import streamlit as st
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import pandas as pd
6
+ from plotly.subplots import make_subplots
7
+ import plotly.graph_objects as go
8
+ from wordcloud import WordCloud
9
+ from underthesea import word_tokenize
10
+ import matplotlib.pyplot as plt
11
+
12
def sentence_topic_plot(result):
    """Render a horizontal bar chart of per-topic scores for one prediction.

    Args:
        result: Object exposing ``detach()`` whose output converts to a NumPy
            array; only row 0 is plotted. Presumably a torch tensor with six
            scores per row, one per topic label below -- TODO confirm against
            the caller.

    The chart is written to the Streamlit page; nothing is returned.
    """
    topic_names = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"]

    # Move the data to host memory before converting: ``.numpy()`` only works
    # for CPU tensors, and ``.cpu()`` is a no-op when it already lives there.
    scores = result.detach().cpu().numpy()[0]

    # Labels and scores are reversed together so each score stays paired with
    # its topic while the first topic is drawn at the top of the chart.
    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            y=topic_names[::-1],
            x=scores[::-1],
            orientation='h',
        )
    )
    fig.update_layout(xaxis_title="Probability", yaxis_title="Topics")
    st.plotly_chart(fig, use_container_width=True)
23
+
24
def KPI_card(name="Total Reviews", value=1000, box_color=(123, 167, 212), font_color=(0, 0, 0), icon="fa-list"):
    """Build the HTML snippet for a rounded KPI card.

    Args:
        name: Caption shown above the value.
        value: The KPI value; interpolated with normal f-string formatting.
        box_color: Background colour; indices 0-2 are used as R, G, B.
        font_color: Text colour; indices 0-2 are used as R, G, B.
        icon: Font Awesome icon class placed on the ``<i>`` element.

    Returns:
        str: A ``<p>`` element with inline styles. ``name``, ``value`` and
        ``icon`` are inserted verbatim (no HTML escaping), so callers must
        only pass trusted text.
    """
    caption_px = 20  # caption size; the value itself is rendered 10px larger

    # The markup deliberately contains no blank line. The previous version
    # also emitted a stray ``</style>`` with no opening tag; it is removed.
    return f"""<p style='background-color:rgb({box_color[0]}, {box_color[1]}, {box_color[2]});
                color: rgb({font_color[0]}, {font_color[1]}, {font_color[2]});
                font-size: {caption_px + 10}px;
                font-family: "Source Sans Pro", sans-serif;
                font-weight: 600;
                border-radius: 20px;
                padding-left: 20px;
                padding-top: 30px;
                padding-bottom: 30px;
                line-height:30px; text-align: center;'>
                <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
                <i class="fa-solid fas {icon} fa-2x fa-pull-left "></i>
                <span style='font-size: {caption_px}px;
                margin-top: 0'>{name}</span><BR>
                {value}
                </p>"""
52
+
53
def rating_distribution_pie_chart(classification_df):
    """Draw a pie chart of how many reviews fall under each rating value.

    Args:
        classification_df: DataFrame with a ``rating`` column.

    The chart is written to the Streamlit page; nothing is returned.
    """
    # Counts per distinct rating, ordered by the rating value itself.
    rating_counts = classification_df["rating"].value_counts().sort_index()

    chart = px.pie(
        rating_counts,
        height=150,
        values=rating_counts.values,
        names=rating_counts.index,
        color=rating_counts.values,
        color_discrete_sequence=px.colors.sequential.Blues,
    )
    # sort=False keeps the slices in rating order rather than by size.
    chart.update_traces(sort=False)
    chart.update_layout(margin={"t": 0, "b": 0, "l": 0, "r": 0})

    st.plotly_chart(chart, use_container_width=True, height=100)
60
+
61
def kpi_total_reviews(classification_df):
    """Show the row count of ``classification_df`` as a "Total Reviews" card.

    The card HTML comes from ``KPI_card`` and is rendered with
    ``st.markdown(..., unsafe_allow_html=True)``.
    """
    review_count = str(len(classification_df))
    st.markdown(
        KPI_card(name="Total Reviews", value=review_count, icon="fa-list"),
        unsafe_allow_html=True,
    )
65
+
66
def kpi_average_rating(classification_df):
    """Show the mean of the ``rating`` column as an "Average Rating" card.

    The mean is rounded to one decimal place before being handed to
    ``KPI_card``; the resulting HTML is rendered through ``st.markdown``.
    """
    mean_rating = classification_df["rating"].mean()
    card_html = KPI_card(
        name="Average Rating",
        value=round(mean_rating, 1),
        icon="fa-star",
    )
    st.markdown(card_html, unsafe_allow_html=True)
70
+
71
+
72
def time_series_comments(classification_df, freq="D", metric="Count Reviews"):
    """Plot a per-topic time series of review counts or average ratings.

    Args:
        classification_df: DataFrame with a ``time`` column usable by
            ``pd.Grouper``, a ``rating`` column, a ``comment`` column and one
            column per topic label (rows belonging to a topic hold 1).
        freq: Resampling frequency passed to ``pd.Grouper``.
        metric: ``"Count Reviews"`` sums the topic columns and counts
            non-null comments per period; ``"Average Rating"`` averages the
            rating per period, overall and per topic.

    Raises:
        ValueError: If ``metric`` is not one of the two supported values.

    The chart is written to the Streamlit page; nothing is returned.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
    grouped = classification_df.groupby(pd.Grouper(key="time", freq=freq))

    if metric == "Count Reviews":
        ts_plot_data = grouped[labels].sum()
        ts_plot_data["Total"] = grouped["comment"].count()
    elif metric == "Average Rating":
        overall = grouped["rating"].mean()
        # Start from the full set of periods. Previously the frame took its
        # index from the first topic's filtered series, which silently cut
        # every other series (including "Total") down to that topic's range.
        ts_plot_data = pd.DataFrame(index=overall.index)
        for label in labels:
            # Blank out ratings of rows outside the topic instead of dropping
            # the rows, so every topic is binned on exactly the same periods.
            topic_ratings = classification_df[["time"]].copy()
            topic_ratings["rating"] = classification_df["rating"].where(
                classification_df[label] == 1
            )
            ts_plot_data[label] = topic_ratings.groupby(
                pd.Grouper(key="time", freq=freq)
            )["rating"].mean()
        ts_plot_data["Total"] = overall
    else:
        # Fail early with a clear message instead of an obscure Plotly error
        # about a missing "time" column further down.
        raise ValueError(
            f"Unsupported metric {metric!r}; expected 'Count Reviews' or 'Average Rating'"
        )

    ts_plot_data["time"] = pd.to_datetime(ts_plot_data.index)
    # Periods without data carry the previous period's value forward.
    ts_plot_data = ts_plot_data.ffill()

    fig = px.line(
        ts_plot_data,
        x="time",
        y=labels + ["Total"],
        height=300,
        title=f"{metric} on time",
    )
    fig.update_layout(
        title=dict(font=dict(size=20), y=1, x=0),
        legend=dict(
            orientation="h",
            entrywidth=90,
            yanchor="bottom",
            y=0,
            xanchor="right",
            x=1,
            title=None,
            traceorder="normal",
            yref="container",
        ),
        yaxis_title=metric,
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        margin=dict(r=5, l=5, t=50, b=5),
    )

    st.plotly_chart(fig, use_container_width=True)
102
+
103
+
104
def hor_barchart(classification_df, metric="Count Reviews"):
    """Draw a horizontal bar chart with one bar per topic.

    Args:
        classification_df: DataFrame with a ``rating`` column and one column
            per topic label (rows belonging to a topic hold 1).
        metric: ``"Average Rating"`` plots the mean rating of each topic's
            rows in label order; ``"Count Reviews"`` plots the column sums in
            ascending order. Any other value leaves the data as an empty dict.

    The chart is written to the Streamlit page; nothing is returned.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]

    if metric == "Average Rating":
        data = pd.Series(
            {
                topic: classification_df.loc[classification_df[topic] == 1, "rating"].mean()
                for topic in labels
            }
        )
    elif metric == "Count Reviews":
        topic_totals = classification_df[labels].sum()
        data = topic_totals.sort_values(ascending=True)
    else:
        data = {}

    fig = px.bar(data, orientation="h", width=350, title="Most commented topic")
    fig.update_layout(
        yaxis_title=None,
        height=400,
        xaxis_visible=False,
        showlegend=False,
        title=dict(font=dict(size=20), y=0.85, x=0),
    )
    st.plotly_chart(fig, use_container_width=True)
119
+
120
+
121
def print_reviews(classification_df):
    """Render the review browser: filter controls plus comments or a word cloud.

    The left column holds the controls (viewing method, topic filter and, for
    individual comments, rating range and number of reviews). The right
    column shows either the most recent matching reviews or a word cloud
    built from their text.

    Args:
        classification_df: DataFrame with ``comment``, ``time``, ``rating``
            and ``username`` columns plus one column per topic label (rows
            belonging to a topic hold 1).

    Everything is written to the Streamlit page; nothing is returned.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
    no_match_message = "No comment satisfy the condition"
    controls_col, output_col = st.columns([1, 3], gap="large")

    # The widget labels are LaTeX; backslashes are doubled so the source no
    # longer relies on the invalid "\:" escape (runtime text is unchanged).
    with controls_col:
        viewing_method = st.selectbox(
            "$$ \\bold{Viewing \\: method: } $$",
            ["Individual Comments", "Wordcloud"],
        )
        filter_by = st.multiselect(
            "$$ \\bold{Select \\: reviews \\: related \\: to: } $$", labels
        )
    selected_labels = ", ".join(filter_by) if filter_by else "All topics"

    df_filter = classification_df.copy()
    if filter_by:
        # Exact match: every selected topic is set and no unselected one is.
        other = [label for label in labels if label not in filter_by]
        df_filter = df_filter[
            df_filter[filter_by].all(axis=1) & ~df_filter[other].any(axis=1)
        ]

    if viewing_method == "Individual Comments":
        with controls_col:
            filter_rating = st.slider(
                "$$ \\bold{Rating \\: range: \\:} $$",
                min_value=1, max_value=5, value=(1, 5), step=1,
            )
            df_filter = df_filter[
                (df_filter["rating"] >= filter_rating[0])
                & (df_filter["rating"] <= filter_rating[1])
            ]
            df_filter = df_filter.sort_values("time", ascending=False)

            n_reviews = len(df_filter)
            if n_reviews > 1:
                # Default to a tenth of the matches but never below the
                # slider minimum: the old default was 0 for fewer than ten
                # rows, which is outside the slider's allowed range.
                top_n = st.slider(
                    "$$ \\bold{Print \\: top: \\:} $$",
                    min_value=1, max_value=n_reviews,
                    value=max(1, n_reviews // 10), step=1,
                )
            else:
                # Zero or one match: nothing to choose, and a slider whose
                # minimum equals its maximum is not usable.
                top_n = n_reviews

        with output_col:
            st.header(
                f"Displaying {top_n} most recent reviews related to :red[{selected_labels}]"
            )
            if df_filter.empty:
                st.markdown(no_match_message)
            else:
                with st.container(height=300):
                    newest = df_filter.head(top_n)
                    for number, (_, review) in enumerate(newest.iterrows(), start=1):
                        st.markdown(f"**Reviews** **{number}:**")
                        sender = review["username"]
                        time = review["time"]
                        # int() keeps the repetition valid for float ratings.
                        rating = ":star:" * int(review["rating"])
                        topics_str = ", ".join(
                            topic for topic in labels if review[topic] == 1
                        )
                        meta_col, text_col = st.columns([1, 2])
                        meta_col.markdown(
                            f"From: {sender} \n Time: {time} \n Rating: {rating}"
                        )
                        meta_col.markdown(f"Topics: {topics_str}")
                        text_col.markdown(review["comment"])
                        st.markdown("---")
    else:
        with output_col:
            st.header(f"Wordcloud for reviews related to :red[{selected_labels}]")
            # Missing comments would break the join, so they are dropped.
            comments = df_filter["comment"].dropna().astype(str).str.lower()
            text = " ".join(comments)
            if not text.strip():
                # No text at all: show the message instead of building a cloud.
                st.markdown(no_match_message)
                return
            text = word_tokenize(text, format="text")
            word_cloud = WordCloud(
                collocations=False, background_color='white'
            ).generate(text)
            fig, ax = plt.subplots()
            # Plot the word cloud on the axes
            ax.imshow(word_cloud)
            ax.axis("off")
            st.pyplot(fig)
            # Close the figure once rendered so pyplot does not keep it open.
            plt.close(fig)
178
def tornado_chart(df):
    """Draw side-by-side horizontal bars of review count and average rating.

    Args:
        df: DataFrame with a ``rating`` column and one column per topic label
            (rows belonging to a topic hold 1).

    The left panel shows the per-topic column sums (x axis reversed so bars
    grow leftwards); the right panel shows the mean rating of each topic's
    rows, in the same topic order. The chart is written to the Streamlit
    page; nothing is returned.
    """
    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]

    # Topics ordered by ascending review count; ratings follow that order.
    count_reviews = df[labels].sum().sort_values(ascending=True)
    avg_rating = pd.Series(
        {topic: df.loc[df[topic] == 1, "rating"].mean() for topic in labels}
    ).reindex(index=count_reviews.index)

    fig = make_subplots(rows=1, cols=2, vertical_spacing=0)

    panels = (
        (count_reviews, "Count Reviews"),
        (avg_rating, "Average Rating"),
    )
    for column, (series, trace_name) in enumerate(panels, start=1):
        fig.add_trace(
            go.Histogram(
                x=series.values,
                y=series.index,
                histfunc='sum',
                orientation='h',
                opacity=0.6,
                name=trace_name,
            ),
            row=1,
            col=column,
        )

    # Left panel grows right-to-left; right panel ticks at every integer.
    fig.update_xaxes(autorange="reversed", row=1, col=1)
    fig.update_xaxes(tickmode='linear', dtick=1, row=1, col=2)
    fig.update_yaxes(visible=False, row=1, col=2)

    # A single layout update carrying the final value of every property.
    fig.update_layout(
        title=dict(
            text="Review Count and Average Rating by Topic",
            font=dict(size=18),
            y=1,
            x=0,
        ),
        xaxis=dict(domain=[0.0, 0.45]),
        xaxis2=dict(domain=[0.45, 0.90]),
        legend=dict(
            orientation="h",
            entrywidth=90,
            yanchor="bottom",
            y=0,
            xanchor="right",
            x=1,
            title=None,
            traceorder="normal",
            yref="container",
        ),
        width=500,
        height=300,
        margin=dict(r=5, l=5, t=50, b=0),
    )

    st.plotly_chart(fig, use_container_width=True)