Spaces:

NTDuy
/

vietnamese_ecommerce_topic_classification

Paused

App Files Files Community

NTDuy committed on May 22, 2024

Commit

cfb4c58

verified ·

1 Parent(s): 48cee12

Update graphs.py

Browse files

Files changed (1) hide show

graphs.py +253 -253

graphs.py CHANGED Viewed

@@ -1,254 +1,254 @@
-import streamlit as st
-import plotly.express as px
-import plotly.graph_objects as go
-import pandas as pd
-from plotly.subplots import make_subplots
-import plotly.graph_objects as go
-from wordcloud import WordCloud
-from underthesea import word_tokenize
-import matplotlib.pyplot as plt
-def sentence_topic_plot(result):
-    labels = ["Quality",	"Serve",	"Pack",	"Shipping", "Price", "Other"][::-1]
-    values = result.detach().numpy()[0][::-1]
-    fig = go.Figure()
-    fig.add_trace(go.Bar(
-        y=labels,
-        x=values,
-        orientation='h'
-    ))
-    fig.update_layout(xaxis_title="Probability", yaxis_title="Topics")
-    st.plotly_chart(fig, use_container_width=True)
-def KPI_card(name = "Total Reviews", value = 1000, box_color = (123,167,212), font_color = (0, 0, 0), icon = "fa-list"):
-    wch_colour_box =  box_color
-    wch_colour_font =  font_color
-    fontsize = 20
-    valign = "left"
-    iconname = icon
-    sline = name # kpi name
-    i = value # kpi value
-    htmlstr = f"""<p style='background-color:rgb({wch_colour_box[0]}, {wch_colour_box[1]}, {wch_colour_box[2]});
-            color: rgb({wch_colour_font[0]}, {wch_colour_font[1]}, {wch_colour_font[2]});
-            font-size: {fontsize + 10}px;
-            font-family: "Source Sans Pro", sans-serif;
-            font-weight: 600;
-            border-radius: 20px;
-            padding-left: 20px;
-            padding-top: 30px;
-            padding-bottom: 30px;
-            line-height:30px; text-align: center;'>
-            <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
-            <i class="fa-solid fas {iconname} fa-2x fa-pull-left "></i>
-            <span style='font-size: {fontsize}px;
-            margin-top: 0'>{sline}</span><BR>
-            {i}
-            </style>
-            </p>"""
-    return htmlstr
-def rating_distribution_pie_chart(classification_df):
-    pie_data = classification_df["rating"].value_counts().sort_index()
-    fig_pie = px.pie(pie_data,height=150, values=pie_data.values, names=pie_data.index, color=pie_data.values, color_discrete_sequence=px.colors.sequential.Blues)
-    fig_pie.update_traces(sort=False)
-    fig_pie.update_layout(margin=dict(t=0, b=0, l=0, r=0))
-    st.plotly_chart(fig_pie, use_container_width=True,height=100)
-def kpi_total_reviews(classification_df):
-    total = str(len(classification_df))
-    total_reviews_card = KPI_card(name = "Total Reviews", value = total, icon = "fa-list")
-    st.markdown(total_reviews_card, unsafe_allow_html=True)
-def kpi_average_rating(classification_df):
-    average_rating = round(classification_df["rating"].mean(), 1)
-    avg_rating_card = KPI_card(name = "Average Rating", value = average_rating, icon = "fa-star")
-    st.markdown(avg_rating_card, unsafe_allow_html=True)
-def time_series_comments(classification_df, freq = "D", metric = "Count Reviews"):
-    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
-    ts_plot_data = pd.DataFrame()
-    grouped_by_day = classification_df.groupby(pd.Grouper(key='time', freq = freq))
-    if metric == "Count Reviews":
-        ts_plot_data[labels] = grouped_by_day[labels].sum()
-        ts_plot_data["Total"] = grouped_by_day["itemid"].count()
-        ts_plot_data["time"]= pd.to_datetime(ts_plot_data.index)
-    elif metric == "Average Rating":
-        for i in labels:
-            ts_plot_data[i] = classification_df[classification_df[i] == 1].groupby(pd.Grouper(key='time', freq = freq))["rating"].mean()
-        ts_plot_data["Total"] = grouped_by_day["rating"].mean()
-        ts_plot_data["time"]= pd.to_datetime(ts_plot_data.index)
-    ts_plot_data = ts_plot_data.ffill()
-    fig = px.line(ts_plot_data, x = "time", y = ts_plot_data.columns, height=300, title = f"{metric} on time")
-    fig.update_layout(
-        title=dict(font=dict(size=20), y=1,  x=0),
-        legend=dict(
-        orientation="h",
-        entrywidth=90,
-        yanchor="bottom",
-        y= 0,
-        xanchor="right",
-        x=1, title = None, traceorder = "normal", yref = "container"
-    ), yaxis_title = metric, xaxis = dict(showgrid = False), yaxis = dict(showgrid = False), margin=dict(r=5, l=5, t=50, b=5))
-    st.plotly_chart(fig, use_container_width = True)
-def hor_barchart(classification_df, metric = "Count Reviews"):
-    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
-    data = {}
-    if metric == "Average Rating":
-        for i in labels:
-            data[i] = classification_df[classification_df[i] == 1]["rating"].mean()
-            data = pd.Series(data)
-    elif metric == "Count Reviews":
-        data = classification_df[labels].sum().sort_values(ascending = True)
-    fig = px.bar(data, orientation = "h", width = 350, title = "Most commented topic")
-    fig.update_layout(yaxis_title=None, height=400, xaxis_visible = False, showlegend = False, title=dict(font=dict(size=20), y=0.85,
-                      x=0))
-    st.plotly_chart(fig, use_container_width=True)
-def print_reviews(classification_df):
-    col_1, col_2 = st.columns([1, 3], gap="large")
-    with col_1:
-        labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
-        viewing_method = st.selectbox("$$ \\bold{Viewing \: method: } $$", ["Individual Comments", "Wordcloud"])
-        filter_by = st.multiselect("$$ \\bold{Select \: reviews \: related \: to: } $$", labels)
-        if filter_by:
-            selected_labels = ", ".join(filter_by)
-        else:
-            selected_labels = "All topics"
-        df_filter = classification_df.copy()
-        if filter_by:
-            other = list(set(labels) - set(filter_by))
-            df_filter = df_filter[df_filter[filter_by].all(axis=1) & ~df_filter[other].any(axis=1)]
-    if viewing_method == "Individual Comments":
-        with col_1:
-            filter_rating = st.slider("$$ \\bold{Rating \: range: \:} $$", min_value = 1, max_value= 5, value = (1, 5), step= 1)
-            df_filter = df_filter[(df_filter["rating"] >= filter_rating[0]) & (df_filter["rating"] <= filter_rating[1])]
-            df_filter = df_filter.sort_values("time", ascending = False)
-            if not df_filter.empty:
-                top_n = st.slider("$$ \\bold{Print \: top: \:} $$", min_value = 1, max_value= len(df_filter), value = int(len(df_filter) / 10), step= 1)
-            else:
-                top_n = 0
-        with col_2:
-            st.header(f"Displaying {top_n} most recent reviews related to :red[{selected_labels}]")
-            if not df_filter.empty:
-                comment_container = st.container(height=300)
-                with comment_container:
-                    for i in range(top_n):
-                        st.markdown(f"**Reviews** **{i + 1}:**")
-                        comment = df_filter["comment"].iloc[i]
-                        time = df_filter["time"].iloc[i]
-                        rating = ":star:" * df_filter["rating"].iloc[i]
-                        sender = df_filter["username"].iloc[i]
-                        topics = [topic for topic in labels if df_filter[topic].iloc[i] == 1]
-                        topics_str = ", ".join(topics)
-                        col_1, col_2 = st.columns([1, 2])
-                        col_1.markdown(f"From: {sender}  \n Time: {time}  \n Rating: {rating}")
-                        col_1.markdown(f"Topics: {topics_str}")
-                        col_2.markdown(comment)
-                        st.markdown("---")
-            else:
-                st.markdown("No comment satisfy the condition")
-    else:
-        with col_2:
-            st.header(f"Wordcloud for reviews related to :red[{selected_labels}]")
-            text = " ".join(comment for comment in df_filter["comment"].str.lower().values)
-            text = word_tokenize(text, format = "text")
-            word_cloud = WordCloud(collocations = False, background_color = 'white').generate(text)
-            fig, ax = plt.subplots()
-            # Plot the word cloud on the axes
-            ax.imshow(word_cloud)
-            ax.axis("off")
-            st.pyplot(fig)
-def tornado_chart(df):
-    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
-    avg_rating = {}
-    for i in labels:
-        avg_rating[i] = df[df[i] == 1]["rating"].mean()
-    count_reviews = df[labels].sum().sort_values(ascending = True)
-    avg_rating = pd.Series(avg_rating).reindex(index = count_reviews.index)
-    fig = make_subplots(
-            rows=1
-            ,cols=2
-            ,vertical_spacing=0
-    )
-    fig_add = fig.add_trace(
-                go.Histogram(
-                    x= count_reviews.values
-                    ,y= count_reviews.index
-                    ,histfunc='sum'
-                    ,orientation='h'
-                    ,opacity=0.6, name='Count Reviews')
-                ,row=1
-                ,col=1
-    )
-    fig_add = fig.add_trace(
-                go.Histogram(
-                    x= avg_rating.values
-                    ,y= avg_rating.index
-                    ,histfunc='sum'
-                    ,orientation='h'
-                    ,opacity=0.6, name='Average Rating')
-                ,row=1
-                ,col=2
-    )
-    fig_add = fig.update_xaxes(
-                autorange="reversed"
-                ,row=1
-                ,col=1)
-    fig_add = fig.update_xaxes(
-                tickmode='linear'
-                ,dtick=1
-                ,row=1
-                ,col=2)
-    fig_add = fig.update_layout(
-        title="Review Count and Average Rating by Topic",
-    )
-    fig_add = fig.update_yaxes(
-                visible=False
-                ,row=1
-                ,col=2)
-    fig.update_layout(xaxis=dict(domain=[0.0, 0.45]), xaxis2=dict(domain=[0.45, 0.90]))
-    fig.update_layout(legend=dict(orientation='h', xanchor='center', x=0.45))
-    fig.update_layout(
-    width=500,
-    height=300
-    )
-    fig.update_layout(
-        title=dict(font=dict(size=18), y=1,  x=0),
-        legend=dict(
-        orientation="h",
-        entrywidth=90,
-        yanchor="bottom",
-        y= 0,
-        xanchor="right",
-        x=1, title = None, traceorder = "normal", yref = "container"
-    ))
-    fig.update_layout(margin=dict(r=5, l=5, t=50, b=0))
     st.plotly_chart(fig, use_container_width = True)

+import streamlit as st
+import plotly.express as px
+import plotly.graph_objects as go
+import pandas as pd
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+from wordcloud import WordCloud
+from underthesea import word_tokenize
+import matplotlib.pyplot as plt
+def sentence_topic_plot(result):
+    labels = ["Quality",	"Serve",	"Pack",	"Shipping", "Price", "Other"][::-1]
+    values = result.detach().numpy()[0][::-1]
+    fig = go.Figure()
+    fig.add_trace(go.Bar(
+        y=labels,
+        x=values,
+        orientation='h'
+    ))
+    fig.update_layout(xaxis_title="Probability", yaxis_title="Topics")
+    st.plotly_chart(fig, use_container_width=True)
+def KPI_card(name = "Total Reviews", value = 1000, box_color = (123,167,212), font_color = (0, 0, 0), icon = "fa-list"):
+    wch_colour_box =  box_color
+    wch_colour_font =  font_color
+    fontsize = 20
+    valign = "left"
+    iconname = icon
+    sline = name # kpi name
+    i = value # kpi value
+    htmlstr = f"""<p style='background-color:rgb({wch_colour_box[0]}, {wch_colour_box[1]}, {wch_colour_box[2]});
+            color: rgb({wch_colour_font[0]}, {wch_colour_font[1]}, {wch_colour_font[2]});
+            font-size: {fontsize + 10}px;
+            font-family: "Source Sans Pro", sans-serif;
+            font-weight: 600;
+            border-radius: 20px;
+            padding-left: 20px;
+            padding-top: 30px;
+            padding-bottom: 30px;
+            line-height:30px; text-align: center;'>
+            <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
+            <i class="fa-solid fas {iconname} fa-2x fa-pull-left "></i>
+            <span style='font-size: {fontsize}px;
+            margin-top: 0'>{sline}</span><BR>
+            {i}
+            </style>
+            </p>"""
+    return htmlstr
+def rating_distribution_pie_chart(classification_df):
+    pie_data = classification_df["rating"].value_counts().sort_index()
+    fig_pie = px.pie(pie_data,height=150, values=pie_data.values, names=pie_data.index, color=pie_data.values, color_discrete_sequence=px.colors.sequential.Blues)
+    fig_pie.update_traces(sort=False)
+    fig_pie.update_layout(margin=dict(t=0, b=0, l=0, r=0))
+    st.plotly_chart(fig_pie, use_container_width=True,height=100)
+def kpi_total_reviews(classification_df):
+    total = str(len(classification_df))
+    total_reviews_card = KPI_card(name = "Total Reviews", value = total, icon = "fa-list")
+    st.markdown(total_reviews_card, unsafe_allow_html=True)
+def kpi_average_rating(classification_df):
+    average_rating = round(classification_df["rating"].mean(), 1)
+    avg_rating_card = KPI_card(name = "Average Rating", value = average_rating, icon = "fa-star")
+    st.markdown(avg_rating_card, unsafe_allow_html=True)
+def time_series_comments(classification_df, freq = "D", metric = "Count Reviews"):
+    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
+    ts_plot_data = pd.DataFrame()
+    grouped_by_day = classification_df.groupby(pd.Grouper(key='time', freq = freq))
+    if metric == "Count Reviews":
+        ts_plot_data[labels] = grouped_by_day[labels].sum()
+        ts_plot_data["Total"] = grouped_by_day["comment"].count()
+        ts_plot_data["time"]= pd.to_datetime(ts_plot_data.index)
+    elif metric == "Average Rating":
+        for i in labels:
+            ts_plot_data[i] = classification_df[classification_df[i] == 1].groupby(pd.Grouper(key='time', freq = freq))["rating"].mean()
+        ts_plot_data["Total"] = grouped_by_day["rating"].mean()
+        ts_plot_data["time"]= pd.to_datetime(ts_plot_data.index)
+    ts_plot_data = ts_plot_data.ffill()
+    fig = px.line(ts_plot_data, x = "time", y = ts_plot_data.columns, height=300, title = f"{metric} on time")
+    fig.update_layout(
+        title=dict(font=dict(size=20), y=1,  x=0),
+        legend=dict(
+        orientation="h",
+        entrywidth=90,
+        yanchor="bottom",
+        y= 0,
+        xanchor="right",
+        x=1, title = None, traceorder = "normal", yref = "container"
+    ), yaxis_title = metric, xaxis = dict(showgrid = False), yaxis = dict(showgrid = False), margin=dict(r=5, l=5, t=50, b=5))
+    st.plotly_chart(fig, use_container_width = True)
+def hor_barchart(classification_df, metric = "Count Reviews"):
+    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
+    data = {}
+    if metric == "Average Rating":
+        for i in labels:
+            data[i] = classification_df[classification_df[i] == 1]["rating"].mean()
+            data = pd.Series(data)
+    elif metric == "Count Reviews":
+        data = classification_df[labels].sum().sort_values(ascending = True)
+    fig = px.bar(data, orientation = "h", width = 350, title = "Most commented topic")
+    fig.update_layout(yaxis_title=None, height=400, xaxis_visible = False, showlegend = False, title=dict(font=dict(size=20), y=0.85,
+                      x=0))
+    st.plotly_chart(fig, use_container_width=True)
+def print_reviews(classification_df):
+    col_1, col_2 = st.columns([1, 3], gap="large")
+    with col_1:
+        labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
+        viewing_method = st.selectbox("$$ \\bold{Viewing \: method: } $$", ["Individual Comments", "Wordcloud"])
+        filter_by = st.multiselect("$$ \\bold{Select \: reviews \: related \: to: } $$", labels)
+        if filter_by:
+            selected_labels = ", ".join(filter_by)
+        else:
+            selected_labels = "All topics"
+        df_filter = classification_df.copy()
+        if filter_by:
+            other = list(set(labels) - set(filter_by))
+            df_filter = df_filter[df_filter[filter_by].all(axis=1) & ~df_filter[other].any(axis=1)]
+    if viewing_method == "Individual Comments":
+        with col_1:
+            filter_rating = st.slider("$$ \\bold{Rating \: range: \:} $$", min_value = 1, max_value= 5, value = (1, 5), step= 1)
+            df_filter = df_filter[(df_filter["rating"] >= filter_rating[0]) & (df_filter["rating"] <= filter_rating[1])]
+            df_filter = df_filter.sort_values("time", ascending = False)
+            if not df_filter.empty:
+                top_n = st.slider("$$ \\bold{Print \: top: \:} $$", min_value = 1, max_value= len(df_filter), value = int(len(df_filter) / 10), step= 1)
+            else:
+                top_n = 0
+        with col_2:
+            st.header(f"Displaying {top_n} most recent reviews related to :red[{selected_labels}]")
+            if not df_filter.empty:
+                comment_container = st.container(height=300)
+                with comment_container:
+                    for i in range(top_n):
+                        st.markdown(f"**Reviews** **{i + 1}:**")
+                        comment = df_filter["comment"].iloc[i]
+                        time = df_filter["time"].iloc[i]
+                        rating = ":star:" * df_filter["rating"].iloc[i]
+                        sender = df_filter["username"].iloc[i]
+                        topics = [topic for topic in labels if df_filter[topic].iloc[i] == 1]
+                        topics_str = ", ".join(topics)
+                        col_1, col_2 = st.columns([1, 2])
+                        col_1.markdown(f"From: {sender}  \n Time: {time}  \n Rating: {rating}")
+                        col_1.markdown(f"Topics: {topics_str}")
+                        col_2.markdown(comment)
+                        st.markdown("---")
+            else:
+                st.markdown("No comment satisfy the condition")
+    else:
+        with col_2:
+            st.header(f"Wordcloud for reviews related to :red[{selected_labels}]")
+            text = " ".join(comment for comment in df_filter["comment"].str.lower().values)
+            text = word_tokenize(text, format = "text")
+            word_cloud = WordCloud(collocations = False, background_color = 'white').generate(text)
+            fig, ax = plt.subplots()
+            # Plot the word cloud on the axes
+            ax.imshow(word_cloud)
+            ax.axis("off")
+            st.pyplot(fig)
+def tornado_chart(df):
+    labels = ["Quality", "Serve", "Pack", "Shipping", "Price"]
+    avg_rating = {}
+    for i in labels:
+        avg_rating[i] = df[df[i] == 1]["rating"].mean()
+    count_reviews = df[labels].sum().sort_values(ascending = True)
+    avg_rating = pd.Series(avg_rating).reindex(index = count_reviews.index)
+    fig = make_subplots(
+            rows=1
+            ,cols=2
+            ,vertical_spacing=0
+    )
+    fig_add = fig.add_trace(
+                go.Histogram(
+                    x= count_reviews.values
+                    ,y= count_reviews.index
+                    ,histfunc='sum'
+                    ,orientation='h'
+                    ,opacity=0.6, name='Count Reviews')
+                ,row=1
+                ,col=1
+    )
+    fig_add = fig.add_trace(
+                go.Histogram(
+                    x= avg_rating.values
+                    ,y= avg_rating.index
+                    ,histfunc='sum'
+                    ,orientation='h'
+                    ,opacity=0.6, name='Average Rating')
+                ,row=1
+                ,col=2
+    )
+    fig_add = fig.update_xaxes(
+                autorange="reversed"
+                ,row=1
+                ,col=1)
+    fig_add = fig.update_xaxes(
+                tickmode='linear'
+                ,dtick=1
+                ,row=1
+                ,col=2)
+    fig_add = fig.update_layout(
+        title="Review Count and Average Rating by Topic",
+    )
+    fig_add = fig.update_yaxes(
+                visible=False
+                ,row=1
+                ,col=2)
+    fig.update_layout(xaxis=dict(domain=[0.0, 0.45]), xaxis2=dict(domain=[0.45, 0.90]))
+    fig.update_layout(legend=dict(orientation='h', xanchor='center', x=0.45))
+    fig.update_layout(
+    width=500,
+    height=300
+    )
+    fig.update_layout(
+        title=dict(font=dict(size=18), y=1,  x=0),
+        legend=dict(
+        orientation="h",
+        entrywidth=90,
+        yanchor="bottom",
+        y= 0,
+        xanchor="right",
+        x=1, title = None, traceorder = "normal", yref = "container"
+    ))
+    fig.update_layout(margin=dict(r=5, l=5, t=50, b=0))
     st.plotly_chart(fig, use_container_width = True)