Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ from io import BytesIO
|
|
| 7 |
import tempfile
|
| 8 |
from wordcloud import WordCloud, STOPWORDS
|
| 9 |
import matplotlib.pyplot as plt
|
|
|
|
| 10 |
from PIL import Image
|
| 11 |
|
| 12 |
def preprocess_data(df):
|
|
@@ -99,6 +100,26 @@ def generate_wordcloud(df):
|
|
| 99 |
img = Image.open(buf)
|
| 100 |
return img
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
def main(file, num_clusters_to_display):
|
| 103 |
try:
|
| 104 |
df = pd.read_csv(file)
|
|
@@ -122,15 +143,16 @@ def main(file, num_clusters_to_display):
|
|
| 122 |
df = df.sort_values('Cluster')
|
| 123 |
|
| 124 |
wordcloud_img = generate_wordcloud(df)
|
|
|
|
| 125 |
|
| 126 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
|
| 127 |
df.to_csv(tmpfile.name, index=False)
|
| 128 |
csv_file_path = tmpfile.name
|
| 129 |
|
| 130 |
-
return csv_file_path, wordcloud_img
|
| 131 |
except Exception as e:
|
| 132 |
print(f"Error: {e}")
|
| 133 |
-
return str(e), None
|
| 134 |
|
| 135 |
interface = gr.Interface(
|
| 136 |
fn=main,
|
|
@@ -140,7 +162,8 @@ interface = gr.Interface(
|
|
| 140 |
],
|
| 141 |
outputs=[
|
| 142 |
gr.File(label="Clustered Data CSV"),
|
| 143 |
-
gr.Image(label="Word Cloud")
|
|
|
|
| 144 |
],
|
| 145 |
title="Unanswered User Queries Clustering",
|
| 146 |
description="Unanswered User Query Categorization"
|
|
|
|
| 7 |
import tempfile
|
| 8 |
from wordcloud import WordCloud, STOPWORDS
|
| 9 |
import matplotlib.pyplot as plt
|
| 10 |
+
import plotly.express as px
|
| 11 |
from PIL import Image
|
| 12 |
|
| 13 |
def preprocess_data(df):
|
|
|
|
| 100 |
img = Image.open(buf)
|
| 101 |
return img
|
| 102 |
|
| 103 |
+
def generate_bar_chart(df, num_clusters_to_display):
    """Build a bar chart of cluster sizes annotated with each cluster's top word.

    Clusters are ranked by row count (``value_counts`` on the ``Cluster``
    column). The ranking is sliced from position 1, so the single largest
    cluster is skipped and the next ``num_clusters_to_display`` clusters are
    charted.

    Args:
        df: DataFrame with a ``Cluster`` column and a ``texts`` column.
        num_clusters_to_display: Number of clusters to include in the chart.

    Returns:
        The chart rendered to PNG and opened as a PIL image.
    """

    def _most_frequent_word(texts):
        # Drop missing values and coerce the rest to str so that ' '.join()
        # cannot raise TypeError on NaN or numeric cells.
        words = ' '.join(texts.dropna().astype(str)).split()
        if not words:
            # A cluster whose text is entirely blank has no top word; label it
            # with an empty string instead of raising IndexError.
            return ''
        return pd.Series(words).value_counts().index[0]

    # NOTE(review): the slice starts at 1, which leaves out the largest
    # cluster -- presumably deliberate (e.g. a catch-all cluster); confirm.
    top_clusters = df['Cluster'].value_counts().index[1:num_clusters_to_display + 1]
    df_top_clusters = df[df['Cluster'].isin(top_clusters)]

    # One row per cluster holding its most frequent whitespace-separated token.
    cluster_top_words = (
        df_top_clusters.groupby('Cluster')['texts']
        .apply(_most_frequent_word)
        .reset_index()
        .rename(columns={'texts': 'top_word'})
    )

    # Column names produced by value_counts().reset_index() differ between
    # pandas versions, so they are overwritten positionally.
    cluster_sizes = df_top_clusters['Cluster'].value_counts().reset_index()
    cluster_sizes.columns = ['Cluster', 'Count']
    cluster_sizes = cluster_sizes.merge(cluster_top_words[['Cluster', 'top_word']], on='Cluster')

    fig = px.bar(cluster_sizes, x='Cluster', y='Count', text='top_word', title='Top Clusters by Frequency with Top Word/Phrase')
    fig.update_traces(textposition='outside')
    fig.update_layout(xaxis_title='Cluster', yaxis_title='Frequency', showlegend=False)

    # Render into an in-memory buffer and hand back a PIL image.
    buf = BytesIO()
    fig.write_image(buf, format='png')
    buf.seek(0)
    img = Image.open(buf)
    return img
|
| 122 |
+
|
| 123 |
def main(file, num_clusters_to_display):
|
| 124 |
try:
|
| 125 |
df = pd.read_csv(file)
|
|
|
|
| 143 |
df = df.sort_values('Cluster')
|
| 144 |
|
| 145 |
wordcloud_img = generate_wordcloud(df)
|
| 146 |
+
bar_chart_img = generate_bar_chart(df, num_clusters_to_display)
|
| 147 |
|
| 148 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
|
| 149 |
df.to_csv(tmpfile.name, index=False)
|
| 150 |
csv_file_path = tmpfile.name
|
| 151 |
|
| 152 |
+
return csv_file_path, wordcloud_img, bar_chart_img
|
| 153 |
except Exception as e:
|
| 154 |
print(f"Error: {e}")
|
| 155 |
+
return str(e), None, None
|
| 156 |
|
| 157 |
interface = gr.Interface(
|
| 158 |
fn=main,
|
|
|
|
| 162 |
],
|
| 163 |
outputs=[
|
| 164 |
gr.File(label="Clustered Data CSV"),
|
| 165 |
+
gr.Image(label="Word Cloud"),
|
| 166 |
+
gr.Image(label="Bar Chart")
|
| 167 |
],
|
| 168 |
title="Unanswered User Queries Clustering",
|
| 169 |
description="Unanswered User Query Categorization"
|