Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -131,7 +131,10 @@ def visualize_clusters(df):
|
|
| 131 |
plt.title('Clusters of User Queries')
|
| 132 |
plt.xlabel('PCA Component 1')
|
| 133 |
plt.ylabel('PCA Component 2')
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
def silhouette_analysis(X, labels, num_clusters):
|
| 137 |
fig, ax1 = plt.subplots(1, 1)
|
|
@@ -160,7 +163,10 @@ def silhouette_analysis(X, labels, num_clusters):
|
|
| 160 |
ax1.set_yticks([])
|
| 161 |
ax1.set_xticks([i/10.0 for i in range(-1, 11)])
|
| 162 |
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
def main(file, num_clusters_to_display):
|
| 166 |
try:
|
|
@@ -171,7 +177,8 @@ def main(file, num_clusters_to_display):
|
|
| 171 |
|
| 172 |
df = preprocess_data(df)
|
| 173 |
df, X, kmeans = cluster_data(df, num_clusters=15)
|
| 174 |
-
|
|
|
|
| 175 |
|
| 176 |
cluster_sizes = df['Cluster'].value_counts()
|
| 177 |
sorted_clusters = cluster_sizes.index.tolist()
|
|
@@ -191,9 +198,9 @@ def main(file, num_clusters_to_display):
|
|
| 191 |
|
| 192 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
|
| 193 |
df.to_csv(tmpfile.name, index=False)
|
| 194 |
-
|
| 195 |
-
return tmpfile.name, silhouette_avg, silhouette_plot
|
| 196 |
except Exception as e:
|
|
|
|
| 197 |
return str(e), None, None
|
| 198 |
|
| 199 |
interface = gr.Interface(
|
|
@@ -205,10 +212,10 @@ interface = gr.Interface(
|
|
| 205 |
outputs=[
|
| 206 |
gr.File(label="Clustered Data CSV"),
|
| 207 |
gr.Number(label="Silhouette Score"),
|
| 208 |
-
gr.
|
| 209 |
],
|
| 210 |
title="Unanswered User Queries Clustering",
|
| 211 |
description="Unanswered User Query Categorization"
|
| 212 |
)
|
| 213 |
|
| 214 |
-
interface.launch(share=True)
|
|
|
|
| 131 |
plt.title('Clusters of User Queries')
|
| 132 |
plt.xlabel('PCA Component 1')
|
| 133 |
plt.ylabel('PCA Component 2')
|
| 134 |
+
buf = BytesIO()
|
| 135 |
+
plt.savefig(buf, format='png')
|
| 136 |
+
buf.seek(0)
|
| 137 |
+
return buf
|
| 138 |
|
| 139 |
def silhouette_analysis(X, labels, num_clusters):
|
| 140 |
fig, ax1 = plt.subplots(1, 1)
|
|
|
|
| 163 |
ax1.set_yticks([])
|
| 164 |
ax1.set_xticks([i/10.0 for i in range(-1, 11)])
|
| 165 |
|
| 166 |
+
buf = BytesIO()
|
| 167 |
+
plt.savefig(buf, format='png')
|
| 168 |
+
buf.seek(0)
|
| 169 |
+
return buf
|
| 170 |
|
| 171 |
def main(file, num_clusters_to_display):
|
| 172 |
try:
|
|
|
|
| 177 |
|
| 178 |
df = preprocess_data(df)
|
| 179 |
df, X, kmeans = cluster_data(df, num_clusters=15)
|
| 180 |
+
|
| 181 |
+
cluster_plot = visualize_clusters(df)
|
| 182 |
|
| 183 |
cluster_sizes = df['Cluster'].value_counts()
|
| 184 |
sorted_clusters = cluster_sizes.index.tolist()
|
|
|
|
| 198 |
|
| 199 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
|
| 200 |
df.to_csv(tmpfile.name, index=False)
|
| 201 |
+
return tmpfile.name, silhouette_avg, silhouette_plot
|
|
|
|
| 202 |
except Exception as e:
|
| 203 |
+
print(f"Error: {e}")
|
| 204 |
return str(e), None, None
|
| 205 |
|
| 206 |
interface = gr.Interface(
|
|
|
|
| 212 |
outputs=[
|
| 213 |
gr.File(label="Clustered Data CSV"),
|
| 214 |
gr.Number(label="Silhouette Score"),
|
| 215 |
+
gr.Image(label="Silhouette Plot")
|
| 216 |
],
|
| 217 |
title="Unanswered User Queries Clustering",
|
| 218 |
description="Unanswered User Query Categorization"
|
| 219 |
)
|
| 220 |
|
| 221 |
+
interface.launch(share=True)
|