Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -108,8 +108,7 @@ def preprocess_data(df):
|
|
| 108 |
return df
|
| 109 |
|
| 110 |
def cluster_data(df):
|
| 111 |
-
# Set the number of clusters
|
| 112 |
-
num_clusters = 5
|
| 113 |
vectorizer = TfidfVectorizer(stop_words='english')
|
| 114 |
X = vectorizer.fit_transform(df['texts'])
|
| 115 |
|
|
@@ -139,7 +138,12 @@ def main(file):
|
|
| 139 |
df = preprocess_data(df)
|
| 140 |
df = cluster_data(df)
|
| 141 |
visualize_clusters(df)
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
|
| 144 |
df.to_csv(tmpfile.name, index=False)
|
| 145 |
return tmpfile.name
|
|
|
|
| 108 |
return df
|
| 109 |
|
| 110 |
def cluster_data(df):
|
| 111 |
+
num_clusters = 15 # Set the number of clusters to 15
|
|
|
|
| 112 |
vectorizer = TfidfVectorizer(stop_words='english')
|
| 113 |
X = vectorizer.fit_transform(df['texts'])
|
| 114 |
|
|
|
|
| 138 |
df = preprocess_data(df)
|
| 139 |
df = cluster_data(df)
|
| 140 |
visualize_clusters(df)
|
| 141 |
+
|
| 142 |
+
cluster_sizes = df['Cluster'].value_counts()
|
| 143 |
+
sorted_clusters = cluster_sizes.index.tolist()
|
| 144 |
+
df['Cluster'] = pd.Categorical(df['Cluster'], categories=sorted_clusters, ordered=True)
|
| 145 |
+
df = df.sort_values('Cluster')
|
| 146 |
+
|
| 147 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
|
| 148 |
df.to_csv(tmpfile.name, index=False)
|
| 149 |
return tmpfile.name
|