Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -108,7 +108,7 @@ def preprocess_data(df):
|
|
| 108 |
return df
|
| 109 |
|
| 110 |
def cluster_data(df):
|
| 111 |
-
num_clusters = 15
|
| 112 |
vectorizer = TfidfVectorizer(stop_words='english')
|
| 113 |
X = vectorizer.fit_transform(df['texts'])
|
| 114 |
|
|
@@ -145,11 +145,10 @@ def main(file, num_clusters_to_display):
|
|
| 145 |
|
| 146 |
cluster_sizes = df['Cluster'].value_counts()
|
| 147 |
sorted_clusters = cluster_sizes.index.tolist()
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
filtered_clusters = [cluster for cluster in sorted_clusters if cluster != 10]
|
| 153 |
top_clusters = filtered_clusters[:num_clusters_to_display]
|
| 154 |
|
| 155 |
df = df[df['Cluster'].isin(top_clusters)]
|
|
|
|
| 108 |
return df
|
| 109 |
|
| 110 |
def cluster_data(df):
|
| 111 |
+
num_clusters = 15 # Set the number of clusters
|
| 112 |
vectorizer = TfidfVectorizer(stop_words='english')
|
| 113 |
X = vectorizer.fit_transform(df['texts'])
|
| 114 |
|
|
|
|
| 145 |
|
| 146 |
cluster_sizes = df['Cluster'].value_counts()
|
| 147 |
sorted_clusters = cluster_sizes.index.tolist()
|
| 148 |
+
|
| 149 |
+
# Exclude the largest cluster
|
| 150 |
+
largest_cluster = sorted_clusters[0]
|
| 151 |
+
filtered_clusters = [cluster for cluster in sorted_clusters if cluster != largest_cluster]
|
|
|
|
| 152 |
top_clusters = filtered_clusters[:num_clusters_to_display]
|
| 153 |
|
| 154 |
df = df[df['Cluster'].isin(top_clusters)]
|