tanish78 committed on
Commit
52378d9
·
verified ·
1 Parent(s): 1c0a2ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -108,7 +108,7 @@ def preprocess_data(df):
108
  return df
109
 
110
  def cluster_data(df):
111
- num_clusters = 15 # Set the number of clusters
112
  vectorizer = TfidfVectorizer(stop_words='english')
113
  X = vectorizer.fit_transform(df['texts'])
114
 
@@ -145,11 +145,10 @@ def main(file, num_clusters_to_display):
145
 
146
  cluster_sizes = df['Cluster'].value_counts()
147
  sorted_clusters = cluster_sizes.index.tolist()
148
- df['Cluster'] = pd.Categorical(df['Cluster'], categories=sorted_clusters, ordered=True)
149
- df = df.sort_values('Cluster')
150
-
151
- # Filter out base cluster and get the largest clusters
152
- filtered_clusters = [cluster for cluster in sorted_clusters if cluster != 10]
153
  top_clusters = filtered_clusters[:num_clusters_to_display]
154
 
155
  df = df[df['Cluster'].isin(top_clusters)]
 
108
  return df
109
 
110
  def cluster_data(df):
111
+ num_clusters = 15 # Set the number of clusters
112
  vectorizer = TfidfVectorizer(stop_words='english')
113
  X = vectorizer.fit_transform(df['texts'])
114
 
 
145
 
146
  cluster_sizes = df['Cluster'].value_counts()
147
  sorted_clusters = cluster_sizes.index.tolist()
148
+
149
+ # Exclude the largest cluster
150
+ largest_cluster = sorted_clusters[0]
151
+ filtered_clusters = [cluster for cluster in sorted_clusters if cluster != largest_cluster]
 
152
  top_clusters = filtered_clusters[:num_clusters_to_display]
153
 
154
  df = df[df['Cluster'].isin(top_clusters)]