tanish78 committed on
Commit
904a011
·
verified ·
1 Parent(s): 5d93751

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -108,8 +108,7 @@ def preprocess_data(df):
108
  return df
109
 
110
  def cluster_data(df):
111
- # Set the number of clusters here
112
- num_clusters = 5
113
  vectorizer = TfidfVectorizer(stop_words='english')
114
  X = vectorizer.fit_transform(df['texts'])
115
 
@@ -139,7 +138,12 @@ def main(file):
139
  df = preprocess_data(df)
140
  df = cluster_data(df)
141
  visualize_clusters(df)
142
-
 
 
 
 
 
143
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
144
  df.to_csv(tmpfile.name, index=False)
145
  return tmpfile.name
 
108
  return df
109
 
110
  def cluster_data(df):
111
+ num_clusters = 15 # Set the number of clusters to 15
 
112
  vectorizer = TfidfVectorizer(stop_words='english')
113
  X = vectorizer.fit_transform(df['texts'])
114
 
 
138
  df = preprocess_data(df)
139
  df = cluster_data(df)
140
  visualize_clusters(df)
141
+
142
+ cluster_sizes = df['Cluster'].value_counts()
143
+ sorted_clusters = cluster_sizes.index.tolist()
144
+ df['Cluster'] = pd.Categorical(df['Cluster'], categories=sorted_clusters, ordered=True)
145
+ df = df.sort_values('Cluster')
146
+
147
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
148
  df.to_csv(tmpfile.name, index=False)
149
  return tmpfile.name