tanish78 committed on
Commit
aa1c7d7
·
verified ·
1 Parent(s): 148df09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -0
app.py CHANGED
@@ -129,9 +129,12 @@ def preprocess_data(df):
129
  df['texts'] = df['texts'].apply(lambda x: x.strip()) # Remove leading and trailing whitespaces
130
  df = df[df['texts'] != '']
131
 
 
 
132
  return df
133
 
134
  def cluster_data(df, num_clusters=5):
 
135
  # Vectorize the text data
136
  vectorizer = TfidfVectorizer(stop_words='english')
137
  X = vectorizer.fit_transform(df['texts'])
@@ -147,6 +150,7 @@ def cluster_data(df, num_clusters=5):
147
  df['PCA1'] = principal_components[:, 0]
148
  df['PCA2'] = principal_components[:, 1]
149
 
 
150
  return df
151
 
152
  def visualize_clusters(df):
 
129
  df['texts'] = df['texts'].apply(lambda x: x.strip()) # Remove leading and trailing whitespaces
130
  df = df[df['texts'] != '']
131
 
132
+ print("Preprocessing completed.")
133
+
134
  return df
135
 
136
  def cluster_data(df, num_clusters=5):
137
+ print("Clustering data...")
138
  # Vectorize the text data
139
  vectorizer = TfidfVectorizer(stop_words='english')
140
  X = vectorizer.fit_transform(df['texts'])
 
150
  df['PCA1'] = principal_components[:, 0]
151
  df['PCA2'] = principal_components[:, 1]
152
 
153
+ print("Clustering completed.")
154
  return df
155
 
156
  def visualize_clusters(df):