tanish78 committed on
Commit
46cfa3b
·
verified ·
1 Parent(s): ae43fca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -8
app.py CHANGED
@@ -8,7 +8,6 @@ import re
8
  from io import BytesIO
9
 
10
  def preprocess_data(df):
11
- print("Preprocessing data...")
12
 
13
  # Renaming the 'Queries' column to 'texts'
14
  df.rename(columns={'Queries': 'texts'}, inplace=True)
@@ -129,12 +128,12 @@ def preprocess_data(df):
129
  df['texts'] = df['texts'].apply(lambda x: x.strip()) # Remove leading and trailing whitespaces
130
  df = df[df['texts'] != '']
131
 
132
- print("Preprocessing completed.")
133
 
134
  return df
135
 
136
  def cluster_data(df, num_clusters=5):
137
- print("Clustering data...")
138
  # Vectorize the text data
139
  vectorizer = TfidfVectorizer(stop_words='english')
140
  X = vectorizer.fit_transform(df['texts'])
@@ -150,7 +149,7 @@ def cluster_data(df, num_clusters=5):
150
  df['PCA1'] = principal_components[:, 0]
151
  df['PCA2'] = principal_components[:, 1]
152
 
153
- print("Clustering completed.")
154
  return df
155
 
156
  def visualize_clusters(df):
@@ -165,18 +164,18 @@ def visualize_clusters(df):
165
 
166
  def main(file, num_clusters):
167
  try:
168
- print("Reading the file...")
169
  df = pd.read_excel(file)
170
- print("File read successfully.")
171
 
172
  df = preprocess_data(df)
173
  df = cluster_data(df, num_clusters)
174
  visualize_clusters(df)
175
 
176
- print("Returning the dataframe.")
177
  return df
178
  except Exception as e:
179
- print("Error occurred:", e)
180
  return str(e)
181
 
182
  interface = gr.Interface(
 
8
  from io import BytesIO
9
 
10
  def preprocess_data(df):
 
11
 
12
  # Renaming the 'Queries' column to 'texts'
13
  df.rename(columns={'Queries': 'texts'}, inplace=True)
 
128
  df['texts'] = df['texts'].apply(lambda x: x.strip()) # Remove leading and trailing whitespaces
129
  df = df[df['texts'] != '']
130
 
131
+
132
 
133
  return df
134
 
135
  def cluster_data(df, num_clusters=5):
136
+
137
  # Vectorize the text data
138
  vectorizer = TfidfVectorizer(stop_words='english')
139
  X = vectorizer.fit_transform(df['texts'])
 
149
  df['PCA1'] = principal_components[:, 0]
150
  df['PCA2'] = principal_components[:, 1]
151
 
152
+
153
  return df
154
 
155
  def visualize_clusters(df):
 
164
 
165
  def main(file, num_clusters):
166
  try:
167
+
168
  df = pd.read_excel(file)
169
+
170
 
171
  df = preprocess_data(df)
172
  df = cluster_data(df, num_clusters)
173
  visualize_clusters(df)
174
 
175
+
176
  return df
177
  except Exception as e:
178
+
179
  return str(e)
180
 
181
  interface = gr.Interface(