tanish78 committed on
Commit
5d93751
·
verified ·
1 Parent(s): 6847e76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -107,7 +107,9 @@ def preprocess_data(df):
107
 
108
  return df
109
 
110
- def cluster_data(df, num_clusters=5):
 
 
111
  vectorizer = TfidfVectorizer(stop_words='english')
112
  X = vectorizer.fit_transform(df['texts'])
113
 
@@ -131,11 +133,11 @@ def visualize_clusters(df):
131
  plt.ylabel('PCA Component 2')
132
  plt.show()
133
 
134
- def main(file, num_clusters):
135
  try:
136
  df = pd.read_excel(file)
137
  df = preprocess_data(df)
138
- df = cluster_data(df, num_clusters)
139
  visualize_clusters(df)
140
 
141
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
@@ -146,10 +148,7 @@ def main(file, num_clusters):
146
 
147
  interface = gr.Interface(
148
  fn=main,
149
- inputs=[
150
- gr.File(label="Upload Excel File (.xlsx)"),
151
- gr.Number(value=5, label="Number of Clusters")
152
- ],
153
  outputs=gr.File(label="Clustered Data CSV"),
154
  title="Unanswered User Queries Clustering",
155
  description="Upload an Excel file (.xlsx)"
 
107
 
108
  return df
109
 
110
+ def cluster_data(df):
111
+ # Set the number of clusters here
112
+ num_clusters = 5
113
  vectorizer = TfidfVectorizer(stop_words='english')
114
  X = vectorizer.fit_transform(df['texts'])
115
 
 
133
  plt.ylabel('PCA Component 2')
134
  plt.show()
135
 
136
+ def main(file):
137
  try:
138
  df = pd.read_excel(file)
139
  df = preprocess_data(df)
140
+ df = cluster_data(df)
141
  visualize_clusters(df)
142
 
143
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
 
148
 
149
  interface = gr.Interface(
150
  fn=main,
151
+ inputs=gr.File(label="Upload Excel File (.xlsx)"),
 
 
 
152
  outputs=gr.File(label="Clustered Data CSV"),
153
  title="Unanswered User Queries Clustering",
154
  description="Upload an Excel file (.xlsx)"