Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -107,8 +107,13 @@ def preprocess_data(df):
|
|
| 107 |
|
| 108 |
return df
|
| 109 |
|
| 110 |
-
def
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
vectorizer = TfidfVectorizer(stop_words='english')
|
| 113 |
X = vectorizer.fit_transform(df['texts'])
|
| 114 |
|
|
@@ -134,16 +139,18 @@ def visualize_clusters(df):
|
|
| 134 |
|
| 135 |
def main(file, num_clusters_to_display):
|
| 136 |
try:
|
| 137 |
-
|
| 138 |
-
if
|
| 139 |
df = pd.read_excel(file)
|
| 140 |
-
|
|
|
|
| 141 |
df = pd.read_csv(file)
|
|
|
|
| 142 |
else:
|
| 143 |
-
return "Unsupported file format. Please upload
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
df = cluster_data(df)
|
| 147 |
visualize_clusters(df)
|
| 148 |
|
| 149 |
cluster_sizes = df['Cluster'].value_counts()
|
|
@@ -168,7 +175,7 @@ def main(file, num_clusters_to_display):
|
|
| 168 |
interface = gr.Interface(
|
| 169 |
fn=main,
|
| 170 |
inputs=[
|
| 171 |
-
gr.File(label="Upload Excel or CSV File (.xlsx
|
| 172 |
gr.Slider(1, 10, step=1, label="Number of Categories to Display")
|
| 173 |
],
|
| 174 |
outputs=gr.File(label="Clustered Data CSV"),
|
|
|
|
| 107 |
|
| 108 |
return df
|
| 109 |
|
| 110 |
+
def preprocess_csv_data(df):
|
| 111 |
+
df = df[df['Answer'] == 'Fallback Message shown']
|
| 112 |
+
df.rename(columns={'Question': 'texts'}, inplace=True)
|
| 113 |
+
df['texts'] = df['texts'].astype(str)
|
| 114 |
+
return preprocess_data(df)
|
| 115 |
+
|
| 116 |
+
def cluster_data(df, num_clusters):
|
| 117 |
vectorizer = TfidfVectorizer(stop_words='english')
|
| 118 |
X = vectorizer.fit_transform(df['texts'])
|
| 119 |
|
|
|
|
| 139 |
|
| 140 |
def main(file, num_clusters_to_display):
|
| 141 |
try:
|
| 142 |
+
file_ext = file.name.split('.')[-1].lower()
|
| 143 |
+
if file_ext == 'xlsx':
|
| 144 |
df = pd.read_excel(file)
|
| 145 |
+
df = preprocess_data(df)
|
| 146 |
+
elif file_ext == 'csv':
|
| 147 |
df = pd.read_csv(file)
|
| 148 |
+
df = preprocess_csv_data(df)
|
| 149 |
else:
|
| 150 |
+
return "Unsupported file format. Please upload an Excel (.xlsx) or CSV (.csv) file."
|
| 151 |
+
|
| 152 |
+
num_clusters = 10 # Set the number of clusters
|
| 153 |
+
df = cluster_data(df, num_clusters)
|
| 154 |
visualize_clusters(df)
|
| 155 |
|
| 156 |
cluster_sizes = df['Cluster'].value_counts()
|
|
|
|
| 175 |
interface = gr.Interface(
|
| 176 |
fn=main,
|
| 177 |
inputs=[
|
| 178 |
+
gr.File(label="Upload Excel or CSV File (.xlsx or .csv)"),
|
| 179 |
gr.Slider(1, 10, step=1, label="Number of Categories to Display")
|
| 180 |
],
|
| 181 |
outputs=gr.File(label="Clustered Data CSV"),
|