tanish78 committed on
Commit
a5e7123
·
verified ·
1 Parent(s): 162880f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -16
app.py CHANGED
@@ -7,13 +7,8 @@ from sklearn.decomposition import PCA
7
  import re
8
  from io import BytesIO
9
  import tempfile
10
- from datetime import datetime
11
 
12
  def preprocess_data(df):
13
- # Filter based on the 'Answer' column and the date
14
- df = df[(df['Answer'] == 'Fallback Message shown') & (pd.to_datetime(df['Date'], dayfirst=True) > datetime(2024, 7, 1))]
15
-
16
- # Rename and preprocess the 'Question Asked' column
17
  df.rename(columns={'Question Asked': 'texts'}, inplace=True)
18
  df['texts'] = df['texts'].astype(str)
19
  df['texts'] = df['texts'].str.lower()
@@ -21,17 +16,17 @@ def preprocess_data(df):
21
 
22
  def remove_emoji(string):
23
  emoji_pattern = re.compile("["
24
- u"\U0001F600-\U0001F64F" # emoticons
25
- u"\U0001F300-\U0001F5FF" # symbols & pictographs
26
- u"\U0001F680-\U0001F6FF" # transport & map symbols
27
- u"\U0001F1E0-\U0001F1FF" # flags (iOS)
28
  u"\U00002702-\U000027B0"
29
  u"\U000024C2-\U0001F251"
30
  "]+", flags=re.UNICODE)
31
- return emoji_pattern.sub(r'', string)
32
 
33
  df['texts'] = df['texts'].apply(remove_emoji)
34
-
35
  custom_synonyms = {
36
  'application': ['form'],
37
  'apply': ['fill', 'applied'],
@@ -139,7 +134,11 @@ def visualize_clusters(df):
139
 
140
  def main(file, num_clusters_to_display):
141
  try:
142
- df = pd.read_csv(file.name)
 
 
 
 
143
  df = preprocess_data(df)
144
  df = cluster_data(df)
145
  visualize_clusters(df)
@@ -153,7 +152,7 @@ def main(file, num_clusters_to_display):
153
  filtered_clusters = [cluster for cluster in sorted_clusters if cluster != 0]
154
  top_clusters = filtered_clusters[:num_clusters_to_display]
155
 
156
- df = df[df['texts'].isin(top_clusters)]
157
  df['Cluster'] = pd.Categorical(df['Cluster'], categories=top_clusters, ordered=True)
158
  df = df.sort_values('Cluster')
159
 
@@ -167,11 +166,11 @@ interface = gr.Interface(
167
  fn=main,
168
  inputs=[
169
  gr.File(label="Upload CSV File (.csv)"),
170
- gr.Slider(1, 10, step=1, label="Number of Categories to Display")
171
  ],
172
  outputs=gr.File(label="Clustered Data CSV"),
173
  title="Unanswered User Queries Clustering",
174
- description="Upload a CSV file (.csv) and select the number of largest clusters to display (excluding cluster 0)"
175
  )
176
 
177
- interface.launch()
 
7
  import re
8
  from io import BytesIO
9
  import tempfile
 
10
 
11
  def preprocess_data(df):
 
 
 
 
12
  df.rename(columns={'Question Asked': 'texts'}, inplace=True)
13
  df['texts'] = df['texts'].astype(str)
14
  df['texts'] = df['texts'].str.lower()
 
16
 
17
  def remove_emoji(string):
18
  emoji_pattern = re.compile("["
19
+ u"\U0001F600-\U0001F64F"
20
+ u"\U0001F300-\U0001F5FF"
21
+ u"\U0001F680-\U0001F6FF"
22
+ u"\U0001F1E0-\U0001F1FF"
23
  u"\U00002702-\U000027B0"
24
  u"\U000024C2-\U0001F251"
25
  "]+", flags=re.UNICODE)
26
+ return emoji_pattern.sub(r'', string) if isinstance(string, str) else string
27
 
28
  df['texts'] = df['texts'].apply(remove_emoji)
29
+
30
  custom_synonyms = {
31
  'application': ['form'],
32
  'apply': ['fill', 'applied'],
 
134
 
135
  def main(file, num_clusters_to_display):
136
  try:
137
+ df = pd.read_csv(file)
138
+
139
+ # Filter by 'Fallback Message shown' and date after '01/07/24'
140
+ df = df[(df['Answer'] == 'Fallback Message shown') & (pd.to_datetime(df['Date and Time']) > '2024-07-01')]
141
+
142
  df = preprocess_data(df)
143
  df = cluster_data(df)
144
  visualize_clusters(df)
 
152
  filtered_clusters = [cluster for cluster in sorted_clusters if cluster != 0]
153
  top_clusters = filtered_clusters[:num_clusters_to_display]
154
 
155
+ df = df[df['Cluster'].isin(top_clusters)]
156
  df['Cluster'] = pd.Categorical(df['Cluster'], categories=top_clusters, ordered=True)
157
  df = df.sort_values('Cluster')
158
 
 
166
  fn=main,
167
  inputs=[
168
  gr.File(label="Upload CSV File (.csv)"),
169
+ gr.Slider(label="Number of Categories to Display", minimum=1, maximum=20, step=1, value=5)
170
  ],
171
  outputs=gr.File(label="Clustered Data CSV"),
172
  title="Unanswered User Queries Clustering",
173
+ description="Upload a CSV file (.csv) and select the number of largest clusters to display (excluding cluster 0)."
174
  )
175
 
176
+ interface.launch(share=True)