tanish78 committed on
Commit
bd2d8e4
·
verified ·
1 Parent(s): c1344f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -20
app.py CHANGED
@@ -10,7 +10,8 @@ import matplotlib.pyplot as plt
10
  import plotly.express as px
11
  from PIL import Image
12
 
13
- categories_keywords = {
 
14
  'Application Status': ['application', 'applied', 'update on my application', 'result of my application', 'selected', 'selection process', 'apply', 'fellow', 'lesson plan', 'status of my application', 'application update', 'application status', 'applied for'],
15
  'Volunteering': ['volunteering', 'volunteer', 'volunteering certificate', 'resume my volunteering', 'volunteering journey', 'volunteering with TFI', 'volunteering opportunities', 'volunteer work', 'volunteer program'],
16
  'Certificates': ['certificate', 'certificates', 'certificate of completion', 'volunteer certificate', 'issue certificate'],
@@ -29,22 +30,29 @@ categories_keywords = {
29
  'Miscellaneous': []
30
  }
31
 
 
 
 
 
 
 
 
 
32
 
33
- def categorize_question(question):
 
 
 
34
  for category, keywords in categories_keywords.items():
35
  for keyword in keywords:
36
  if keyword.lower() in question.lower():
37
- # Check if the question is one word and belongs to 'End of Conversation'
38
  if category == 'End of Conversation':
39
  return category
40
- # If not 'End of Conversation', return the matched category
41
  if category != 'End of Conversation':
42
  return category
43
  return 'Miscellaneous'
44
 
45
-
46
-
47
- def preprocess_data(df):
48
  df.rename(columns={'Question Asked': 'texts'}, inplace=True)
49
  df['texts'] = df['texts'].astype(str).str.lower()
50
  df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
@@ -95,8 +103,7 @@ def preprocess_data(df):
95
  df['texts'] = df['texts'].str.strip()
96
  df = df[df['texts'] != '']
97
 
98
- # Categorize the texts
99
- df['Category'] = df['texts'].apply(categorize_question)
100
 
101
  return df
102
 
@@ -138,7 +145,6 @@ def generate_wordcloud(df):
138
  return img
139
 
140
  def generate_bar_chart(df, num_clusters_to_display):
141
- # Exclude common words from the top words
142
  common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
143
 
144
  top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
@@ -160,27 +166,22 @@ def generate_bar_chart(df, num_clusters_to_display):
160
  img = Image.open(buf)
161
  return img
162
 
163
- def main(file, num_clusters_to_display):
164
  try:
165
  df = pd.read_csv(file)
166
 
167
- # Filter by 'Fallback Message shown'
168
  df = df[df['Answer'] == 'Fallback Message shown']
169
 
170
- df = preprocess_data(df)
171
 
172
- # Get category sizes and sort by size in ascending order
173
  category_sizes = df['Category'].value_counts().reset_index()
174
  category_sizes.columns = ['Category', 'Count']
175
  sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
176
 
177
- # Get the largest x categories as specified by num_clusters_to_display
178
  largest_categories = sorted_categories[:num_clusters_to_display]
179
 
180
- # Filter the dataframe to include only the largest categories
181
  filtered_df = df[df['Category'].isin(largest_categories)]
182
 
183
- # Sort the dataframe by Category
184
  filtered_df = filtered_df.sort_values(by='Category')
185
 
186
  wordcloud_img = generate_wordcloud(filtered_df)
@@ -195,8 +196,31 @@ def main(file, num_clusters_to_display):
195
  print(f"Error: {e}")
196
  return str(e), None, None
197
 
198
- interface = gr.Interface(
199
- fn=main,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  inputs=[
201
  gr.File(label="Upload CSV File (.csv)"),
202
  gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
@@ -210,5 +234,6 @@ interface = gr.Interface(
210
  description="Categorize unanswered user queries into predefined categories"
211
  )
212
 
213
- interface.launch(share=True)
214
 
 
 
10
  import plotly.express as px
11
  from PIL import Image
12
 
13
+ # Define categories for Bot 1 and Bot 2
14
+ categories_keywords_bot1 = {
15
  'Application Status': ['application', 'applied', 'update on my application', 'result of my application', 'selected', 'selection process', 'apply', 'fellow', 'lesson plan', 'status of my application', 'application update', 'application status', 'applied for'],
16
  'Volunteering': ['volunteering', 'volunteer', 'volunteering certificate', 'resume my volunteering', 'volunteering journey', 'volunteering with TFI', 'volunteering opportunities', 'volunteer work', 'volunteer program'],
17
  'Certificates': ['certificate', 'certificates', 'certificate of completion', 'volunteer certificate', 'issue certificate'],
 
30
  'Miscellaneous': []
31
  }
32
 
33
+ categories_keywords_bot2 = {
34
+ 'Service Issues': ['service', 'support', 'help', 'assistance'],
35
+ 'Billing': ['bill', 'billing', 'invoice', 'payment', 'charge'],
36
+ 'Technical Problems': ['technical', 'error', 'problem', 'issue'],
37
+ 'Account Management': ['account', 'login', 'credentials', 'password'],
38
+ 'Product Information': ['product', 'information', 'details', 'specifications'],
39
+ 'Miscellaneous': []
40
+ }
41
 
42
+ # Initialize with Bot 1's categories
43
+ categories_keywords = categories_keywords_bot1
44
+
45
+ def categorize_question(question, categories_keywords):
46
  for category, keywords in categories_keywords.items():
47
  for keyword in keywords:
48
  if keyword.lower() in question.lower():
 
49
  if category == 'End of Conversation':
50
  return category
 
51
  if category != 'End of Conversation':
52
  return category
53
  return 'Miscellaneous'
54
 
55
+ def preprocess_data(df, categories_keywords):
 
 
56
  df.rename(columns={'Question Asked': 'texts'}, inplace=True)
57
  df['texts'] = df['texts'].astype(str).str.lower()
58
  df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
 
103
  df['texts'] = df['texts'].str.strip()
104
  df = df[df['texts'] != '']
105
 
106
+ df['Category'] = df['texts'].apply(lambda x: categorize_question(x, categories_keywords))
 
107
 
108
  return df
109
 
 
145
  return img
146
 
147
  def generate_bar_chart(df, num_clusters_to_display):
 
148
  common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
149
 
150
  top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
 
166
  img = Image.open(buf)
167
  return img
168
 
169
+ def main(file, num_clusters_to_display, categories_keywords):
170
  try:
171
  df = pd.read_csv(file)
172
 
 
173
  df = df[df['Answer'] == 'Fallback Message shown']
174
 
175
+ df = preprocess_data(df, categories_keywords)
176
 
 
177
  category_sizes = df['Category'].value_counts().reset_index()
178
  category_sizes.columns = ['Category', 'Count']
179
  sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
180
 
 
181
  largest_categories = sorted_categories[:num_clusters_to_display]
182
 
 
183
  filtered_df = df[df['Category'].isin(largest_categories)]
184
 
 
185
  filtered_df = filtered_df.sort_values(by='Category')
186
 
187
  wordcloud_img = generate_wordcloud(filtered_df)
 
196
  print(f"Error: {e}")
197
  return str(e), None, None
198
 
199
+ def select_bot(bot_name):
200
+ global categories_keywords
201
+ if bot_name == "Bot 1":
202
+ categories_keywords = categories_keywords_bot1
203
+ else:
204
+ categories_keywords = categories_keywords_bot2
205
+ return f"Selected {bot_name}"
206
+
207
+ def categorize_unanswered_queries(file, num_clusters_to_display):
208
+ return main(file, num_clusters_to_display, categories_keywords)
209
+
210
+ bot_selection_interface = gr.Interface(
211
+ fn=select_bot,
212
+ inputs=[
213
+ gr.Radio(["Bot 1", "Bot 2"], label="Select Bot")
214
+ ],
215
+ outputs=[
216
+ gr.Textbox(label="Selected Bot")
217
+ ],
218
+ title="Select Bot",
219
+ description="Select the bot for categorizing unanswered queries."
220
+ )
221
+
222
+ categorize_interface = gr.Interface(
223
+ fn=categorize_unanswered_queries,
224
  inputs=[
225
  gr.File(label="Upload CSV File (.csv)"),
226
  gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
 
234
  description="Categorize unanswered user queries into predefined categories"
235
  )
236
 
237
+ main_interface = gr.TabbedInterface([bot_selection_interface, categorize_interface], ["Select Bot", "Categorize Queries"])
238
 
239
+ main_interface.launch(share=True)