tanish78 committed on
Commit
1927724
·
verified ·
1 Parent(s): d563311

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -48
app.py CHANGED
@@ -10,29 +10,8 @@ import matplotlib.pyplot as plt
10
  import plotly.express as px
11
  from PIL import Image
12
 
13
- # Define categories for Teach For India Bot and Firki Bot
14
- categories_keywords_tfi = {
15
- 'Start of Conversation': ['hi', 'hello', 'hi I have a query', 'query', 'good morning', 'good afternoon', 'good evening'],
16
- 'Application Status': ['application', 'applied', 'update on my application', 'result of my application', 'selected', 'selection process', 'apply', 'fellow', 'lesson plan', 'status of my application', 'application update', 'application status', 'applied for'],
17
- 'Volunteering': ['volunteering', 'volunteer', 'volunteering certificate', 'resume my volunteering', 'volunteering journey', 'volunteering with TFI', 'volunteering opportunities', 'volunteer work', 'volunteer program'],
18
- 'Certificates': ['certificate', 'certificates', 'certificate of completion', 'volunteer certificate', 'issue certificate'],
19
- 'Job Opportunities': ['job', 'vacancy', 'Talent Acquisition Executive job', 'opportunity', 'job opening', 'job position', 'career opportunities'],
20
- 'Surveys and Forms': ['survey', 'form', 'fill out the survey', 'application form', 'survey link', 'survey form', 'form submission'],
21
- 'General Queries': ['query', 'queries', 'questions', 'feedback', 'loved', 'overwhelming', 'general question', 'inquiry', 'query about'],
22
- 'Spam': ['free recharge', 'offer', 'click the link', 'https'],
23
- 'Rescheduling and Postponing': ['reschedule', 'postpone', 'cancellation', 'date', 'time slot', 'change date', 'change time', 'reschedule appointment'],
24
- 'Contact and Communication Issues': ['call', 'phone', 'contact', 'not received', 'contact support', 'phone call', 'call back', 'internet'],
25
- 'Email and Credentials Issues': ['email', 'credentials', 'received', 'email issue', 'email problem', 'credential issue', 'login problem'],
26
- 'Timing and Scheduling': ['session', 'time', 'interview', 'baje', 'schedule time', 'meeting time', 'appointment time'],
27
- 'Salary and Benefits': ['salary', 'increment', 'accommodation', 'training period', 'reside', 'stipend', 'pay', 'wage', 'salary details', 'benefits information'],
28
- 'Technical Issues': ['network issues', 'zoom meeting', 'passcode', 'technical', 'issue','technical problem', 'system issue', 'technical support'],
29
- 'Complaint Handling': ['help', 'i need help', 'Help me', 'complaint', 'issue is unresolved', 'unsatisfied', 'bad experience'],
30
- 'User Feedback': ['feedback', 'loved', 'dissapointed', 'hated', 'it was good', 'it was bad', 'helpful',],
31
- 'End of Conversation': ['thanks', 'thankss', 'thank u', 'thank you', 'ok', 'okay', 'done', 'joining', 'sounds good', 'goodbye', 'end chat', 'end'],
32
- 'Miscellaneous': []
33
- }
34
-
35
- categories_keywords_firki = {
36
  "Application Status": ["application status", "application", "status", "submitted", "processing", "pending", "approval", "rejected", "accepted"],
37
  "Volunteering": ["volunteer", "volunteering", "help out", "assist", "volunteer work", "volunteer opportunities"],
38
  "Certificates": ["certificate", "certificates", "completion", "certification", "accreditation", "proof", "document", "certified"],
@@ -55,23 +34,32 @@ categories_keywords_firki = {
55
  "Service Requests": ["service", "support", "request", "assistance", "help", "aid", "maintenance"],
56
  "Account Issues": ["account", "profile", "update", "activation", "deactivation", "credentials", "reset"],
57
  "Product Information": ["product", "service", "details", "info", "information", "specifications", "features"],
 
58
  "Order Status": ["order", "status", "tracking", "shipment", "delivery", "purchase", "dispatch"],
59
  "Miscellaneous": ["miscellaneous", "other", "various", "random", "general", "unknown", "unsorted"]
60
 
61
  }
62
 
63
  # Initialize
64
- categories_keywords = categories_keywords_tfi
65
 
66
- def categorize_question(question, categories_keywords):
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  for category, keywords in categories_keywords.items():
68
- for keyword in keywords:
69
- if keyword.lower() in question.lower():
70
- if category == 'End of Conversation':
71
- return category
72
- if category != 'End of Conversation':
73
- return category
74
- return 'Miscellaneous'
75
 
76
  def preprocess_data(df, categories_keywords):
77
  df.rename(columns={'Question Asked': 'texts'}, inplace=True)
@@ -187,28 +175,27 @@ def generate_bar_chart(df, num_clusters_to_display):
187
  img = Image.open(buf)
188
  return img
189
 
190
- def main(file, bot_name, num_clusters_to_display):
191
  try:
192
- global categories_keywords
193
- if bot_name == "Teach For India":
194
- categories_keywords = categories_keywords_tfi
195
- else:
196
- categories_keywords = categories_keywords_firki
197
-
198
- df = pd.read_csv(file.name)
199
 
 
200
  df = df[df['Answer'] == 'Fallback Message shown']
201
 
202
- df = preprocess_data(df, categories_keywords)
203
 
 
204
  category_sizes = df['Category'].value_counts().reset_index()
205
  category_sizes.columns = ['Category', 'Count']
206
  sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
207
 
 
208
  largest_categories = sorted_categories[:num_clusters_to_display]
209
 
 
210
  filtered_df = df[df['Category'].isin(largest_categories)]
211
 
 
212
  filtered_df = filtered_df.sort_values(by='Category')
213
 
214
  wordcloud_img = generate_wordcloud(filtered_df)
@@ -223,13 +210,9 @@ def main(file, bot_name, num_clusters_to_display):
223
  print(f"Error: {e}")
224
  return str(e), None, None
225
 
226
- def categorize_unanswered_queries(bot_name, file, num_clusters_to_display):
227
- return main(file, bot_name, num_clusters_to_display)
228
-
229
  interface = gr.Interface(
230
- fn=categorize_unanswered_queries,
231
  inputs=[
232
- gr.Radio(["Teach For India", "Firki"], label="Select ChatBot"),
233
  gr.File(label="Upload CSV File (.csv)"),
234
  gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
235
  ],
@@ -239,7 +222,7 @@ interface = gr.Interface(
239
  gr.Image(label="Bar Chart")
240
  ],
241
  title="Unanswered User Queries Categorization",
242
- description="Select the bot, upload the CSV file, and specify the number of categories to display to categorize unanswered user queries."
243
  )
244
 
245
- interface.launch()
 
10
  import plotly.express as px
11
  from PIL import Image
12
 
13
+ # Define categories
14
+ categories_keywords = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "Application Status": ["application status", "application", "status", "submitted", "processing", "pending", "approval", "rejected", "accepted"],
16
  "Volunteering": ["volunteer", "volunteering", "help out", "assist", "volunteer work", "volunteer opportunities"],
17
  "Certificates": ["certificate", "certificates", "completion", "certification", "accreditation", "proof", "document", "certified"],
 
34
  "Service Requests": ["service", "support", "request", "assistance", "help", "aid", "maintenance"],
35
  "Account Issues": ["account", "profile", "update", "activation", "deactivation", "credentials", "reset"],
36
  "Product Information": ["product", "service", "details", "info", "information", "specifications", "features"],
37
+ "Account Management": ["login", "account", "new account", "create account", "log out", "google", "access"],
38
  "Order Status": ["order", "status", "tracking", "shipment", "delivery", "purchase", "dispatch"],
39
  "Miscellaneous": ["miscellaneous", "other", "various", "random", "general", "unknown", "unsorted"]
40
 
41
  }
42
 
43
  # Initialize
 
44
 
45
def categorize_question(question, keyword_map=None):
    """Assign a user question to the first keyword category it matches.

    A question consisting of exactly one word is treated as conversational
    filler: it is labelled "Start of Conversation" when it matches a greeting
    keyword and "End of Conversation" otherwise. Longer questions are
    labelled with the first category (in mapping order) that has a keyword
    occurring as a case-insensitive substring of the question.

    Args:
        question: The raw question text. Non-string values (for example the
            float NaN that pandas produces for empty CSV cells) are accepted
            and labelled "Miscellaneous" instead of raising.
        keyword_map: Optional mapping of category name to a list of keyword
            strings. When omitted, the module-level ``categories_keywords``
            is used. Accepting it positionally keeps older two-argument
            call sites working.

    Returns:
        The category name as a string; "Miscellaneous" when nothing matches.
    """
    if keyword_map is None:
        keyword_map = categories_keywords

    # Missing or non-text cells cannot be split or lower-cased.
    if not isinstance(question, str):
        return "Miscellaneous"

    words = question.split()

    if len(words) == 1:
        single_word = words[0].lower()
        # .get() avoids a KeyError when the active keyword table has no
        # greeting category at all.
        greetings = keyword_map.get("Start of Conversation", ())
        # NOTE(review): this tests whether the word is a substring OF a
        # greeting keyword (so "h" matches "hello"); kept as in the original,
        # confirm that direction is intended.
        if any(single_word in keyword.lower() for keyword in greetings):
            return "Start of Conversation"
        return "End of Conversation"

    # Lower-case once instead of once per keyword comparison.
    lowered = question.lower()
    for category, keywords in keyword_map.items():
        if any(keyword.lower() in lowered for keyword in keywords):
            return category
    return "Miscellaneous"
 
 
 
 
63
 
64
  def preprocess_data(df, categories_keywords):
65
  df.rename(columns={'Question Asked': 'texts'}, inplace=True)
 
175
  img = Image.open(buf)
176
  return img
177
 
178
+ def main(file, num_clusters_to_display):
179
  try:
180
+ df = pd.read_csv(file)
 
 
 
 
 
 
181
 
182
+ # Filter by 'Fallback Message shown'
183
  df = df[df['Answer'] == 'Fallback Message shown']
184
 
185
+ df = preprocess_data(df)
186
 
187
+ # Get category sizes and sort by size in ascending order
188
  category_sizes = df['Category'].value_counts().reset_index()
189
  category_sizes.columns = ['Category', 'Count']
190
  sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
191
 
192
+ # Get the largest x categories as specified by num_clusters_to_display
193
  largest_categories = sorted_categories[:num_clusters_to_display]
194
 
195
+ # Filter the dataframe to include only the largest categories
196
  filtered_df = df[df['Category'].isin(largest_categories)]
197
 
198
+ # Sort the dataframe by Category
199
  filtered_df = filtered_df.sort_values(by='Category')
200
 
201
  wordcloud_img = generate_wordcloud(filtered_df)
 
210
  print(f"Error: {e}")
211
  return str(e), None, None
212
 
 
 
 
213
  interface = gr.Interface(
214
+ fn=main,
215
  inputs=[
 
216
  gr.File(label="Upload CSV File (.csv)"),
217
  gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
218
  ],
 
222
  gr.Image(label="Bar Chart")
223
  ],
224
  title="Unanswered User Queries Categorization",
225
+ description="Categorize unanswered user queries into predefined categories"
226
  )
227
 
228
+ interface.launch(share=True)