Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,8 @@ import matplotlib.pyplot as plt
|
|
| 10 |
import plotly.express as px
|
| 11 |
from PIL import Image
|
| 12 |
|
| 13 |
-
|
|
|
|
| 14 |
'Application Status': ['application', 'applied', 'update on my application', 'result of my application', 'selected', 'selection process', 'apply', 'fellow', 'lesson plan', 'status of my application', 'application update', 'application status', 'applied for'],
|
| 15 |
'Volunteering': ['volunteering', 'volunteer', 'volunteering certificate', 'resume my volunteering', 'volunteering journey', 'volunteering with TFI', 'volunteering opportunities', 'volunteer work', 'volunteer program'],
|
| 16 |
'Certificates': ['certificate', 'certificates', 'certificate of completion', 'volunteer certificate', 'issue certificate'],
|
|
@@ -29,22 +30,29 @@ categories_keywords = {
|
|
| 29 |
'Miscellaneous': []
|
| 30 |
}
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
| 34 |
for category, keywords in categories_keywords.items():
|
| 35 |
for keyword in keywords:
|
| 36 |
if keyword.lower() in question.lower():
|
| 37 |
-
# Check if the question is one word and belongs to 'End of Conversation'
|
| 38 |
if category == 'End of Conversation':
|
| 39 |
return category
|
| 40 |
-
# If not 'End of Conversation', return the matched category
|
| 41 |
if category != 'End of Conversation':
|
| 42 |
return category
|
| 43 |
return 'Miscellaneous'
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
def preprocess_data(df):
|
| 48 |
df.rename(columns={'Question Asked': 'texts'}, inplace=True)
|
| 49 |
df['texts'] = df['texts'].astype(str).str.lower()
|
| 50 |
df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
|
|
@@ -95,8 +103,7 @@ def preprocess_data(df):
|
|
| 95 |
df['texts'] = df['texts'].str.strip()
|
| 96 |
df = df[df['texts'] != '']
|
| 97 |
|
| 98 |
-
|
| 99 |
-
df['Category'] = df['texts'].apply(categorize_question)
|
| 100 |
|
| 101 |
return df
|
| 102 |
|
|
@@ -138,7 +145,6 @@ def generate_wordcloud(df):
|
|
| 138 |
return img
|
| 139 |
|
| 140 |
def generate_bar_chart(df, num_clusters_to_display):
|
| 141 |
-
# Exclude common words from the top words
|
| 142 |
common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
|
| 143 |
|
| 144 |
top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
|
|
@@ -160,27 +166,22 @@ def generate_bar_chart(df, num_clusters_to_display):
|
|
| 160 |
img = Image.open(buf)
|
| 161 |
return img
|
| 162 |
|
| 163 |
-
def main(file, num_clusters_to_display):
|
| 164 |
try:
|
| 165 |
df = pd.read_csv(file)
|
| 166 |
|
| 167 |
-
# Filter by 'Fallback Message shown'
|
| 168 |
df = df[df['Answer'] == 'Fallback Message shown']
|
| 169 |
|
| 170 |
-
df = preprocess_data(df)
|
| 171 |
|
| 172 |
-
# Get category sizes and sort by size in ascending order
|
| 173 |
category_sizes = df['Category'].value_counts().reset_index()
|
| 174 |
category_sizes.columns = ['Category', 'Count']
|
| 175 |
sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
|
| 176 |
|
| 177 |
-
# Get the largest x categories as specified by num_clusters_to_display
|
| 178 |
largest_categories = sorted_categories[:num_clusters_to_display]
|
| 179 |
|
| 180 |
-
# Filter the dataframe to include only the largest categories
|
| 181 |
filtered_df = df[df['Category'].isin(largest_categories)]
|
| 182 |
|
| 183 |
-
# Sort the dataframe by Category
|
| 184 |
filtered_df = filtered_df.sort_values(by='Category')
|
| 185 |
|
| 186 |
wordcloud_img = generate_wordcloud(filtered_df)
|
|
@@ -195,8 +196,31 @@ def main(file, num_clusters_to_display):
|
|
| 195 |
print(f"Error: {e}")
|
| 196 |
return str(e), None, None
|
| 197 |
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
inputs=[
|
| 201 |
gr.File(label="Upload CSV File (.csv)"),
|
| 202 |
gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
|
|
@@ -210,5 +234,6 @@ interface = gr.Interface(
|
|
| 210 |
description="Categorize unanswered user queries into predefined categories"
|
| 211 |
)
|
| 212 |
|
| 213 |
-
|
| 214 |
|
|
|
|
|
|
| 10 |
import plotly.express as px
|
| 11 |
from PIL import Image
|
| 12 |
|
| 13 |
+
# Define categories for Bot 1 and Bot 2
|
| 14 |
+
categories_keywords_bot1 = {
|
| 15 |
'Application Status': ['application', 'applied', 'update on my application', 'result of my application', 'selected', 'selection process', 'apply', 'fellow', 'lesson plan', 'status of my application', 'application update', 'application status', 'applied for'],
|
| 16 |
'Volunteering': ['volunteering', 'volunteer', 'volunteering certificate', 'resume my volunteering', 'volunteering journey', 'volunteering with TFI', 'volunteering opportunities', 'volunteer work', 'volunteer program'],
|
| 17 |
'Certificates': ['certificate', 'certificates', 'certificate of completion', 'volunteer certificate', 'issue certificate'],
|
|
|
|
| 30 |
'Miscellaneous': []
|
| 31 |
}
|
| 32 |
|
| 33 |
+
# Keyword table for Bot 2: category name -> keywords that categorize_question
# looks for (as case-insensitive substrings) in a question.
# 'Miscellaneous' deliberately has no keywords; it is the fallback category.
categories_keywords_bot2 = {
    'Service Issues': [
        'service',
        'support',
        'help',
        'assistance',
    ],
    'Billing': [
        'bill',
        'billing',
        'invoice',
        'payment',
        'charge',
    ],
    'Technical Problems': [
        'technical',
        'error',
        'problem',
        'issue',
    ],
    'Account Management': [
        'account',
        'login',
        'credentials',
        'password',
    ],
    'Product Information': [
        'product',
        'information',
        'details',
        'specifications',
    ],
    'Miscellaneous': [],
}
|
| 41 |
|
| 42 |
+
# Start with Bot 1's keyword table as the active one; select_bot() rebinds
# this module-level name when a different bot is chosen.
categories_keywords = categories_keywords_bot1
|
| 44 |
+
|
| 45 |
+
def categorize_question(question, categories_keywords):
    """Return the first category whose keyword list matches ``question``.

    Matching is a case-insensitive substring test. Categories are tried in
    the mapping's iteration order and the first category with any matching
    keyword wins.

    Args:
        question: Text to classify (a ``str``).
        categories_keywords: Mapping of category name to an iterable of
            keyword strings. A category with no keywords can never match.

    Returns:
        The name of the first matching category, or ``'Miscellaneous'`` when
        no keyword of any category is contained in the question.
    """
    # Lower-case the question once instead of once per keyword.
    lowered_question = question.lower()
    for category, keywords in categories_keywords.items():
        # Every category ('End of Conversation' included) is returned on its
        # first keyword hit, so no per-category branching is needed here.
        if any(keyword.lower() in lowered_question for keyword in keywords):
            return category
    return 'Miscellaneous'
|
| 54 |
|
| 55 |
+
def preprocess_data(df, categories_keywords):
|
|
|
|
|
|
|
| 56 |
df.rename(columns={'Question Asked': 'texts'}, inplace=True)
|
| 57 |
df['texts'] = df['texts'].astype(str).str.lower()
|
| 58 |
df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
|
|
|
|
| 103 |
df['texts'] = df['texts'].str.strip()
|
| 104 |
df = df[df['texts'] != '']
|
| 105 |
|
| 106 |
+
df['Category'] = df['texts'].apply(lambda x: categorize_question(x, categories_keywords))
|
|
|
|
| 107 |
|
| 108 |
return df
|
| 109 |
|
|
|
|
| 145 |
return img
|
| 146 |
|
| 147 |
def generate_bar_chart(df, num_clusters_to_display):
|
|
|
|
| 148 |
common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
|
| 149 |
|
| 150 |
top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
|
|
|
|
| 166 |
img = Image.open(buf)
|
| 167 |
return img
|
| 168 |
|
| 169 |
+
def main(file, num_clusters_to_display, categories_keywords):
|
| 170 |
try:
|
| 171 |
df = pd.read_csv(file)
|
| 172 |
|
|
|
|
| 173 |
df = df[df['Answer'] == 'Fallback Message shown']
|
| 174 |
|
| 175 |
+
df = preprocess_data(df, categories_keywords)
|
| 176 |
|
|
|
|
| 177 |
category_sizes = df['Category'].value_counts().reset_index()
|
| 178 |
category_sizes.columns = ['Category', 'Count']
|
| 179 |
sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
|
| 180 |
|
|
|
|
| 181 |
largest_categories = sorted_categories[:num_clusters_to_display]
|
| 182 |
|
|
|
|
| 183 |
filtered_df = df[df['Category'].isin(largest_categories)]
|
| 184 |
|
|
|
|
| 185 |
filtered_df = filtered_df.sort_values(by='Category')
|
| 186 |
|
| 187 |
wordcloud_img = generate_wordcloud(filtered_df)
|
|
|
|
| 196 |
print(f"Error: {e}")
|
| 197 |
return str(e), None, None
|
| 198 |
|
| 199 |
+
def select_bot(bot_name):
    """Make the keyword table of the chosen bot the active one.

    Rebinds the module-level ``categories_keywords`` to
    ``categories_keywords_bot1`` or ``categories_keywords_bot2``.

    Args:
        bot_name: ``"Bot 1"`` or ``"Bot 2"``.

    Returns:
        ``"Selected <bot_name>"`` when a known bot was chosen; otherwise a
        message stating that the selection was left unchanged.
    """
    # NOTE(review): this is module-level state, so it is presumably shared by
    # every user of the running app -- confirm that is acceptable.
    global categories_keywords
    if bot_name == "Bot 1":
        categories_keywords = categories_keywords_bot1
    elif bot_name == "Bot 2":
        categories_keywords = categories_keywords_bot2
    else:
        # Any other value (e.g. None, presumably what arrives when no radio
        # option is picked) must not silently switch to Bot 2.
        return f"Unknown bot {bot_name!r}; selection unchanged"
    return f"Selected {bot_name}"
|
| 206 |
+
|
| 207 |
+
def categorize_unanswered_queries(file, num_clusters_to_display):
    """Run ``main`` using whichever keyword table is currently active.

    Both arguments are forwarded to ``main`` unchanged, together with the
    module-level ``categories_keywords`` as bound at call time. The value
    returned by ``main`` is passed straight back to the caller.
    """
    active_keywords = categories_keywords
    return main(file, num_clusters_to_display, active_keywords)
|
| 209 |
+
|
| 210 |
+
# Interface that lets the user choose which bot's keyword table is active;
# the radio value is passed to select_bot and its status text is displayed.
bot_selection_interface = gr.Interface(
    fn=select_bot,
    inputs=[gr.Radio(choices=["Bot 1", "Bot 2"], label="Select Bot")],
    outputs=[gr.Textbox(label="Selected Bot")],
    title="Select Bot",
    description="Select the bot for categorizing unanswered queries.",
)
|
| 221 |
+
|
| 222 |
+
categorize_interface = gr.Interface(
|
| 223 |
+
fn=categorize_unanswered_queries,
|
| 224 |
inputs=[
|
| 225 |
gr.File(label="Upload CSV File (.csv)"),
|
| 226 |
gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
|
|
|
|
| 234 |
description="Categorize unanswered user queries into predefined categories"
|
| 235 |
)
|
| 236 |
|
| 237 |
+
# Combine the two interfaces into one tabbed app: bot selection first,
# then the categorization tab.
main_interface = gr.TabbedInterface(
    interface_list=[bot_selection_interface, categorize_interface],
    tab_names=["Select Bot", "Categorize Queries"],
)

# NOTE(review): share=True asks Gradio for a public share link -- confirm
# this is intended for the environment the app is deployed in.
main_interface.launch(share=True)
|