Spaces:

TFI
/

K-Means_Clustering_Algorithm

Sleeping

App Files Community

tanish78 committed on Jul 25, 2024

Commit

bd2d8e4

verified ·

1 Parent(s): c1344f9

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -20

app.py CHANGED Viewed

@@ -10,7 +10,8 @@ import matplotlib.pyplot as plt
 import plotly.express as px
 from PIL import Image
-categories_keywords = {
     'Application Status': ['application', 'applied', 'update on my application', 'result of my application', 'selected', 'selection process', 'apply', 'fellow', 'lesson plan', 'status of my application', 'application update', 'application status', 'applied for'],
     'Volunteering': ['volunteering', 'volunteer', 'volunteering certificate', 'resume my volunteering', 'volunteering journey', 'volunteering with TFI', 'volunteering opportunities', 'volunteer work', 'volunteer program'],
     'Certificates': ['certificate', 'certificates', 'certificate of completion', 'volunteer certificate', 'issue certificate'],
@@ -29,22 +30,29 @@ categories_keywords = {
     'Miscellaneous': []
 }
-def categorize_question(question):
     for category, keywords in categories_keywords.items():
         for keyword in keywords:
             if keyword.lower() in question.lower():
-                # Check if the question is one word and belongs to 'End of Conversation'
                 if category == 'End of Conversation':
                     return category
-                # If not 'End of Conversation', return the matched category
                 if category != 'End of Conversation':
                     return category
     return 'Miscellaneous'
-def preprocess_data(df):
     df.rename(columns={'Question Asked': 'texts'}, inplace=True)
     df['texts'] = df['texts'].astype(str).str.lower()
     df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
@@ -95,8 +103,7 @@ def preprocess_data(df):
     df['texts'] = df['texts'].str.strip()
     df = df[df['texts'] != '']
-    # Categorize the texts
-    df['Category'] = df['texts'].apply(categorize_question)
     return df
@@ -138,7 +145,6 @@ def generate_wordcloud(df):
     return img
 def generate_bar_chart(df, num_clusters_to_display):
-    # Exclude common words from the top words
     common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
     top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
@@ -160,27 +166,22 @@ def generate_bar_chart(df, num_clusters_to_display):
     img = Image.open(buf)
     return img
-def main(file, num_clusters_to_display):
     try:
         df = pd.read_csv(file)
-        # Filter by 'Fallback Message shown'
         df = df[df['Answer'] == 'Fallback Message shown']
-        df = preprocess_data(df)
-        # Get category sizes and sort by size in ascending order
         category_sizes = df['Category'].value_counts().reset_index()
         category_sizes.columns = ['Category', 'Count']
         sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
-        # Get the largest x categories as specified by num_clusters_to_display
         largest_categories = sorted_categories[:num_clusters_to_display]
-        # Filter the dataframe to include only the largest categories
         filtered_df = df[df['Category'].isin(largest_categories)]
-        # Sort the dataframe by Category
         filtered_df = filtered_df.sort_values(by='Category')
         wordcloud_img = generate_wordcloud(filtered_df)
@@ -195,8 +196,31 @@ def main(file, num_clusters_to_display):
         print(f"Error: {e}")
         return str(e), None, None
-interface = gr.Interface(
-    fn=main,
     inputs=[
         gr.File(label="Upload CSV File (.csv)"),
         gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
@@ -210,5 +234,6 @@ interface = gr.Interface(
     description="Categorize unanswered user queries into predefined categories"
 )
-interface.launch(share=True)

 import plotly.express as px
 from PIL import Image
+# Define categories for Bot 1 and Bot 2
+categories_keywords_bot1 = {
     'Application Status': ['application', 'applied', 'update on my application', 'result of my application', 'selected', 'selection process', 'apply', 'fellow', 'lesson plan', 'status of my application', 'application update', 'application status', 'applied for'],
     'Volunteering': ['volunteering', 'volunteer', 'volunteering certificate', 'resume my volunteering', 'volunteering journey', 'volunteering with TFI', 'volunteering opportunities', 'volunteer work', 'volunteer program'],
     'Certificates': ['certificate', 'certificates', 'certificate of completion', 'volunteer certificate', 'issue certificate'],
     'Miscellaneous': []
 }
+categories_keywords_bot2 = {
+    'Service Issues': ['service', 'support', 'help', 'assistance'],
+    'Billing': ['bill', 'billing', 'invoice', 'payment', 'charge'],
+    'Technical Problems': ['technical', 'error', 'problem', 'issue'],
+    'Account Management': ['account', 'login', 'credentials', 'password'],
+    'Product Information': ['product', 'information', 'details', 'specifications'],
+    'Miscellaneous': []
+}
+# Initialize with Bot 1's categories
+categories_keywords = categories_keywords_bot1
+def categorize_question(question, categories_keywords):
     for category, keywords in categories_keywords.items():
         for keyword in keywords:
             if keyword.lower() in question.lower():
                 if category == 'End of Conversation':
                     return category
                 if category != 'End of Conversation':
                     return category
     return 'Miscellaneous'
+def preprocess_data(df, categories_keywords):
     df.rename(columns={'Question Asked': 'texts'}, inplace=True)
     df['texts'] = df['texts'].astype(str).str.lower()
     df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
     df['texts'] = df['texts'].str.strip()
     df = df[df['texts'] != '']
+    df['Category'] = df['texts'].apply(lambda x: categorize_question(x, categories_keywords))
     return df
     return img
 def generate_bar_chart(df, num_clusters_to_display):
     common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
     top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
     img = Image.open(buf)
     return img
+def main(file, num_clusters_to_display, categories_keywords):
     try:
         df = pd.read_csv(file)
         df = df[df['Answer'] == 'Fallback Message shown']
+        df = preprocess_data(df, categories_keywords)
         category_sizes = df['Category'].value_counts().reset_index()
         category_sizes.columns = ['Category', 'Count']
         sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
         largest_categories = sorted_categories[:num_clusters_to_display]
         filtered_df = df[df['Category'].isin(largest_categories)]
         filtered_df = filtered_df.sort_values(by='Category')
         wordcloud_img = generate_wordcloud(filtered_df)
         print(f"Error: {e}")
         return str(e), None, None
+def select_bot(bot_name):
+    global categories_keywords
+    if bot_name == "Bot 1":
+        categories_keywords = categories_keywords_bot1
+    else:
+        categories_keywords = categories_keywords_bot2
+    return f"Selected {bot_name}"
+def categorize_unanswered_queries(file, num_clusters_to_display):
+    return main(file, num_clusters_to_display, categories_keywords)
+bot_selection_interface = gr.Interface(
+    fn=select_bot,
+    inputs=[
+        gr.Radio(["Bot 1", "Bot 2"], label="Select Bot")
+    ],
+    outputs=[
+        gr.Textbox(label="Selected Bot")
+    ],
+    title="Select Bot",
+    description="Select the bot for categorizing unanswered queries."
+)
+categorize_interface = gr.Interface(
+    fn=categorize_unanswered_queries,
     inputs=[
         gr.File(label="Upload CSV File (.csv)"),
         gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
     description="Categorize unanswered user queries into predefined categories"
 )
+main_interface = gr.TabbedInterface([bot_selection_interface, categorize_interface], ["Select Bot", "Categorize Queries"])
+main_interface.launch(share=True)