tanish78 committed on
Commit
b268803
·
verified ·
1 Parent(s): d731f11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -27
app.py CHANGED
@@ -39,26 +39,19 @@ categories_keywords = {
39
  }
40
 
41
  def categorize_question(question):
42
- # Split the question into words
43
  words = question.split()
44
-
45
- # Check if the question has only one word
46
  if len(words) == 1:
47
  single_word = words[0].lower()
48
- # Check if the single word is in the Start of Conversation category
49
  if any(single_word in keyword for keyword in categories_keywords["Start of Conversation"]):
50
  return "Start of Conversation"
51
  else:
52
  return "End of Conversation"
53
 
54
- # Categorization of other queries
55
  for category, keywords in categories_keywords.items():
56
  if any(keyword.lower() in question.lower() for keyword in keywords):
57
  return category
58
  return "Miscellaneous"
59
 
60
-
61
-
62
  def preprocess_data(df):
63
  df.rename(columns={'Question Asked': 'texts'}, inplace=True)
64
  df['texts'] = df['texts'].astype(str).str.lower()
@@ -110,7 +103,6 @@ def preprocess_data(df):
110
  df['texts'] = df['texts'].str.strip()
111
  df = df[df['texts'] != '']
112
 
113
- # Categorize the texts
114
  df['Category'] = df['texts'].apply(categorize_question)
115
 
116
  return df
@@ -153,7 +145,6 @@ def generate_wordcloud(df):
153
  return img
154
 
155
  def generate_bar_chart(df, num_clusters_to_display):
156
- # Exclude common words from the top words
157
  common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
158
 
159
  top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
@@ -179,30 +170,15 @@ def main(file, num_clusters_to_display):
179
  try:
180
  df = pd.read_csv(file)
181
 
182
- # Filter by 'Fallback Message shown'
183
  df = df[df['Answer'] == 'Fallback Message shown']
184
 
185
  df = preprocess_data(df)
186
 
187
- # Get category sizes and sort by size in ascending order
188
- category_sizes = df['Category'].value_counts().reset_index()
189
- category_sizes.columns = ['Category', 'Count']
190
- sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
191
-
192
- # Get the largest x categories as specified by num_clusters_to_display
193
- largest_categories = sorted_categories[:num_clusters_to_display]
194
-
195
- # Filter the dataframe to include only the largest categories
196
- filtered_df = df[df['Category'].isin(largest_categories)]
197
-
198
- # Sort the dataframe by Category
199
- filtered_df = filtered_df.sort_values(by='Category')
200
-
201
- wordcloud_img = generate_wordcloud(filtered_df)
202
- bar_chart_img = generate_bar_chart(filtered_df, num_clusters_to_display)
203
 
204
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
205
- filtered_df.to_csv(tmpfile.name, index=False)
206
  csv_file_path = tmpfile.name
207
 
208
  return csv_file_path, wordcloud_img, bar_chart_img
 
39
  }
40
 
41
  def categorize_question(question):
 
42
  words = question.split()
 
 
43
  if len(words) == 1:
44
  single_word = words[0].lower()
 
45
  if any(single_word in keyword for keyword in categories_keywords["Start of Conversation"]):
46
  return "Start of Conversation"
47
  else:
48
  return "End of Conversation"
49
 
 
50
  for category, keywords in categories_keywords.items():
51
  if any(keyword.lower() in question.lower() for keyword in keywords):
52
  return category
53
  return "Miscellaneous"
54
 
 
 
55
  def preprocess_data(df):
56
  df.rename(columns={'Question Asked': 'texts'}, inplace=True)
57
  df['texts'] = df['texts'].astype(str).str.lower()
 
103
  df['texts'] = df['texts'].str.strip()
104
  df = df[df['texts'] != '']
105
 
 
106
  df['Category'] = df['texts'].apply(categorize_question)
107
 
108
  return df
 
145
  return img
146
 
147
  def generate_bar_chart(df, num_clusters_to_display):
 
148
  common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
149
 
150
  top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
 
170
  try:
171
  df = pd.read_csv(file)
172
 
 
173
  df = df[df['Answer'] == 'Fallback Message shown']
174
 
175
  df = preprocess_data(df)
176
 
177
+ wordcloud_img = generate_wordcloud(df)
178
+ bar_chart_img = generate_bar_chart(df, num_clusters_to_display)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
181
+ df.to_csv(tmpfile.name, index=False)
182
  csv_file_path = tmpfile.name
183
 
184
  return csv_file_path, wordcloud_img, bar_chart_img