igortech committed on
Commit
db4315a
·
verified ·
1 Parent(s): 6ce6baf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -344
app.py CHANGED
@@ -1,416 +1,215 @@
1
  import json
2
- import os
3
- import datetime
4
- import csv
5
  import random
6
  import re
7
  import string
8
- import urllib.parse
 
 
9
  import gradio as gr
10
- from difflib import SequenceMatcher
11
 
12
  # -----------------------------
13
- # Config
14
  # -----------------------------
15
  DATA_PATH = "quotes.json"
16
- MIN_SCORE = 1 # minimum score to consider a match (used after scoring)
17
- TOP_N = 5 # how many top quotes to consider for fusion
18
 
19
- # -----------------------------
20
- # Utility: load & normalize data
21
- # -----------------------------
22
  def load_quotes():
23
- """
24
- Loads quotes.json and accepts both:
25
- - new schema: { "food": ["quote1", "quote2", ...], ... }
26
- - old schema: { "food": { "positive":[..], "negative":[..] } , ... }
27
- Converts old schema into list form by concatenation.
28
- """
29
- if not os.path.exists(DATA_PATH):
30
- print(f"{DATA_PATH} not found.")
31
- return {}
32
-
33
- try:
34
- with open(DATA_PATH, "r", encoding="utf-8") as f:
35
- data = json.load(f)
36
- if not isinstance(data, dict):
37
- print(f"{DATA_PATH} root must be an object/dict.")
38
- return {}
39
-
40
- normalized = {}
41
- for cat, val in data.items():
42
- if isinstance(val, list):
43
- normalized[cat] = [str(q).strip() for q in val if isinstance(q, str) and q.strip()]
44
- elif isinstance(val, dict):
45
- # old schema; flatten positive/negative lists (preserve order)
46
- merged = []
47
- for k in ("positive", "negative"):
48
- items = val.get(k, [])
49
- if isinstance(items, list):
50
- merged.extend([str(q).strip() for q in items if isinstance(q, str) and q.strip()])
51
- normalized[cat] = merged
52
- else:
53
- # unsupported format
54
- normalized[cat] = []
55
- print(f"Loaded {len(normalized)} categories from {DATA_PATH}.")
56
- return normalized
57
- except Exception as e:
58
- print(f"Error loading {DATA_PATH}: {e}")
59
- return {}
60
 
61
- QUOTES = load_quotes()
62
 
63
  # -----------------------------
64
- # Text helpers & scoring
65
  # -----------------------------
66
- punct_re = re.compile(f"[{re.escape(string.punctuation)}]")
67
-
68
  STOPWORDS = {
69
  "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for",
70
  "is","are","was","were","be","being","been","it","that","this","these","those","with",
71
  "as","by","from","about","into","over","after","before","up","down","out"
72
  }
73
 
74
- POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent","delicious","fresh","helpful"}
75
- NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult","crowded","bland","stale"}
 
 
76
 
77
  def normalize(text: str) -> str:
78
- return punct_re.sub(" ", (text or "").lower()).strip()
79
 
80
  def tokenize(text: str):
81
  return [t for t in normalize(text).split() if t and t not in STOPWORDS]
82
 
83
- def seq_similarity(a: str, b: str) -> float:
84
- return SequenceMatcher(None, a, b).ratio()
85
-
86
- def score_quote_against_query(quote: str, query: str) -> int:
87
- """Weighted integer score: token overlap (weighted) + sequence-sim tie-breaker."""
88
- q_tokens = set(tokenize(quote))
89
- u_tokens = set(tokenize(query))
90
- overlap = len(q_tokens & u_tokens)
91
- seq_sim = seq_similarity(normalize(quote), normalize(query))
92
- score = overlap * 3 + int(seq_sim * 3)
93
- return score
94
-
95
- def find_top_matches(category: str, user_text: str, top_n: int = TOP_N):
96
- """Return list of (quote, score) sorted desc for given category."""
97
- quotes = QUOTES.get(category, [])
98
- scored = []
99
- for q in quotes:
100
- sc = score_quote_against_query(q, user_text)
101
- scored.append((q, sc))
102
- scored.sort(key=lambda x: x[1], reverse=True)
103
- # filter low scores
104
- filtered = [t for t in scored if t[1] >= MIN_SCORE]
105
- return filtered[:top_n]
106
 
107
  # -----------------------------
108
- # Fusion helpers
109
- # -----------------------------
110
- def pick_pivot_token(quotes, user_tokens):
111
- freq = {}
112
- for q, _sc in quotes:
113
- toks = tokenize(q)
114
- for t in toks:
115
- if t in STOPWORDS: continue
116
- freq[t] = freq.get(t, 0) + 1
117
- for t in user_tokens:
118
- if t in freq:
119
- return t
120
- if not freq: return None
121
- return max(freq.items(), key=lambda x: x[1])[0]
122
-
123
- def extract_traits_for_pivot(quote, pivot):
124
- toks = tokenize(quote)
125
- trait_tokens = [t for t in toks if t != pivot and t not in STOPWORDS]
126
- if not trait_tokens: return None
127
- return " ".join(trait_tokens[:5])
128
-
129
- def fuse_quotes(top_matches, user_text):
130
- """
131
- Build a compact fused sentence from top_matches (list of (quote, score)).
132
- Strategy:
133
- - Determine pivot token from matches and user_text.
134
- - Extract short traits from each match and combine into: "The <pivot> is X, Y and Z."
135
- - Fallback: return first match (trimmed) or small snippets.
136
- """
137
- if not top_matches:
138
- return "No relevant quotes found."
139
-
140
- # If only one quote, return it directly (trim to reasonable length)
141
- if len(top_matches) == 1:
142
- q = top_matches[0][0]
143
- return q if len(q) <= 300 else q[:300] + "..."
144
-
145
- user_toks = set(tokenize(user_text))
146
- pivot = pick_pivot_token(top_matches, user_toks)
147
- traits = []
148
- used = set()
149
- for q, _sc in top_matches:
150
- tr = extract_traits_for_pivot(q, pivot) if pivot else None
151
- if tr and tr not in used:
152
- traits.append(tr)
153
- used.add(tr)
154
- if pivot and traits:
155
- if len(traits) == 1:
156
- trait_text = traits[0]
157
- else:
158
- trait_text = ", ".join(traits[:-1]) + " and " + traits[-1]
159
- return f"The {pivot} is {trait_text}."
160
- else:
161
- # fallback: give short snippets from up to 3 quotes
162
- snippets = []
163
- for q, _sc in top_matches[:3]:
164
- tokens = tokenize(q)
165
- short = " ".join(tokens[:18])
166
- if short not in snippets:
167
- snippets.append(short + ("..." if len(tokens) > 18 else ""))
168
- return " / ".join(snippets)
169
-
170
  # -----------------------------
171
- # Assemble 3-tier response
172
- # -----------------------------
173
- def assemble_response(category: str, user_text: str, domain_restrict: str = ""):
174
  if category not in QUOTES:
175
- return ("Summary: No category found.", "What real people say:\nNo data for this category.", "For more info: (no results)")
176
-
177
- matches = find_top_matches(category, user_text, top_n=TOP_N)
178
- if not matches:
179
- summary = f"Summary: I couldn't find close matches in '{category}' for that question."
180
- details = "What real people say:\nNo matching interview snippets available."
181
- search_q = f"{category} college reviews"
182
- link = "https://www.google.com/search?q=" + urllib.parse.quote_plus(search_q)
183
- return (summary, details, f"For more info: {link}")
184
-
185
- # Evaluate sentiment lean among matches
186
- pos_count = 0
187
- neg_count = 0
188
- for q, _sc in matches:
189
- qn = normalize(q)
190
- if any(p in qn for p in POS_HINTS): pos_count += 1
191
- if any(n in qn for n in NEG_HINTS): neg_count += 1
192
-
193
- total = max(pos_count + neg_count, 1)
194
- if pos_count > neg_count:
195
- summary = f"Summary: Matching interview snippets tend to express positive views about {category}."
196
- elif neg_count > pos_count:
197
- summary = f"Summary: Matching snippets express concerns or negative points about {category}."
198
- else:
199
- summary = f"Summary: Matching snippets include both positive and negative remarks about {category}."
200
-
201
- # Tier 2: fusion
202
- fusion_text = fuse_quotes(matches, user_text)
203
- details = "What real people say:\n" + fusion_text
204
-
205
- # Tier 3: domain-aware search link
206
- if domain_restrict and domain_restrict.strip():
207
- # user supplied domain -> site search
208
- q = f"site:{domain_restrict.strip()} {category}"
209
- link = "https://www.google.com/search?q=" + urllib.parse.quote_plus(q)
210
- else:
211
- q = f"{category} college reviews"
212
- link = "https://www.google.com/search?q=" + urllib.parse.quote_plus(q)
213
- footer = f"For more info: {link}"
214
-
215
- return (summary, details, footer)
216
-
217
- # -----------------------------
218
- # Conversation logging & export
219
- # -----------------------------
220
- conversation_log = [] # list of dicts: timestamp, category, user_message, response
221
-
222
- def export_conversation_csv():
223
- fname = "conversation_log.csv"
224
- try:
225
- with open(fname, "w", newline="", encoding="utf-8") as csvfile:
226
- fieldnames = ["timestamp", "category", "user_message", "response"]
227
- writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
228
- writer.writeheader()
229
- writer.writerows(conversation_log)
230
- return fname
231
- except Exception as e:
232
- print("CSV export error:", e)
233
- return None
234
-
235
- # -----------------------------
236
- # Staging (collect responses into a category)
237
- # -----------------------------
238
- def stage_conversation_to_category(target_category: str):
239
- """
240
- Append conversation responses into the chosen staging category in memory (QUOTES).
241
- Returns a status message.
242
- """
243
- if not target_category or not isinstance(target_category, str):
244
- return "Invalid staging category name."
245
- added = 0
246
- QUOTES.setdefault(target_category, [])
247
- existing = set(QUOTES[target_category])
248
- for entry in conversation_log:
249
- resp = entry.get("response") or entry.get("bot_response") or ""
250
- if resp and resp not in existing:
251
- QUOTES[target_category].append(resp)
252
- existing.add(resp)
253
- added += 1
254
- return f"Added {added} unique responses to category '{target_category}'."
255
-
256
- def save_quotes_to_disk():
257
- """
258
- Overwrites DATA_PATH with current QUOTES in the new schema: {category: [quote, ...], ...}
259
- Returns filepath on success or None.
260
- """
261
- try:
262
- with open(DATA_PATH, "w", encoding="utf-8") as f:
263
- json.dump(QUOTES, f, ensure_ascii=False, indent=2)
264
- return DATA_PATH
265
- except Exception as e:
266
- print("Error saving quotes.json:", e)
267
- return None
268
-
269
- # -----------------------------
270
- # Chat respond callback (preserve chat UI behavior)
271
- # -----------------------------
272
- def respond(user_message, chat_history, category, domain_restrict):
273
- if chat_history is None:
274
- chat_history = []
275
-
276
  if not QUOTES:
277
- bot = "No dataset loaded. Please upload quotes.json first."
278
- chat_history.append((user_message, bot))
279
- return "", chat_history
280
 
281
  if not category:
282
  bot = "Please select a category."
283
- chat_history.append((user_message, bot))
284
- return "", chat_history
285
 
286
- # build 3-tier response
287
- summary, details, footer = assemble_response(category, user_message, domain_restrict)
288
- combined = f"{summary}\n\n{details}\n\n{footer}"
 
 
289
 
290
- # append to chat and log
291
- chat_history.append((user_message, combined))
292
- conversation_log.append({
293
- "timestamp": datetime.datetime.now().isoformat(),
294
- "category": category,
295
- "user_message": user_message,
296
- "response": combined
297
- })
298
 
299
- return "", chat_history
 
300
 
301
- # -----------------------------
302
- # File upload / download helpers
303
- # -----------------------------
304
  def upload_json(filepath):
305
- global QUOTES
306
  try:
307
  with open(filepath, "r", encoding="utf-8") as f:
308
  data = json.load(f)
309
- # Normalize to new schema if necessary
310
- if isinstance(data, dict):
311
- normalized = {}
312
- for cat, val in data.items():
313
- if isinstance(val, list):
314
- normalized[cat] = [str(q).strip() for q in val if isinstance(q, str) and q.strip()]
315
- elif isinstance(val, dict):
316
- merged = []
317
- for k in ("positive", "negative"):
318
- items = val.get(k, [])
319
- if isinstance(items, list):
320
- merged.extend([str(q).strip() for q in items if isinstance(q, str) and q.strip()])
321
- normalized[cat] = merged
322
- else:
323
- normalized[cat] = []
324
- QUOTES = normalized
325
- cats = sorted(list(QUOTES.keys()))
326
- status = f"Loaded {len(cats)} categories from uploaded file."
327
- return status, gr.update(choices=cats, value=(cats[0] if cats else None))
328
- else:
329
- return "Upload failed: JSON root must be an object/dict.", gr.update(choices=[])
330
  except Exception as e:
331
  return f"Error loading file: {e}", gr.update(choices=[])
332
 
333
- def download_quotes_file():
334
- """Return the DATAPATH for download (write if necessary)."""
335
- # Ensure current in-memory QUOTES is saved so download gets updated content
336
- saved = save_quotes_to_disk()
337
- return saved
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
  # -----------------------------
340
- # Gradio UI (keep chat area layout similar to your previous working UI)
341
  # -----------------------------
342
  with gr.Blocks() as demo:
343
- gr.Markdown("## 🎓 College Life Chatbot — Interview-style quotes")
344
 
345
- # Category dropdown (keys from QUOTES)
346
  initial_categories = sorted(list(QUOTES.keys()))
 
347
  with gr.Row():
348
- category = gr.Dropdown(label="Category", choices=initial_categories, value=(initial_categories[0] if initial_categories else None))
349
- domain_restrict = gr.Textbox(label="Restrict article search to domain (optional)", placeholder="example.com (optional)")
 
 
 
 
 
 
 
 
350
 
351
- # Chat area (unchanged look)
352
  chatbot = gr.Chatbot(label="Conversation", height=360, type="tuples")
353
  msg = gr.Textbox(label="Your message", placeholder="Ask something like: 'Is food good in college?'", autofocus=True)
354
  send = gr.Button("Send")
355
  clear = gr.Button("Clear")
356
 
357
- # Data I/O row(s): uploader, dataset download, conversation export (stable Button -> File pattern)
358
- with gr.Row():
359
- uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
360
- upload_status = gr.Textbox(label="Upload status", interactive=False)
361
-
362
  with gr.Row():
363
- download_btn = gr.Button("Download current quotes.json")
364
- download_file = gr.File(label="Dataset file (click to download)", interactive=False)
365
-
366
- export_btn = gr.Button("Export conversation (.csv)")
367
- export_file = gr.File(label="Conversation CSV (click to download)", interactive=False)
368
-
369
- # Staging controls (new)
370
- with gr.Row():
371
- staging_name = gr.Textbox(label="Staging category name", value="staged_responses", placeholder="Category name to stage exported replies")
372
- stage_btn = gr.Button("Stage conversation to category")
373
  stage_status = gr.Textbox(label="Stage status", interactive=False)
374
 
375
  with gr.Row():
376
- save_btn = gr.Button("Save quotes.json (write staged to disk)")
377
- save_status = gr.Textbox(label="Save status / download", interactive=False)
378
-
379
- # Wiring chat events (preserve send/enter behavior)
380
- msg.submit(respond, [msg, chatbot, category, domain_restrict], [msg, chatbot])
381
- send.click(respond, [msg, chatbot, category, domain_restrict], [msg, chatbot])
382
- clear.click(lambda: None, None, chatbot, queue=False)
383
-
384
- # Upload wiring
385
- uploader.upload(upload_json, uploader, [upload_status, category])
386
-
387
- # Downloads + exports wiring
388
- download_btn.click(download_quotes_file, outputs=download_file)
389
- export_btn.click(export_conversation_csv, outputs=export_file)
390
-
391
- # Staging wiring
392
- def _stage_and_return_status(name):
393
- msg = stage_conversation_to_category(name)
394
- # reflect updated categories in dropdown
395
- return msg, gr.update(choices=sorted(list(QUOTES.keys())), value=name)
396
 
397
- stage_btn.click(_stage_and_return_status, inputs=staging_name, outputs=[stage_status, category])
 
 
 
398
 
399
- def _save_and_report():
400
- path = save_quotes_to_disk()
401
- if path:
402
- return f"Saved to {path}"
403
- return "Failed to save quotes.json"
404
 
405
- save_btn.click(_save_and_report, outputs=save_status)
 
406
 
407
  # -----------------------------
408
  # Startup log
409
  # -----------------------------
410
  print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
411
  if QUOTES:
412
- for cat, quotes in QUOTES.items():
413
- print(f" - {cat}: {len(quotes)} quotes")
414
-
415
- if __name__ == "__main__":
416
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import json
 
 
 
2
  import random
3
  import re
4
  import string
5
+ import os
6
+ import datetime
7
+ import difflib
8
  import gradio as gr
 
9
 
10
# -----------------------------
# Config / data loading
# -----------------------------
DEFAULT_DATA_PATH = "quotes.json"


def load_quotes(path=DEFAULT_DATA_PATH):
    """Load the quotes dataset from *path*.

    Args:
        path: JSON file to read; defaults to DEFAULT_DATA_PATH. Taking the
            path as a parameter (instead of reading the module-level
            DATA_PATH, which this module later rebinds to the load result)
            keeps the function safe to call more than once.

    Returns:
        tuple: (data, source) where *data* is the category -> entries
        mapping, always containing a "staged_responses" bucket, and
        *source* is the file actually loaded, or None when no usable
        file was found.
    """
    if os.path.exists(path):
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            # A JSON root that is not an object (e.g. a bare list) would
            # crash the item-assignment below, so reject it explicitly.
            if isinstance(data, dict):
                # Ensure staged_responses bucket always exists
                if "staged_responses" not in data:
                    data["staged_responses"] = []
                print(f"Loaded dataset from {path} with {len(data)} categories.")
                return data, path
            print(f"{path} root must be a JSON object; ignoring it.")
        except Exception as e:
            print(f"Failed to load {path}: {e}")
    # fallback: empty in-memory dataset with no backing file
    return {"staged_responses": []}, None


# DATA_PATH tracks the file backing the in-memory dataset (None = unsaved).
QUOTES, DATA_PATH = load_quotes()
31
 
32
  # -----------------------------
33
+ # Text helpers
34
  # -----------------------------
 
 
35
# Tokens dropped by tokenize(); common English function words.
STOPWORDS = {
    "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for",
    "is","are","was","were","be","being","been","it","that","this","these","those","with",
    "as","by","from","about","into","over","after","before","up","down","out"
}

# Sentiment hint lexicons consulted by infer_sentiment().
POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}

# Matches any single punctuation character; compiled once at import time.
punct_re = re.compile(f"[{re.escape(string.punctuation)}]")

def normalize(text: str) -> str:
    """Lowercase *text* and replace punctuation with spaces (None-safe)."""
    return punct_re.sub(" ", (text or "").lower())

def tokenize(text: str):
    """Split normalized *text* into non-stopword tokens (list of str)."""
    return [t for t in normalize(text).split() if t and t not in STOPWORDS]

def infer_sentiment(user_text: str) -> str:
    """Guess "positive" or "negative" from hint words in *user_text*.

    Compares whole words rather than substrings so that e.g. "hard"
    does not fire on "orchard" or "meh" on "somehow". Ambiguous or
    neutral text defaults to "positive".
    """
    words = set(normalize(user_text).split())
    has_pos = bool(words & POS_HINTS)
    has_neg = bool(words & NEG_HINTS)
    if has_pos and not has_neg:
        return "positive"
    if has_neg and not has_pos:
        return "negative"
    return "positive"  # default
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  # -----------------------------
63
+ # Retrieval with fuzzy matching
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # -----------------------------
65
def _entry_text(entry) -> str:
    """Return the quote text of *entry*.

    Accepts both plain strings and {"quote": ...} dicts — uploaded
    datasets and the staging code in this module produce both shapes.
    Anything else yields "".
    """
    if isinstance(entry, str):
        return entry
    if isinstance(entry, dict):
        return str(entry.get("quote", ""))
    return ""

def best_match_quote(category: str, sentiment: str, user_text: str) -> str:
    """Pick the quote in *category* most similar to *user_text*.

    Scoring is token overlap between the user's message and each quote,
    with a difflib fuzzy fallback (cutoff 0.8) when no token matches
    exactly. *sentiment* is currently unused but kept for interface
    compatibility with the respond() callback.

    Returns the best-matching quote text, a random quote when nothing
    scores, or an explanatory message when the category is missing/empty.
    """
    if category not in QUOTES:
        return f"No quotes found for category '{category}'."
    pool = QUOTES[category]
    if not pool:
        return f"No quotes available in '{category}'."

    q_tokens = set(tokenize(user_text))
    best_score = -1
    best_quote = None

    for entry in pool:
        text = _entry_text(entry)
        qtoks = set(tokenize(text))
        score = len(q_tokens & qtoks)
        # fuzzy matching fallback: credit near-miss words (typos, plurals)
        if score == 0:
            for word in q_tokens:
                if difflib.get_close_matches(word, qtoks, n=1, cutoff=0.8):
                    score += 1
        if score > best_score:
            best_score = score
            best_quote = text

    if not best_quote:
        # No entry scored (or all entries were empty): fall back to a random
        # non-empty quote; guard against an all-empty pool (random.choice
        # on an empty list would raise ValueError).
        candidates = [t for t in (_entry_text(e) for e in pool) if t]
        if not candidates:
            return f"No quotes available in '{category}'."
        return random.choice(candidates)
    return best_quote
92
+
93
+ # -----------------------------
94
+ # Gradio callbacks
95
+ # -----------------------------
96
def respond(message, history, category, sentiment_choice):
    """Chat callback for Send / textbox submit.

    Appends the (user message, bot reply) pair to *history* and returns
    ("", history) so Gradio clears the input textbox and refreshes the
    Chatbot component.
    """
    # A freshly cleared chat hands the callback None instead of a list;
    # the previous revision guarded this, so restore the guard.
    if history is None:
        history = []

    if not QUOTES:
        bot = "No dataset loaded. Please upload a JSON file first."
        history.append((message, bot))
        return "", history

    if not category:
        bot = "Please select a category."
        history.append((message, bot))
        return "", history

    # sentiment not really used with interview-style data, but kept for compatibility
    if sentiment_choice == "auto":
        sent = infer_sentiment(message)
    else:
        sent = sentiment_choice

    bot = best_match_quote(category, sent, message)
    history.append((message, bot))
    return "", history
 
 
 
 
 
116
 
117
def clear_chat():
    """Callback for the Clear button.

    Returns None, which Gradio interprets as "empty the Chatbot".
    """
    return None
119
 
 
 
 
120
def upload_json(filepath):
    """Replace the in-memory dataset with the uploaded JSON file.

    Args:
        filepath: local path of the uploaded file (gr.File type="filepath").

    Returns:
        tuple: (status_message, dropdown_update) — a plain string for the
        upload-status textbox (every branch now returns the same type) and
        a gr.update refreshing the category dropdown choices.
    """
    global QUOTES, DATA_PATH
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            # Return a plain string like the other branches so the
            # Textbox output always receives a consistent type.
            return "Upload failed: JSON root must be an object.", gr.update(choices=[])

        if "staged_responses" not in data:
            data["staged_responses"] = []

        QUOTES = data
        # NOTE(review): this records only the basename of Gradio's temp
        # upload path; later exports still write quotes_export.json.
        DATA_PATH = os.path.basename(filepath)
        cats = sorted(QUOTES.keys())
        status = f"Loaded {len(cats)} categories from {DATA_PATH}."
        return status, gr.update(choices=cats, value=(cats[0] if cats else None))
    except Exception as e:
        return f"Error loading file: {e}", gr.update(choices=[])
138
 
139
def download_current():
    """Serialize the in-memory dataset (including staged responses).

    Writes QUOTES to quotes_export.json and returns that path for the
    download component, or None when the export fails.
    """
    export_path = "quotes_export.json"
    try:
        with open(export_path, "w", encoding="utf-8") as f:
            json.dump(QUOTES, f, indent=2, ensure_ascii=False)
        return export_path
    except Exception as e:
        # Best-effort export: log the failure instead of swallowing it.
        print(f"Failed to export dataset: {e}")
        return None
149
+
150
def stage_conversation(history, category):
    """Copy every (user, bot) exchange from *history* into the dataset's
    'staged_responses' bucket, tagged with *category*.

    Returns a status string for the stage-status textbox.
    """
    if not category:
        return "Please select a category to stage into."

    # A freshly cleared chat passes None instead of an empty list.
    history = history or []
    staged = QUOTES.get("staged_responses", [])
    # Renamed loop variables so they no longer shadow the module-level
    # `msg` UI component.
    for user_msg, bot_msg in history:
        staged.append({
            "category": category,
            "user": user_msg,
            "bot": bot_msg,
        })
    QUOTES["staged_responses"] = staged
    return f"Staged {len(history)} exchanges into 'staged_responses'."
163
 
164
# -----------------------------
# UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, Similarity Matching")

    # Dropdown choices come from whatever dataset was loaded at startup.
    initial_categories = sorted(list(QUOTES.keys()))

    with gr.Row():
        category = gr.Dropdown(
            label="Category",
            choices=initial_categories,
            value=(initial_categories[0] if initial_categories else None)
        )
        sentiment = gr.Dropdown(
            label="Sentiment",
            choices=["auto", "positive", "negative"],
            value="auto"
        )

    # Chat area
    chatbot = gr.Chatbot(label="Conversation", height=360, type="tuples")
    msg = gr.Textbox(label="Your message", placeholder="Ask something like: 'Is food good in college?'", autofocus=True)
    send = gr.Button("Send")
    clear = gr.Button("Clear")

    # Staging controls: copy the current conversation into 'staged_responses'.
    with gr.Row():
        stage_btn = gr.Button("Stage Conversation to Category")
        stage_status = gr.Textbox(label="Stage status", interactive=False)

    # Dataset I/O
    with gr.Row():
        uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
        upload_status = gr.Textbox(label="Upload status", interactive=False)
        downloader = gr.File(label="Download current dataset")

    # Wire events
    msg.submit(respond, [msg, chatbot, category, sentiment], [msg, chatbot])
    send.click(respond, [msg, chatbot, category, sentiment], [msg, chatbot])
    clear.click(clear_chat, None, chatbot, queue=False)

    stage_btn.click(stage_conversation, [chatbot, category], stage_status)

    uploader.upload(upload_json, uploader, [upload_status, category])
    # NOTE(review): gr.File's `download` event fires after a user downloads an
    # already-populated file, and no output component is wired here — confirm
    # this call actually fills `downloader` with the export; a gr.DownloadButton
    # bound to download_current may be what is intended.
    downloader.download(download_current)
207
 
208
# -----------------------------
# Startup log
# -----------------------------
# Emitted once at import time so container logs show when the app booted
# and which categories were available.
print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
if QUOTES:
    for cat, items in QUOTES.items():
        # Non-list values (unexpected schema) are skipped silently.
        if isinstance(items, list):
            print(f" - {cat}: {len(items)} entries")