igortech committed on
Commit
fa16b47
·
verified ·
1 Parent(s): 35eb385

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -61
app.py CHANGED
@@ -40,7 +40,7 @@ STOPWORDS = {
40
  POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
41
  NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}
42
 
43
- punct_re = re.compile(f"[{re.escape('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')}]")
44
 
45
  def normalize(text: str) -> str:
46
  return punct_re.sub(" ", (text or "").lower())
@@ -62,50 +62,56 @@ def infer_sentiment(user_text: str) -> str:
62
  # Retrieval
63
  # -----------------------------
64
  def best_match_quote(user_text: str) -> str:
65
- """Search across all categories with fuzzy matching and return best quote."""
66
- max_score = 0
67
  best_quote = None
68
 
69
- for cat_quotes in QUOTES.values():
70
- for q_obj in cat_quotes:
71
- q_text = q_obj.get("quote", "")
72
- score = fuzz.token_set_ratio(user_text.lower(), q_text.lower())
73
- if score > max_score:
74
- max_score = score
75
- best_quote = q_text
76
-
77
- if max_score < 30: # threshold; anything below treated as unknown
 
 
 
78
  return f"No data about '{user_text}'"
79
  return best_quote
80
 
81
- # -----------------------------
82
- # 3-fold response generation
83
- # -----------------------------
84
- def generate_three_fold_response(user_text: str):
85
- quote = best_match_quote(user_text)
86
- if quote.startswith("No data"):
87
- return [quote, "", ""]
88
-
89
- # Very simple 3-fold split
90
- sentences = [s.strip() for s in quote.split('.') if s.strip()]
91
- first = sentences[0] if len(sentences) > 0 else ""
92
- second = " ".join(sentences[1:3]) if len(sentences) > 2 else (sentences[1] if len(sentences)>1 else "")
93
- third = " ".join(sentences[3:]) if len(sentences) > 3 else ""
94
- return [first, second, third]
95
-
96
  # -----------------------------
97
  # Gradio callbacks
98
  # -----------------------------
99
  def respond(message, history, category):
100
- if not message:
 
 
 
 
 
 
 
 
 
101
  return "", history
102
- responses = generate_three_fold_response(message)
103
- bot_response = [
104
- {"label": "Summary", "text": responses[0]},
105
- {"label": "Details", "text": responses[1]},
106
- {"label": "What real people say", "text": responses[2]}
107
- ]
108
- history.append((message, bot_response))
 
 
 
 
 
 
 
 
 
109
  return "", history
110
 
111
  def clear_chat():
@@ -127,34 +133,26 @@ def upload_json(filepath):
127
  return f"Error loading file: {e}", gr.update(choices=[])
128
 
129
  def download_current_json():
130
- tmp = DATA_PATH or "quotes_export.json"
131
- with open(tmp, "w", encoding="utf-8") as f:
132
  json.dump(QUOTES, f, indent=2, ensure_ascii=False)
133
- return tmp
134
 
135
  def download_conversation_csv(history):
136
- if not history:
137
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
138
- tmp.close()
139
- return tmp.name
140
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode='w', newline='', encoding='utf-8')
141
- writer = csv.writer(tmp)
142
- writer.writerow(["User Message", "Summary", "Details", "What real people say"])
143
- for msg, bot_resp in history:
144
- summary = bot_resp[0]['text']
145
- details = bot_resp[1]['text']
146
- real_people = bot_resp[2]['text']
147
- writer.writerow([msg, summary, details, real_people])
148
- tmp.close()
149
- return tmp.name
150
 
151
  # -----------------------------
152
  # UI
153
  # -----------------------------
154
  with gr.Blocks() as demo:
155
- gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, 3-Fold Responses")
156
 
157
- # Category list from loaded data (may be empty until upload)
158
  initial_categories = sorted(list(QUOTES.keys()))
159
 
160
  with gr.Row():
@@ -170,27 +168,27 @@ with gr.Blocks() as demo:
170
  clear = gr.Button("Clear")
171
 
172
  with gr.Row():
173
- uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
174
  upload_status = gr.Textbox(label="Upload status", interactive=False)
175
- download_json_btn = gr.File(label="Download dataset")
176
  download_csv_btn = gr.File(label="Export conversation to CSV")
177
 
178
- # Wire events
179
  msg.submit(respond, [msg, chatbot, category], [msg, chatbot])
180
  send.click(respond, [msg, chatbot, category], [msg, chatbot])
181
  clear.click(clear_chat, None, chatbot, queue=False)
182
 
183
- uploader.upload(upload_json, uploader, [upload_status, category])
184
  download_json_btn.download(download_current_json)
185
- download_csv_btn.click(download_conversation_csv, chatbot, download_csv_btn)
186
 
187
  # -----------------------------
188
  # Startup log
189
  # -----------------------------
190
  print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
191
  if QUOTES:
192
- for cat, qlist in QUOTES.items():
193
- print(f" - {cat}: {len(qlist)} entries")
194
 
195
  if __name__ == "__main__":
196
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
40
  POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
41
  NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}
42
 
43
+ punct_re = re.compile(r"[{}]".format(re.escape("""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""")))
44
 
45
  def normalize(text: str) -> str:
46
  return punct_re.sub(" ", (text or "").lower())
 
62
  # Retrieval
63
  # -----------------------------
64
  def best_match_quote(user_text: str) -> str:
65
+ """Search all categories and return the best fuzzy match, or a "No data about ..." message when no quote scores at least 30."""
66
+ best_score = 0
67
  best_quote = None
68
 
69
+ for cat, quotes_list in QUOTES.items():
70
+ for quote_entry in quotes_list:
71
+ quote = quote_entry.get("quote", "")
72
+ if not quote.strip():
73
+ continue
74
+ score = fuzz.partial_ratio(user_text.lower(), quote.lower())
75
+ if score > best_score:
76
+ best_score = score
77
+ best_quote = quote
78
+
79
+ # Threshold for unknown
80
+ if best_score < 30 or best_quote is None:
81
  return f"No data about '{user_text}'"
82
  return best_quote
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  # -----------------------------
85
  # Gradio callbacks
86
  # -----------------------------
87
  def respond(message, history, category):
88
+ if not QUOTES:
89
+ bot = "No dataset loaded. Please upload a JSON file first."
90
+ history.append({"role": "user", "content": message})
91
+ history.append({"role": "assistant", "content": bot})
92
+ return "", history
93
+
94
+ if not category:
95
+ bot = "Please select a category."
96
+ history.append({"role": "user", "content": message})
97
+ history.append({"role": "assistant", "content": bot})
98
  return "", history
99
+
100
+ quote = best_match_quote(message)
101
+
102
+ # Build the reply: a summary (text before the first ". ") followed by the full quote
103
+ summary = quote.split(". ")[0] + "." if "." in quote else quote
104
+ detail = quote
105
+ unknown = ""
106
+ if "No data about" in quote:
107
+ unknown = quote
108
+
109
+ bot_text = f"Summary:\n{summary}\n\nWhat real people say:\n{detail}"
110
+ if unknown:
111
+ bot_text += f"\n\n{unknown}"
112
+
113
+ history.append({"role": "user", "content": message})
114
+ history.append({"role": "assistant", "content": bot_text})
115
  return "", history
116
 
117
  def clear_chat():
 
133
  return f"Error loading file: {e}", gr.update(choices=[])
134
 
135
  def download_current_json():
136
+ tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
137
+ with open(tmp_file.name, "w", encoding="utf-8") as f:
138
  json.dump(QUOTES, f, indent=2, ensure_ascii=False)
139
+ return tmp_file.name
140
 
141
  def download_conversation_csv(history):
142
+ tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
143
+ with open(tmp_file.name, "w", newline="", encoding="utf-8") as f:
144
+ writer = csv.writer(f)
145
+ writer.writerow(["role", "message"])
146
+ for msg in history:
147
+ writer.writerow([msg.get("role"), msg.get("content")])
148
+ return tmp_file.name
 
 
 
 
 
 
 
149
 
150
  # -----------------------------
151
  # UI
152
  # -----------------------------
153
  with gr.Blocks() as demo:
154
+ gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, Fuzzy Matching")
155
 
 
156
  initial_categories = sorted(list(QUOTES.keys()))
157
 
158
  with gr.Row():
 
168
  clear = gr.Button("Clear")
169
 
170
  with gr.Row():
171
+ upload_btn = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
172
  upload_status = gr.Textbox(label="Upload status", interactive=False)
173
+ download_json_btn = gr.File(label="Download current dataset (.json)")
174
  download_csv_btn = gr.File(label="Export conversation to CSV")
175
 
176
+ # Events
177
  msg.submit(respond, [msg, chatbot, category], [msg, chatbot])
178
  send.click(respond, [msg, chatbot, category], [msg, chatbot])
179
  clear.click(clear_chat, None, chatbot, queue=False)
180
 
181
+ upload_btn.upload(upload_json, upload_btn, [upload_status, category])
182
  download_json_btn.download(download_current_json)
183
+ download_csv_btn.download(lambda: download_conversation_csv(chatbot.value))
184
 
185
  # -----------------------------
186
  # Startup log
187
  # -----------------------------
188
  print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
189
  if QUOTES:
190
+ for cat, entries in QUOTES.items():
191
+ print(f" - {cat}: {len(entries)} entries")
192
 
193
  if __name__ == "__main__":
194
  demo.launch(server_name="0.0.0.0", server_port=7860)