igortech committed on
Commit
19601ea
·
verified ·
1 Parent(s): a4ee3b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -52
app.py CHANGED
@@ -16,11 +16,9 @@ def load_dataset():
16
  if os.path.exists(DATA_PATH):
17
  with open(DATA_PATH, "r", encoding="utf-8") as f:
18
  data = json.load(f)
19
- # ensure staged_responses exists
20
  if "staged_responses" not in data:
21
  data["staged_responses"] = []
22
  return data
23
- # default empty dataset with staged bucket
24
  return {"staged_responses": []}
25
 
26
  dataset = load_dataset()
@@ -37,16 +35,14 @@ def tokens(s: str):
37
  def score_quote(user_input: str, quote_text: str):
38
  """
39
  Score a quote vs user input:
40
- - token overlap gets a boosted score
41
  - otherwise fallback to SequenceMatcher ratio
42
  """
43
  u_toks = tokens(user_input)
44
  q_toks = tokens(quote_text)
45
  overlap = len(u_toks & q_toks)
46
  if overlap > 0:
47
- # strong signal: >=1.0 plus a small bonus for proportion overlap
48
  return 1.0 + (overlap / max(1, len(q_toks)))
49
- # fuzzy fallback
50
  return SequenceMatcher(None, user_input.lower(), quote_text.lower()).ratio()
51
 
52
  def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
@@ -55,7 +51,6 @@ def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
55
  - try within `category` first (if provided)
56
  - if none above `threshold`, search across all categories
57
  - return list of tuples (score, quote, category)
58
- - if nothing passes threshold, return empty list
59
  """
60
  if not user_input or not user_input.strip():
61
  return []
@@ -68,8 +63,7 @@ def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
68
  scored.append((s, q, cat))
69
  return scored
70
 
71
- # 1) try selected category first
72
- scored = []
73
  if category and category in dataset and category != "staged_responses":
74
  scored = score_list_for_cat(category)
75
  scored.sort(key=lambda x: x[0], reverse=True)
@@ -86,7 +80,7 @@ def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
86
  if all_scored and all_scored[0][0] >= threshold:
87
  return all_scored[:top_n]
88
 
89
- # 3) nothing
90
  return []
91
 
92
  # -----------------------------
@@ -95,20 +89,14 @@ def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
95
  def generate_three_fold(category, user_text):
96
  matches = find_best_quotes(category, user_text, top_n=3, threshold=0.15)
97
  if not matches:
98
- # Unknown fallback
99
  unknown_msg = f"No data about {user_text} (unknown)."
100
  return unknown_msg, unknown_msg, "Reference: None"
101
 
102
- # Build summary from top match's first sentence
103
  top_quote = matches[0][1]
104
  first_sentence = top_quote.split(".")[0].strip()
105
  summary = f"Summary: {first_sentence}."
106
-
107
- # Fusion: join unique quotes (up to 3)
108
- fused = " ".join(dict.fromkeys([m[1] for m in matches])) # preserve order, remove duplicates
109
  fusion = f"Fusion: {fused}"
110
-
111
- # Reference: simple placeholder with category and top matched category
112
  top_cat = matches[0][2]
113
  reference = f"Reference: Example search for '{category}' (top match from '{top_cat}')."
114
  return summary, fusion, reference
@@ -117,71 +105,142 @@ def generate_three_fold(category, user_text):
117
  # Conversation & staging utilities
118
  # -----------------------------
119
  def append_user_assistant(history, user_text, assistant_text):
120
- # history is a list of message dicts: {"role": "user"/"assistant", "content": "..."}
121
  history = history or []
122
  history.append({"role": "user", "content": user_text})
123
  history.append({"role": "assistant", "content": assistant_text})
124
  return history
125
 
126
  def get_last_user_and_assistant(history):
127
- # Find the last user message and the first assistant message that follows it
128
  last_user = None
129
  last_assistant = None
130
  if not history:
131
  return None, None
132
- # traverse backwards
 
 
133
  for i in range(len(history)-1, -1, -1):
134
- msg = history[i]
135
- if last_assistant is None and msg["role"] == "assistant":
136
- last_assistant = msg["content"]
137
- if msg["role"] == "user":
138
- last_user = msg["content"]
139
- # once we have both, break
140
  break
141
- # if assistant message came *before* last user (unlikely in our flow), try to find assistant after user
142
- if last_user and not last_assistant:
143
- for i in range(len(history)-1, -1, -1):
144
- if history[i]["role"] == "assistant":
145
- last_assistant = history[i]["content"]
146
  break
147
  return last_user, last_assistant
148
 
149
  # -----------------------------
150
- # File helpers
151
  # -----------------------------
152
- def prepare_json_download(obj):
153
- text = json.dumps(obj, indent=2, ensure_ascii=False)
154
- return {"name": f"dataset_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
155
- "data": text.encode("utf-8")}
 
 
 
156
 
157
- def prepare_csv_download(history):
158
  if not history:
159
  return None
160
- from io import StringIO
161
- s = StringIO()
162
- writer = csv.writer(s)
163
- writer.writerow(["role", "content"])
164
- for m in history:
165
- writer.writerow([m.get("role", ""), m.get("content", "")])
166
- return {"name": f"conversation_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
167
- "data": s.getvalue().encode("utf-8")}
 
168
 
169
  # -----------------------------
170
- # Gradio callbacks
171
  # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  def download_conversation_csv(state):
173
- return prepare_csv_download(state or [])
 
 
 
174
 
175
  def download_current_dataset():
176
- return prepare_json_download(dataset)
 
177
 
178
  # -----------------------------
179
- # Gradio UI
180
  # -----------------------------
181
  with gr.Blocks() as demo:
182
  gr.Markdown("## Campus Life — 3-fold responses, staging, CSV/JSON downloads")
183
 
184
- # dropdown, chatbot, textbox, send, clear (UNCHANGED) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  with gr.Row():
187
  upload = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
@@ -192,9 +251,16 @@ with gr.Blocks() as demo:
192
  download_csv_file = gr.File(label="Download CSV", interactive=True)
193
 
194
  # events
195
- # ... unchanged ...
196
- download_csv_btn.click(download_conversation_csv, conversation_state, download_csv_file)
197
- download_json_btn.click(download_conversation_json, conversation_state, download_json_file)
 
 
 
 
 
 
 
198
 
199
  # -----------------------------
200
  # Startup log
 
16
  if os.path.exists(DATA_PATH):
17
  with open(DATA_PATH, "r", encoding="utf-8") as f:
18
  data = json.load(f)
 
19
  if "staged_responses" not in data:
20
  data["staged_responses"] = []
21
  return data
 
22
  return {"staged_responses": []}
23
 
24
  dataset = load_dataset()
 
35
def score_quote(user_input: str, quote_text: str):
    """Rate how well *quote_text* matches *user_input*.

    Token overlap is treated as a strong signal: any shared token pushes
    the score above 1.0, with a bonus proportional to how much of the
    quote's vocabulary is covered.  With no overlap at all, fall back to
    a fuzzy SequenceMatcher ratio in [0, 1].
    """
    input_tokens = tokens(user_input)
    quote_tokens = tokens(quote_text)
    shared = input_tokens & quote_tokens
    if shared:
        # Boosted score: guaranteed to outrank any fuzzy-only match.
        return 1.0 + len(shared) / max(1, len(quote_tokens))
    # No common tokens — use character-level similarity instead.
    return SequenceMatcher(None, user_input.lower(), quote_text.lower()).ratio()
47
 
48
  def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
 
51
  - try within `category` first (if provided)
52
  - if none above `threshold`, search across all categories
53
  - return list of tuples (score, quote, category)
 
54
  """
55
  if not user_input or not user_input.strip():
56
  return []
 
63
  scored.append((s, q, cat))
64
  return scored
65
 
66
+ # 1) search selected category first (if present)
 
67
  if category and category in dataset and category != "staged_responses":
68
  scored = score_list_for_cat(category)
69
  scored.sort(key=lambda x: x[0], reverse=True)
 
80
  if all_scored and all_scored[0][0] >= threshold:
81
  return all_scored[:top_n]
82
 
83
+ # 3) nothing matches well enough
84
  return []
85
 
86
  # -----------------------------
 
89
def generate_three_fold(category, user_text):
    """Produce the three-fold reply (summary, fusion, reference) for *user_text*.

    Looks up the best-matching quotes via find_best_quotes(); when nothing
    clears the threshold, every fold carries an explicit "unknown" message.
    """
    matches = find_best_quotes(category, user_text, top_n=3, threshold=0.15)
    if not matches:
        unknown_msg = f"No data about {user_text} (unknown)."
        return unknown_msg, unknown_msg, "Reference: None"

    _, best_quote, best_category = matches[0]
    # Summary = first sentence of the single best quote.
    first_sentence = best_quote.split(".")[0].strip()
    summary = f"Summary: {first_sentence}."
    # Fusion = all matched quotes, de-duplicated but order-preserving.
    fused = " ".join(dict.fromkeys(quote for _, quote, _ in matches))
    fusion = f"Fusion: {fused}"
    reference = f"Reference: Example search for '{category}' (top match from '{best_category}')."
    return summary, fusion, reference
 
105
  # Conversation & staging utilities
106
  # -----------------------------
107
def append_user_assistant(history, user_text, assistant_text):
    """Append a user/assistant message pair to *history* and return it.

    *history* is a list of {"role": ..., "content": ...} dicts; a falsy
    value (None or empty) is replaced with a fresh list.
    """
    messages = history or []
    messages.extend(
        (
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        )
    )
    return messages
112
 
113
def get_last_user_and_assistant(history):
    """Return (last user message, assistant reply that follows it).

    Scans *history* backwards for the most recent user turn, then forwards
    from that turn for the next assistant turn.  Either element of the
    tuple may be None when no such message exists.
    """
    last_user = None
    last_assistant = None
    if not history:
        return None, None
    # Locate the index of the most recent user message, if any.
    user_idx = next(
        (i for i in range(len(history) - 1, -1, -1)
         if history[i].get("role") == "user"),
        None,
    )
    if user_idx is not None:
        last_user = history[user_idx].get("content")
        # First assistant message strictly after that user turn.
        for msg in history[user_idx + 1:]:
            if msg.get("role") == "assistant":
                last_assistant = msg.get("content")
                break
    return last_user, last_assistant
133
 
134
  # -----------------------------
135
+ # Temp file helpers
136
  # -----------------------------
137
def write_temp_json(obj, suffix=".json"):
    """Serialize *obj* as pretty-printed JSON into a new temp file.

    The file is created with delete=False so it outlives this call and can
    be served for download; cleanup is the caller's responsibility.
    Returns the path of the written file.
    """
    # Write through the tempfile handle directly instead of the original
    # create/close/reopen dance — one step, no gap between create and write.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", suffix=suffix, delete=False
    ) as tf:
        json.dump(obj, tf, indent=2, ensure_ascii=False)
        return tf.name
144
 
145
def write_temp_csv_from_history(history, suffix=".csv"):
    """Dump conversation *history* to a temp CSV with role,content columns.

    Returns the file path, or None when the history is empty/None.
    The file is created with delete=False so the download widget can
    serve it; cleanup is the caller's responsibility.
    """
    if not history:
        return None
    # Write through the tempfile handle directly instead of the original
    # create/close/reopen dance; newline="" is required by the csv module.
    with tempfile.NamedTemporaryFile(
        mode="w", newline="", encoding="utf-8", suffix=suffix, delete=False
    ) as tf:
        writer = csv.writer(tf)
        writer.writerow(["role", "content"])
        writer.writerows(
            [m.get("role", ""), m.get("content", "")] for m in history
        )
        return tf.name
157
 
158
  # -----------------------------
159
+ # Gradio callbacks (UI-safe)
160
  # -----------------------------
161
def respond(message, state, category):
    """Handle Send / Enter: run the three-fold generator and update the chat.

    Returns (cleared textbox value, new state, chatbot payload); the state
    list doubles as the messages-format chatbot value.
    """
    history = state or []
    # Ignore blank or whitespace-only submissions; conversation unchanged.
    if not message or not message.strip():
        return "", history, history
    summary, fusion, reference = generate_three_fold(category, message)
    reply = f"{summary}\n\n{fusion}\n\n{reference}"
    history = append_user_assistant(history, message, reply)
    return "", history, history
174
+
175
def clear_all():
    """Reset the textbox, the conversation state and the chatbot display."""
    return ("", [], [])
178
+
179
def upload_json(filepath):
    """Load an uploaded dataset JSON file and swap it in as the active dataset.

    *filepath* is the local path Gradio stores the upload under.  The root
    must be a JSON object; a missing "staged_responses" bucket is added.
    Mutates the module-level `dataset` / `DATA_PATH` globals.
    Returns (status message, dropdown update for the category selector).
    """
    global dataset, DATA_PATH

    def _category_choices(d):
        # The internal staging bucket must never be a selectable category.
        return sorted(k for k in d.keys() if k != "staged_responses")

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            # BUG FIX: error paths previously offered "staged_responses" as a
            # category (sorted(dataset.keys())); filter it like the success path.
            return ("Upload failed: root must be an object",
                    gr.update(choices=_category_choices(dataset), value=None))
        if "staged_responses" not in data:
            data["staged_responses"] = []
        dataset = data
        DATA_PATH = os.path.basename(filepath)
        cats = _category_choices(dataset)
        status = f"Loaded {len(cats)} categories from {DATA_PATH}."
        return status, gr.update(choices=cats, value=(cats[0] if cats else None))
    except Exception as e:
        return f"Error loading file: {e}", gr.update(choices=_category_choices(dataset), value=None)
196
+
197
def stage_last_conversation(state, target_category):
    """Copy the most recent user/assistant pair into the staging bucket.

    Entries are appended to dataset["staged_responses"] as
    {"question": ..., "answer": ..., "category": ...} dicts.
    Returns a short human-readable status string for the UI.
    """
    if not state:
        return "No conversation in memory."
    last_user, last_assistant = get_last_user_and_assistant(state)
    if not last_user:
        return "No user message to stage."
    # Create the bucket on demand, then record the Q/A pair.
    staged = dataset.setdefault("staged_responses", [])
    staged.append({
        "question": last_user,
        "answer": last_assistant or "",
        "category": target_category,
    })
    return f"Staged last Q/A into '{target_category}'."
212
+
213
def download_conversation_csv(state):
    """Write the conversation to a temp CSV and feed it to the File widget.

    Returns a gr.update carrying the temp-file path, or a None value when
    the conversation is empty (clears the widget).
    """
    path = write_temp_csv_from_history(state or [])
    # BUG FIX: gr.File.update() was removed in Gradio 4; use the generic
    # gr.update() that the rest of this file already relies on.  A None
    # path covers the empty-conversation branch in one return.
    return gr.update(value=path)
218
 
219
def download_current_dataset():
    """Serialize the in-memory dataset to a temp JSON file for download.

    Returns a gr.update carrying the temp-file path.
    """
    path = write_temp_json(dataset, suffix=".json")
    # BUG FIX: gr.File.update() was removed in Gradio 4; gr.update() is the
    # supported spelling and matches the rest of the file.
    return gr.update(value=path)
222
 
223
  # -----------------------------
224
+ # Gradio UI (components + wiring)
225
  # -----------------------------
226
  with gr.Blocks() as demo:
227
  gr.Markdown("## Campus Life — 3-fold responses, staging, CSV/JSON downloads")
228
 
229
+ # dropdown choices exclude staged_responses
230
+ category_choices = sorted([k for k in dataset.keys() if k != "staged_responses"])
231
+ with gr.Row():
232
+ category = gr.Dropdown(label="Category", choices=category_choices,
233
+ value=(category_choices[0] if category_choices else None))
234
+
235
+ chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
236
+ conversation_state = gr.State([]) # holds list of {"role":..,"content":..}
237
+ msg = gr.Textbox(label="Your message", placeholder="Type and press Enter (or click Send)", autofocus=True)
238
+ send = gr.Button("Send")
239
+ clear = gr.Button("Clear")
240
+
241
+ with gr.Row():
242
+ stage_btn = gr.Button("Stage last Q/A to category")
243
+ stage_status = gr.Textbox(label="Stage status", interactive=False, value="")
244
 
245
  with gr.Row():
246
  upload = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
 
251
  download_csv_file = gr.File(label="Download CSV", interactive=True)
252
 
253
  # events
254
+ msg.submit(respond, [msg, conversation_state, category], [msg, conversation_state, chatbot])
255
+ send.click(respond, [msg, conversation_state, category], [msg, conversation_state, chatbot])
256
+ clear.click(clear_all, [], [msg, conversation_state, chatbot])
257
+
258
+ stage_btn.click(stage_last_conversation, [conversation_state, category], stage_status)
259
+
260
+ upload.upload(upload_json, upload, [upload_status, category])
261
+
262
+ download_csv_btn.click(download_conversation_csv, [conversation_state], download_csv_file)
263
+ download_json_btn.click(download_current_dataset, None, download_json_file)
264
 
265
  # -----------------------------
266
  # Startup log