Spaces:

chitech2026
/

sandbox_dev

Sleeping

App Files Files Community

igortech committed on Sep 16, 2025

Commit

19601ea

verified ·

1 Parent(s): a4ee3b9

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -52

app.py CHANGED Viewed

@@ -16,11 +16,9 @@ def load_dataset():
     if os.path.exists(DATA_PATH):
         with open(DATA_PATH, "r", encoding="utf-8") as f:
             data = json.load(f)
-            # ensure staged_responses exists
             if "staged_responses" not in data:
                 data["staged_responses"] = []
             return data
-    # default empty dataset with staged bucket
     return {"staged_responses": []}
 dataset = load_dataset()
@@ -37,16 +35,14 @@ def tokens(s: str):
 def score_quote(user_input: str, quote_text: str):
     """
     Score a quote vs user input:
-      - token overlap gets a boosted score
       - otherwise fallback to SequenceMatcher ratio
     """
     u_toks = tokens(user_input)
     q_toks = tokens(quote_text)
     overlap = len(u_toks & q_toks)
     if overlap > 0:
-        # strong signal: >=1.0 plus a small bonus for proportion overlap
         return 1.0 + (overlap / max(1, len(q_toks)))
-    # fuzzy fallback
     return SequenceMatcher(None, user_input.lower(), quote_text.lower()).ratio()
 def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
@@ -55,7 +51,6 @@ def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
      - try within `category` first (if provided)
      - if none above `threshold`, search across all categories
      - return list of tuples (score, quote, category)
-     - if nothing passes threshold, return empty list
     """
     if not user_input or not user_input.strip():
         return []
@@ -68,8 +63,7 @@ def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
             scored.append((s, q, cat))
         return scored
-    # 1) try selected category first
-    scored = []
     if category and category in dataset and category != "staged_responses":
         scored = score_list_for_cat(category)
         scored.sort(key=lambda x: x[0], reverse=True)
@@ -86,7 +80,7 @@ def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
     if all_scored and all_scored[0][0] >= threshold:
         return all_scored[:top_n]
-    # 3) nothing
     return []
 # -----------------------------
@@ -95,20 +89,14 @@ def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
 def generate_three_fold(category, user_text):
     matches = find_best_quotes(category, user_text, top_n=3, threshold=0.15)
     if not matches:
-        # Unknown fallback
         unknown_msg = f"No data about {user_text} (unknown)."
         return unknown_msg, unknown_msg, "Reference: None"
-    # Build summary from top match's first sentence
     top_quote = matches[0][1]
     first_sentence = top_quote.split(".")[0].strip()
     summary = f"Summary: {first_sentence}."
-    # Fusion: join unique quotes (up to 3)
-    fused = " ".join(dict.fromkeys([m[1] for m in matches]))  # preserve order, remove duplicates
     fusion = f"Fusion: {fused}"
-    # Reference: simple placeholder with category and top matched category
     top_cat = matches[0][2]
     reference = f"Reference: Example search for '{category}' (top match from '{top_cat}')."
     return summary, fusion, reference
@@ -117,71 +105,142 @@ def generate_three_fold(category, user_text):
 # Conversation & staging utilities
 # -----------------------------
 def append_user_assistant(history, user_text, assistant_text):
-    # history is a list of message dicts: {"role": "user"/"assistant", "content": "..."}
     history = history or []
     history.append({"role": "user", "content": user_text})
     history.append({"role": "assistant", "content": assistant_text})
     return history
 def get_last_user_and_assistant(history):
-    # Find the last user message and the first assistant message that follows it
     last_user = None
     last_assistant = None
     if not history:
         return None, None
-    # traverse backwards
     for i in range(len(history)-1, -1, -1):
-        msg = history[i]
-        if last_assistant is None and msg["role"] == "assistant":
-            last_assistant = msg["content"]
-        if msg["role"] == "user":
-            last_user = msg["content"]
-            # once we have both, break
             break
-    # if assistant message came *before* last user (unlikely in our flow), try to find assistant after user
-    if last_user and not last_assistant:
-        for i in range(len(history)-1, -1, -1):
-            if history[i]["role"] == "assistant":
-                last_assistant = history[i]["content"]
                 break
     return last_user, last_assistant
 # -----------------------------
-# File helpers
 # -----------------------------
-def prepare_json_download(obj):
-    text = json.dumps(obj, indent=2, ensure_ascii=False)
-    return {"name": f"dataset_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
-            "data": text.encode("utf-8")}
-def prepare_csv_download(history):
     if not history:
         return None
-    from io import StringIO
-    s = StringIO()
-    writer = csv.writer(s)
-    writer.writerow(["role", "content"])
-    for m in history:
-        writer.writerow([m.get("role", ""), m.get("content", "")])
-    return {"name": f"conversation_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
-            "data": s.getvalue().encode("utf-8")}
 # -----------------------------
-# Gradio callbacks
 # -----------------------------
 def download_conversation_csv(state):
-    return prepare_csv_download(state or [])
 def download_current_dataset():
-    return prepare_json_download(dataset)
 # -----------------------------
-# Gradio UI
 # -----------------------------
 with gr.Blocks() as demo:
     gr.Markdown("## Campus Life — 3-fold responses, staging, CSV/JSON downloads")
-    # dropdown, chatbot, textbox, send, clear (UNCHANGED) ...
     with gr.Row():
         upload = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
@@ -192,9 +251,16 @@ with gr.Blocks() as demo:
         download_csv_file = gr.File(label="Download CSV", interactive=True)
     # events
-    # ... unchanged ...
-    download_csv_btn.click(download_conversation_csv, conversation_state, download_csv_file)
-download_json_btn.click(download_conversation_json, conversation_state, download_json_file)
 # -----------------------------
 # Startup log

     if os.path.exists(DATA_PATH):
         with open(DATA_PATH, "r", encoding="utf-8") as f:
             data = json.load(f)
             if "staged_responses" not in data:
                 data["staged_responses"] = []
             return data
     return {"staged_responses": []}
 dataset = load_dataset()
 def score_quote(user_input: str, quote_text: str):
     """
     Rate how closely a quote matches what the user typed.

       - one or more shared tokens: 1.0 plus (shared tokens / quote tokens)
       - no shared tokens: SequenceMatcher ratio of the lowercased strings
     """
     input_tokens = tokens(user_input)
     quote_tokens = tokens(quote_text)
     shared = len(input_tokens & quote_tokens)
     if not shared:
         return SequenceMatcher(None, user_input.lower(), quote_text.lower()).ratio()
     # max(1, ...) keeps the denominator non-zero.
     return 1.0 + shared / max(1, len(quote_tokens))
 def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
      - try within `category` first (if provided)
      - if none above `threshold`, search across all categories
      - return list of tuples (score, quote, category)
     """
     if not user_input or not user_input.strip():
         return []
             scored.append((s, q, cat))
         return scored
+    # 1) search selected category first (if present)
     if category and category in dataset and category != "staged_responses":
         scored = score_list_for_cat(category)
         scored.sort(key=lambda x: x[0], reverse=True)
     if all_scored and all_scored[0][0] >= threshold:
         return all_scored[:top_n]
+    # 3) nothing matches well enough
     return []
 # -----------------------------
 def generate_three_fold(category, user_text):
     """
     Build the (summary, fusion, reference) strings for one user message.

     Looks up at most three quotes with find_best_quotes. With no match,
     the summary and fusion are both the "No data about ..." message and the
     reference is "Reference: None". Otherwise:
       - summary: text of the top quote up to its first "."
       - fusion: the distinct matched quotes joined by spaces, in match order
       - reference: names the requested category and the top match's category
     """
     matches = find_best_quotes(category, user_text, top_n=3, threshold=0.15)
     if not matches:
         unknown_msg = f"No data about {user_text} (unknown)."
         return unknown_msg, unknown_msg, "Reference: None"

     # Each match is indexed as (score, quote, category).
     best = matches[0]
     first_sentence = best[1].partition(".")[0].strip()
     # dict.fromkeys drops repeated quotes while keeping first-seen order.
     fused = " ".join(dict.fromkeys(m[1] for m in matches))
     top_cat = best[2]

     return (
         f"Summary: {first_sentence}.",
         f"Fusion: {fused}",
         f"Reference: Example search for '{category}' (top match from '{top_cat}').",
     )
 # Conversation & staging utilities
 # -----------------------------
 def append_user_assistant(history, user_text, assistant_text):
     """
     Add one user/assistant exchange to the end of ``history`` and return it.

     A falsy ``history`` (None or an empty list) is replaced by a new list;
     otherwise the list that was passed in is extended in place.
     """
     if not history:
         history = []
     history.extend(
         (
             {"role": "user", "content": user_text},
             {"role": "assistant", "content": assistant_text},
         )
     )
     return history
 def get_last_user_and_assistant(history):
     """
     Return (content of the last user message, content of the first assistant
     message that follows it).

     Either element is None when no such message exists; an empty or missing
     history yields (None, None).
     """
     if not history:
         return None, None

     # Scan from the end for the most recent user message.
     user_idx = next(
         (i for i in reversed(range(len(history))) if history[i].get("role") == "user"),
         None,
     )
     if user_idx is None:
         return None, None

     last_user = history[user_idx].get("content")
     # Scan forward from just after that message for the first assistant reply.
     last_assistant = next(
         (m.get("content") for m in history[user_idx + 1:] if m.get("role") == "assistant"),
         None,
     )
     return last_user, last_assistant
 # -----------------------------
+# Temp file helpers
 # -----------------------------
def write_temp_json(obj, suffix=".json"):
    """
    Serialize ``obj`` as indented UTF-8 JSON into a new temporary file.

    The file is created with ``delete=False`` so it outlives this call and
    the returned path can be handed to a download component.

    Args:
        obj: Any JSON-serializable object.
        suffix: File-name suffix for the temporary file.

    Returns:
        The path of the written file.

    Raises:
        Whatever ``json.dump`` raises (e.g. TypeError for unserializable
        values); in that case the partially written file is removed first.
    """
    # Open once in text mode instead of create/close/reopen.
    tf = tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=suffix, encoding="utf-8"
    )
    try:
        with tf:
            json.dump(obj, tf, indent=2, ensure_ascii=False)
    except Exception:
        # Do not leave an orphaned temp file behind when serialization fails.
        os.unlink(tf.name)
        raise
    return tf.name
def write_temp_csv_from_history(history, suffix=".csv"):
    """
    Write the conversation history to a new temporary CSV file.

    The file has a ``role,content`` header followed by one row per message;
    a message missing either key contributes an empty string for it.

    Args:
        history: List of message dicts with "role" and "content" keys.
        suffix: File-name suffix for the temporary file.

    Returns:
        The path of the written file, or None when ``history`` is empty or
        None (no file is created in that case).

    Raises:
        Whatever the write raises; the partially written file is removed first.
    """
    if not history:
        return None
    # Open once in text mode; newline="" lets the csv module control line endings.
    tf = tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=suffix, newline="", encoding="utf-8"
    )
    try:
        with tf:
            writer = csv.writer(tf)
            writer.writerow(["role", "content"])
            writer.writerows(
                [m.get("role", ""), m.get("content", "")] for m in history
            )
    except Exception:
        # Do not leave an orphaned temp file behind when writing fails.
        os.unlink(tf.name)
        raise
    return tf.name
 # -----------------------------
+# Gradio callbacks (UI-safe)
 # -----------------------------
def respond(message, state, category):
    """
    Handle a submitted message (Send button or Enter).

    Returns a 3-tuple: an empty string for the input box, the history to
    store as state, and the same history for the chatbot display. A blank or
    whitespace-only message leaves the history untouched.
    """
    history = state or []
    if message and message.strip():
        summary, fusion, reference = generate_three_fold(category, message)
        # The three parts are shown as paragraphs separated by blank lines.
        reply = "\n\n".join((summary, fusion, reference))
        history = append_user_assistant(history, message, reply)
    return "", history, history
def clear_all():
    """Return fresh empty values for the textbox, the state, and the chatbot."""
    empty_text = ""
    empty_state = []
    empty_chat = []
    return empty_text, empty_state, empty_chat
def upload_json(filepath):
    """
    Replace the in-memory dataset with the contents of an uploaded JSON file.

    Args:
        filepath: Local path of the uploaded file.

    Returns:
        A 2-tuple (status message, dropdown update). On success the dropdown
        lists the new categories with the first one selected. On failure the
        current dataset is kept and the dropdown lists its categories with
        nothing selected.

    In every case "staged_responses" is left out of the dropdown choices: it
    is the staging bucket, not a category.
    """
    global dataset, DATA_PATH

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # UI boundary: report any load/parse problem in the status box
        # instead of raising out of the callback.
        current = sorted(k for k in dataset if k != "staged_responses")
        return f"Error loading file: {e}", gr.update(choices=current, value=None)

    if not isinstance(data, dict):
        current = sorted(k for k in dataset if k != "staged_responses")
        return "Upload failed: root must be an object", gr.update(choices=current, value=None)

    data.setdefault("staged_responses", [])
    dataset = data
    # NOTE(review): this rebinds DATA_PATH to the bare file name of the upload,
    # which is relative to the working directory -- confirm that is intended.
    DATA_PATH = os.path.basename(filepath)
    cats = sorted(k for k in dataset if k != "staged_responses")
    status = f"Loaded {len(cats)} categories from {DATA_PATH}."
    return status, gr.update(choices=cats, value=(cats[0] if cats else None))
def stage_last_conversation(state, target_category):
    """
    Append the most recent question/answer pair to dataset['staged_responses'].

    The entry is stored as {"question": ..., "answer": ..., "category": ...};
    a missing assistant reply is stored as an empty string. Returns a status
    message describing what happened.
    """
    if not state:
        return "No conversation in memory."

    question, answer = get_last_user_and_assistant(state)
    if not question:
        return "No user message to stage."

    # setdefault creates the staging bucket on first use.
    staged = dataset.setdefault("staged_responses", [])
    staged.append(
        {"question": question, "answer": answer or "", "category": target_category}
    )
    return f"Staged last Q/A into '{target_category}'."
 def download_conversation_csv(state):
     """
     Write the conversation to a temporary CSV and return its path.

     Returns None when there is no conversation to export. The value is
     returned directly as the output of the File component: the per-component
     ``gr.File.update(...)`` helper was removed in Gradio 4, so calling it
     fails on the Gradio versions that support ``Chatbot(type="messages")``.
     """
     return write_temp_csv_from_history(state or [])
 def download_current_dataset():
     """
     Write the current in-memory dataset to a temporary JSON file and return
     its path.

     The path is returned directly as the output of the File component: the
     per-component ``gr.File.update(...)`` helper was removed in Gradio 4, so
     calling it fails on the Gradio versions that support
     ``Chatbot(type="messages")``.
     """
     return write_temp_json(dataset, suffix=".json")
 # -----------------------------
+# Gradio UI (components + wiring)
 # -----------------------------
 with gr.Blocks() as demo:
     gr.Markdown("## Campus Life — 3-fold responses, staging, CSV/JSON downloads")
+    # dropdown choices exclude staged_responses
+    category_choices = sorted([k for k in dataset.keys() if k != "staged_responses"])
+    with gr.Row():
+        category = gr.Dropdown(label="Category", choices=category_choices,
+                               value=(category_choices[0] if category_choices else None))
+    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
+    conversation_state = gr.State([])  # holds list of {"role":..,"content":..}
+    msg = gr.Textbox(label="Your message", placeholder="Type and press Enter (or click Send)", autofocus=True)
+    send = gr.Button("Send")
+    clear = gr.Button("Clear")
+    with gr.Row():
+        stage_btn = gr.Button("Stage last Q/A to category")
+        stage_status = gr.Textbox(label="Stage status", interactive=False, value="")
     with gr.Row():
         upload = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
         download_csv_file = gr.File(label="Download CSV", interactive=True)
     # events
+    msg.submit(respond, [msg, conversation_state, category], [msg, conversation_state, chatbot])
+    send.click(respond, [msg, conversation_state, category], [msg, conversation_state, chatbot])
+    clear.click(clear_all, [], [msg, conversation_state, chatbot])
+    stage_btn.click(stage_last_conversation, [conversation_state, category], stage_status)
+    upload.upload(upload_json, upload, [upload_status, category])
+    download_csv_btn.click(download_conversation_csv, [conversation_state], download_csv_file)
+    download_json_btn.click(download_current_dataset, None, download_json_file)
 # -----------------------------
 # Startup log