Spaces:

chitech2026
/

sandbox_dev

Sleeping

App Files Community

igortech committed on Sep 16, 2025

Commit

2170185

verified ·

1 Parent(s): b2c8e1d

Update app.py

Browse files

Files changed (1) hide show

app.py +250 -90

app.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import json
-import difflib
-import csv
 import os
 import gradio as gr
 # -----------------------------
@@ -9,123 +12,280 @@ import gradio as gr
 # -----------------------------
 DATA_PATH = "quotes.json"
-if os.path.exists(DATA_PATH):
-    with open(DATA_PATH, "r", encoding="utf-8") as f:
-        dataset = json.load(f)
-else:
-    dataset = {"staged_responses": []}
 # -----------------------------
-# Helpers
 # -----------------------------
-def find_best_matches(user_input, category=None, n=3, threshold=0.4):
     """
-    Try to find best fuzzy matches in the dataset.
-    If category is given and fails, fallback to all categories.
     """
-    matches = []
-    search_categories = [category] if category and category in dataset else dataset.keys()
-    # First pass: search within selected category
-    for cat in search_categories:
         if cat == "staged_responses":
             continue
-        for item in dataset.get(cat, []):
-            text = item.get("quote", "")
-            score = difflib.SequenceMatcher(None, user_input.lower(), text.lower()).ratio()
-            if score >= threshold:
-                matches.append((score, text, cat))
-    # If nothing found and category was specified, search all categories
-    if not matches and category and category in dataset:
-        for cat in dataset.keys():
-            if cat == "staged_responses":
-                continue
-            for item in dataset.get(cat, []):
-                text = item.get("quote", "")
-                score = difflib.SequenceMatcher(None, user_input.lower(), text.lower()).ratio()
-                if score >= threshold:
-                    matches.append((score, text, cat))
-    # Sort and return top n
-    matches.sort(key=lambda x: x[0], reverse=True)
-    return matches[:n]
-def chatbot_response(message, history, category):
-    if not message.strip():
-        return history + [("User", "Message is empty.")]
-    best_matches = find_best_matches(message, category)
-    if best_matches:
-        responses = [f"[{cat}] {quote}" for _, quote, cat in best_matches]
-    else:
-        responses = [f"No data about {message}."]
-    history.append(("User", message))
-    for resp in responses:
-        history.append(("Bot", resp))
-    return history
-def stage_response(message, category):
-    """Stage a message into a category in dataset."""
-    if not message.strip():
-        return "Message is empty."
-    if category not in dataset:
-        dataset[category] = []
-    dataset[category].append({"quote": message})
-    return f"Message staged to category '{category}'."
-def download_json():
-    return json.dumps(dataset, indent=2, ensure_ascii=False)
-def download_csv():
-    csv_file = "dataset.csv"
-    with open(csv_file, "w", newline="", encoding="utf-8") as f:
         writer = csv.writer(f)
-        writer.writerow(["Category", "Quote"])
-        for cat, items in dataset.items():
-            if cat == "staged_responses":
-                continue
-            for item in items:
-                writer.writerow([cat, item.get("quote", "")])
-    return csv_file
-def clear_history():
-    return []
 # -----------------------------
-# UI
 # -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎓 Campus Experience Chatbot")
-    chatbot = gr.Chatbot(label="Conversation", type="messages")
-    msg = gr.Textbox(label="Type your question here...", placeholder="Ask me anything about campus life", lines=2)
-    category = gr.Dropdown(choices=[c for c in dataset.keys() if c != "staged_responses"], label="Select Category")
     send = gr.Button("Send")
-    stage_btn = gr.Button("Stage conversation to category")
-    download_json_btn = gr.Button("Download JSON")
-    download_csv_btn = gr.Button("Download CSV")
-    clear = gr.Button("Clear Conversation")
-    send.click(chatbot_response, inputs=[msg, chatbot, category], outputs=chatbot)
-    msg.submit(chatbot_response, inputs=[msg, chatbot, category], outputs=chatbot)
-    stage_btn.click(stage_response, inputs=[msg, category], outputs=None)
-    download_json_btn.click(download_json, outputs=gr.File())
-    download_csv_btn.click(download_csv, outputs=gr.File())
-    clear.click(clear_history, outputs=chatbot)
 if __name__ == "__main__":
-    demo.launch()

 import json
 import os
+import re
+import csv
+import tempfile
+from difflib import SequenceMatcher
+import datetime
 import gradio as gr
 # -----------------------------
 # -----------------------------
 DATA_PATH = "quotes.json"
+def load_dataset():
+    """
+    Load the quotes dataset from DATA_PATH.
+
+    Returns a dict that always contains a "staged_responses" list. A missing,
+    unreadable, malformed or non-object file yields the empty default
+    instead of crashing the app at import time.
+    """
+    empty = {"staged_responses": []}
+    if not os.path.exists(DATA_PATH):
+        return empty
+    try:
+        with open(DATA_PATH, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError) as exc:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
+        print(f"Could not load {DATA_PATH}: {exc}")
+        return empty
+    if not isinstance(data, dict):
+        print(f"Ignoring {DATA_PATH}: root must be an object")
+        return empty
+    # ensure staged_responses exists
+    data.setdefault("staged_responses", [])
+    return data
+dataset = load_dataset()
 # -----------------------------
+# Matching helpers
 # -----------------------------
+def normalize_text(s: str) -> str:
+    """Lowercase `s` (a falsy value counts as ""), turn each run of non-word characters into one space, and trim both ends."""
+    lowered = "" if not s else s.lower()
+    collapsed = re.sub(r"\W+", " ", lowered)
+    return collapsed.strip()
+def tokens(s: str):
+    """Return the set of distinct words in the normalized form of `s`."""
+    # str.split() with no separator never yields empty strings
+    return set(normalize_text(s).split())
+def score_quote(user_input: str, quote_text: str) -> float:
+    """
+    Score a quote vs user input:
+      - any shared token gives 1.0 plus the fraction of the quote's tokens matched
+      - otherwise fallback to the SequenceMatcher ratio of the lowercased texts
+    None is treated as an empty string for either argument.
     """
+    # a dataset entry such as {"quote": null} must not crash the fuzzy fallback
+    user_input = user_input or ""
+    quote_text = quote_text or ""
+    u_toks = tokens(user_input)
+    q_toks = tokens(quote_text)
+    overlap = len(u_toks & q_toks)
+    if overlap > 0:
+        # overlap > 0 implies q_toks is non-empty, so the division is safe
+        return 1.0 + overlap / len(q_toks)
+    # fuzzy fallback
+    return SequenceMatcher(None, user_input.lower(), quote_text.lower()).ratio()
+def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
+    """
+    Find best matches:
+     - try within `category` first (if provided)
+     - if nothing reaches `threshold` there, search across all categories
+     - return up to `top_n` tuples (score, quote, category), best first,
+       every one of them scoring at least `threshold`
+     - if nothing passes threshold, return empty list
     """
+    if not user_input or not user_input.strip():
+        return []
+    def ranked_hits(cats):
+        # score every well-formed entry of the given categories and keep only
+        # those reaching the threshold, best first
+        hits = []
+        for cat in cats:
+            entries = dataset.get(cat, [])
+            if not isinstance(entries, list):
+                continue  # an uploaded file may hold arbitrary values
+            for item in entries:
+                if not isinstance(item, dict):
+                    continue
+                q = item.get("quote", "")
+                if not isinstance(q, str):
+                    continue  # e.g. "quote": null
+                s = score_quote(user_input, q)
+                if s >= threshold:
+                    hits.append((s, q, cat))
+        # stable sort: ties keep dataset order
+        hits.sort(key=lambda x: x[0], reverse=True)
+        return hits[:top_n]
+    # 1) try selected category first
+    if category and category in dataset and category != "staged_responses":
+        hits = ranked_hits([category])
+        if hits:
+            return hits
+    # 2) fallback: search all categories (empty list when nothing qualifies)
+    return ranked_hits([cat for cat in dataset if cat != "staged_responses"])
+# -----------------------------
+# Response generation
+# -----------------------------
+def generate_three_fold(category, user_text):
+    """
+    Build the three-part reply (summary, fusion, reference) for `user_text`.
+
+    Asks find_best_quotes for at most three matches; when there are none, the
+    "No data" message is returned as both summary and fusion.
+    """
+    hits = find_best_quotes(category, user_text, top_n=3, threshold=0.15)
+    if hits:
+        _, best_quote, best_cat = hits[0]
+        # summary: text of the best quote up to its first period
+        lead = best_quote.split(".")[0].strip()
+        # fusion: matched quotes in rank order, repeats dropped
+        distinct = []
+        for _, quote, _ in hits:
+            if quote not in distinct:
+                distinct.append(quote)
+        return (
+            f"Summary: {lead}.",
+            "Fusion: " + " ".join(distinct),
+            f"Reference: Example search for '{category}' (top match from '{best_cat}').",
+        )
+    # Unknown fallback
+    missing = f"No data about {user_text} (unknown)."
+    return missing, missing, "Reference: None"
+# -----------------------------
+# Conversation & staging utilities
+# -----------------------------
+def append_user_assistant(history, user_text, assistant_text):
+    """
+    Add one user/assistant exchange to `history` and return the list.
+
+    `history` is a list of message dicts {"role": ..., "content": ...}; a
+    non-empty list is extended in place, while None or an empty list is
+    replaced by a new list.
+    """
+    if not history:
+        history = []
+    history.extend(
+        {"role": role, "content": text}
+        for role, text in (("user", user_text), ("assistant", assistant_text))
+    )
+    return history
+def get_last_user_and_assistant(history):
+    """
+    Return (last_user, last_assistant) for the most recent exchange.
+
+    last_user is the content of the last "user" message. last_assistant is
+    the content of the newest "assistant" message that comes after it, or
+    None when that user message has no reply yet; an older reply is never
+    paired with a newer question. With no user message at all, last_user is
+    None.
+    """
+    last_assistant = None
+    # walk backwards: the first assistant seen is the newest reply, and the
+    # first user seen is the message it answers
+    for msg in reversed(history or []):
+        role = msg.get("role")
+        if role == "assistant":
+            if last_assistant is None:
+                last_assistant = msg.get("content")
+        elif role == "user":
+            return msg.get("content"), last_assistant
+    return None, last_assistant
+# -----------------------------
+# File helpers (use temp files)
+# -----------------------------
+def write_temp_json(obj, suffix=".json"):
+    """Dump `obj` as indented UTF-8 JSON into a new temp file that is kept on disk, and return its path."""
+    with tempfile.NamedTemporaryFile(
+        mode="w", encoding="utf-8", suffix=suffix, delete=False
+    ) as out:
+        json.dump(obj, out, indent=2, ensure_ascii=False)
+    return out.name
+def write_temp_csv_from_history(history, suffix=".csv"):
+    """
+    Write the conversation to a new temp CSV file kept on disk and return its path.
+
+    The file has a "role","content" header followed by one row per message
+    (missing keys become ""). Returns None when `history` is empty.
+    """
+    if not history:
+        return None
+    with tempfile.NamedTemporaryFile(
+        mode="w", newline="", encoding="utf-8", suffix=suffix, delete=False
+    ) as out:
+        sheet = csv.writer(out)
+        sheet.writerow(["role", "content"])
+        sheet.writerows([m.get("role", ""), m.get("content", "")] for m in history)
+    return out.name
+# -----------------------------
+# Gradio callbacks (UI-safe)
+# -----------------------------
+def respond(message, state, category):
+    """
+    Handle the Send button or Enter.
+
+    Returns (cleared textbox value, new state, chatbot display); state and
+    display are the same history list. A blank message leaves the history
+    unchanged.
+    """
+    history = state or []
+    if message and message.strip():
+        # 3-fold reply: summary, fusion and reference separated by blank lines
+        reply = "\n\n".join(generate_three_fold(category, message))
+        history = append_user_assistant(history, message, reply)
+    return "", history, history
+def clear_all():
+    """Return reset values for the textbox, the state and the chatbot, in that order."""
+    blank_text, no_state, no_display = "", [], []
+    return blank_text, no_state, no_display
+def upload_json(filepath):
+    """
+    Replace the in-memory dataset with an uploaded JSON file.
+
+    `filepath` is the local path of the upload inside the container.
+    Returns (status message, dropdown update). On success the dropdown gets
+    the new categories (never "staged_responses") with the first one
+    selected; on failure the dataset and the dropdown are left as they were.
+    """
+    global dataset, DATA_PATH
+    if not filepath:
+        return "Upload failed: no file received", gr.update()
+    try:
+        with open(filepath, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
+        return f"Error loading file: {e}", gr.update()
+    if not isinstance(data, dict):
+        # gr.update() with no arguments keeps the current choices and selection
+        return "Upload failed: root must be an object", gr.update()
+    # ensure staged_responses exists
+    data.setdefault("staged_responses", [])
+    dataset = data
+    DATA_PATH = os.path.basename(filepath)
+    cats = sorted(k for k in dataset if k != "staged_responses")
+    status = f"Loaded {len(cats)} categories from {DATA_PATH}."
+    return status, gr.update(choices=cats, value=(cats[0] if cats else None))
+def stage_last_conversation(state, target_category):
+    """
+    Append the latest question/answer pair to dataset["staged_responses"]
+    as {"question": ..., "answer": ..., "category": ...} and return a status
+    message. A missing answer is stored as "".
+    """
+    if not state:
+        return "No conversation in memory."
+    question, answer = get_last_user_and_assistant(state)
+    if not question:
+        return "No user message to stage."
+    staged = dataset.setdefault("staged_responses", [])
+    staged.append(
+        {"question": question, "answer": answer or "", "category": target_category}
+    )
+    return f"Staged last Q/A into '{target_category}'."
+def download_conversation_csv(state):
+    """
+    Export the conversation history as a temp CSV for the download File component.
+
+    Returns a gr.update carrying the file path, or value=None when there is
+    no history to export.
+    """
+    # gr.File.update() was removed in Gradio 4, and the Chatbot in this app
+    # uses type="messages", which needs a Gradio newer than that; the generic
+    # gr.update() exists both before and after the removal.
+    path = write_temp_csv_from_history(state or [])
+    return gr.update(value=path)
+def download_current_dataset():
+    """
+    Export the in-memory dataset (staged_responses included) as a temp JSON
+    file and return a gr.update carrying its path for the download File component.
+    """
+    # gr.File.update() was removed in Gradio 4; the generic gr.update() works
+    # both before and after the removal.
+    path = write_temp_json(dataset, suffix=".json")
+    return gr.update(value=path)
 # -----------------------------
+# Gradio UI (components + wiring)
 # -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## Campus Life — 3-fold responses, staging, CSV/JSON downloads")
+    # staged_responses is an internal bucket, not a selectable category
+    category_choices = sorted(k for k in dataset if k != "staged_responses")
+    initial_category = category_choices[0] if category_choices else None
+    with gr.Row():
+        category = gr.Dropdown(label="Category", choices=category_choices, value=initial_category)
+    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
+    # conversation history: list of {"role": ..., "content": ...} dicts
+    state = gr.State([])
+    msg = gr.Textbox(
+        label="Your message",
+        placeholder="Type and press Enter (or click Send)",
+        autofocus=True,
+    )
     send = gr.Button("Send")
+    clear = gr.Button("Clear")
+    with gr.Row():
+        stage_btn = gr.Button("Stage last Q/A to category")
+        stage_status = gr.Textbox(label="Stage status", interactive=False, value="")
+    with gr.Row():
+        upload = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
+        upload_status = gr.Textbox(label="Upload status", interactive=False, value="")
+        download_json_btn = gr.Button("Download current dataset (JSON)")
+        download_json_file = gr.File(label="Download JSON", interactive=False)
+        download_csv_btn = gr.Button("Download conversation (CSV)")
+        download_csv_file = gr.File(label="Download CSV", interactive=False)
+    # events: Enter and the Send button share one handler and one set of ports
+    chat_io = dict(fn=respond, inputs=[msg, state, category], outputs=[msg, state, chatbot])
+    msg.submit(**chat_io)
+    send.click(**chat_io)
+    clear.click(fn=clear_all, inputs=[], outputs=[msg, state, chatbot])
+    stage_btn.click(fn=stage_last_conversation, inputs=[state, category], outputs=stage_status)
+    upload.upload(fn=upload_json, inputs=upload, outputs=[upload_status, category])
+    download_csv_btn.click(fn=download_conversation_csv, inputs=state, outputs=download_csv_file)
+    download_json_btn.click(fn=download_current_dataset, inputs=None, outputs=download_json_file)
+# -----------------------------
+# Startup log
+# -----------------------------
+# printed on every import of the module, not only when run as a script
+print("===== Application startup =====")
+print(f"Dataset categories: {list(dataset)}")
 if __name__ == "__main__":
+    # listen on all interfaces, port 7860
+    demo.launch(server_name="0.0.0.0", server_port=7860)