Spaces:

chitech2026
/

sandbox_dev

Sleeping

App Files Community

igortech committed on Sep 16, 2025

Commit

b2c8e1d

verified ·

1 Parent(s): 8885a6f

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -149

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import json
-import os
 import csv
-import re
-from difflib import SequenceMatcher
 import gradio as gr
 # -----------------------------
@@ -10,175 +9,123 @@ import gradio as gr
 # -----------------------------
 DATA_PATH = "quotes.json"
-def load_dataset():
-    if os.path.exists(DATA_PATH):
-        with open(DATA_PATH, "r") as f:
-            data = json.load(f)
-            print(f"Loaded dataset from {DATA_PATH} with {len(data.keys())} categories.")
-            for cat, quotes in data.items():
-                print(f" - {cat}: {len(quotes)} entries")
-            return data
-    else:
-        print("No dataset found, starting with empty structure")
-        return {"staged_responses": []}
-dataset = load_dataset()
-# -----------------------------
-# Matching logic
-# -----------------------------
-def normalize_text(s: str) -> str:
-    return re.sub(r'\W+', ' ', (s or "").lower()).strip()
-def tokens(s: str):
-    return set(t for t in normalize_text(s).split() if t)
-def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
-    if category not in dataset or not dataset[category]:
-        return [f"No data about {user_input} (unknown)."]
-    user_toks = tokens(user_input)
-    scored = []
-    for entry in dataset[category]:
-        qtext = entry.get("quote", "")
-        q_toks = tokens(qtext)
-        # Token overlap match
-        overlap = len(user_toks & q_toks)
-        if overlap > 0:
-            score = 1.0 + (overlap / max(1, len(q_toks)))
-        else:
-            # Fuzzy fallback
-            score = SequenceMatcher(None, user_input.lower(), qtext.lower()).ratio()
-        scored.append((score, qtext))
-    scored.sort(key=lambda x: x[0], reverse=True)
-    best_score = scored[0][0] if scored else 0.0
-    if best_score < threshold:
-        return [f"No data about {user_input} (unknown)."]
-    return [q for _s, q in scored[:top_n]]
 # -----------------------------
-# Response generation
 # -----------------------------
-def generate_response(category, user_input):
-    best_quotes = find_best_quotes(category, user_input, top_n=3)
-    if len(best_quotes) == 1 and best_quotes[0].startswith("No data"):
-        return (
-            f"Summary: {best_quotes[0]}",
-            f"Fusion: {best_quotes[0]}",
-            f"Reference: None"
-        )
-    # 1. Summary
-    summary = f"Summary: This is what people say about {category.lower()}."
-    # 2. Fusion
-    fusion = "Fusion: " + " ".join(best_quotes)
-    # 3. Reference
-    reference = f"Reference: Example article about {category.lower()} - https://example.com/{category.lower()}"
-    return summary, fusion, reference
-# -----------------------------
-# Gradio logic
-# -----------------------------
-conversation_history = []
-def chat(user_input, category):
-    summary, fusion, reference = generate_response(category, user_input)
-    # 3-fold response
-    bot_response = f"{summary}\n\n{fusion}\n\n{reference}"
-    conversation_history.append({"role": "user", "content": user_input})
-    conversation_history.append({"role": "assistant", "content": bot_response})
-    return conversation_history
-def clear_conversation():
-    conversation_history.clear()
-    return conversation_history
-# -----------------------------
-# CSV Export
-# -----------------------------
-def export_conversation():
-    if not conversation_history:
-        return None
-    filename = "conversation.csv"
-    with open(filename, "w", newline="") as f:
         writer = csv.writer(f)
-        writer.writerow(["role", "content"])
-        for msg in conversation_history:
-            writer.writerow([msg["role"], msg["content"]])
-    return filename
-# -----------------------------
-# Save staged responses
-# -----------------------------
-def stage_conversation(category):
-    if not conversation_history:
-        return None
-    if "staged_responses" not in dataset:
-        dataset["staged_responses"] = []
-    staged_entry = {
-        "category": category,
-        "conversation": conversation_history.copy()
-    }
-    dataset["staged_responses"].append(staged_entry)
-    # Save to file for download
-    staged_file = "staged_responses.json"
-    with open(staged_file, "w") as f:
-        json.dump(dataset, f, indent=2)
-    return staged_file
 # -----------------------------
 # UI
 # -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## Campus Life Chatbot")
-    with gr.Row():
-        category = gr.Dropdown(
-            choices=list(dataset.keys()),
-            label="Select Category",
-            value=list(dataset.keys())[0] if dataset else None
-        )
-    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
-    with gr.Row():
-        user_input = gr.Textbox(
-            label="Type your message",
-            placeholder="Ask about food, housing, professors...",
-            scale=4
-        )
-        send_btn = gr.Button("Send", scale=1)
-    with gr.Row():
-        clear_btn = gr.Button("Clear")
-        export_btn = gr.Button("Export Conversation to CSV")
-        stage_btn = gr.Button("Stage Conversation to Category")
-        download_btn = gr.Button("Download Updated Dataset")
-    # Event wiring
-    send_btn.click(chat, [user_input, category], chatbot)
-    user_input.submit(chat, [user_input, category], chatbot)
-    clear_btn.click(clear_conversation, None, chatbot)
-    export_btn.click(export_conversation, None, gr.File())
-    stage_btn.click(stage_conversation, category, gr.File())
-    download_btn.click(lambda: DATA_PATH, None, gr.File())
 if __name__ == "__main__":
     demo.launch()

 import json
+import difflib
 import csv
+import os
 import gradio as gr
 # -----------------------------
 # -----------------------------
 DATA_PATH = "quotes.json"
+# Start from the empty structure and replace it with the file's contents
+# when DATA_PATH exists on disk.
+dataset = {"staged_responses": []}
+if os.path.exists(DATA_PATH):
+    with open(DATA_PATH, encoding="utf-8") as f:
+        dataset = json.load(f)
 # -----------------------------
+# Helpers
 # -----------------------------
+def _scan_categories(query, categories, threshold):
+    """
+    Score every quote in the given categories against an already-lowercased query.
+    Returns a list of (score, quote, category) tuples whose
+    difflib.SequenceMatcher ratio is at least `threshold`. The reserved
+    "staged_responses" key is skipped.
+    """
+    hits = []
+    for cat in categories:
+        if cat == "staged_responses":
+            continue
+        for item in dataset.get(cat) or []:
+            text = item.get("quote", "") if isinstance(item, dict) else None
+            if not isinstance(text, str):
+                # Skip malformed entries instead of raising on .get()/.lower().
+                continue
+            score = difflib.SequenceMatcher(None, query, text.lower()).ratio()
+            if score >= threshold:
+                hits.append((score, text, cat))
+    return hits
+def find_best_matches(user_input, category=None, n=3, threshold=0.4):
+    """
+    Try to find best fuzzy matches in the dataset.
+    If category is given and fails, fallback to all categories.
+    Args:
+        user_input: text to compare (case-insensitively) against each quote.
+        category: dataset key to search first; when missing or unknown,
+            every category is searched.
+        n: maximum number of matches to return.
+        threshold: minimum SequenceMatcher ratio for a quote to count.
+    Returns:
+        Up to `n` (score, quote, category) tuples, highest score first.
+    """
+    # Lowercase once instead of once per quote.
+    query = user_input.lower()
+    category_known = bool(category) and category in dataset
+    # First pass: the selected category only, or everything when none applies.
+    first_pass = [category] if category_known else list(dataset)
+    matches = _scan_categories(query, first_pass, threshold)
+    # If nothing found and category was specified, search all categories.
+    if not matches and category_known:
+        matches = _scan_categories(query, list(dataset), threshold)
+    # Sort and return top n (the sort is stable, so ties keep scan order).
+    matches.sort(key=lambda hit: hit[0], reverse=True)
+    return matches[:n]
+def chatbot_response(message, history, category):
+    """
+    Answer `message` and return the updated chat history.
+    The Chatbot component in this file is created with type="messages", so
+    every history entry is a dict with "role" and "content" keys rather than
+    a ("User"/"Bot", text) tuple.
+    Args:
+        message: text typed by the user.
+        history: current list of message dicts (may be None or empty).
+        category: category to search first, or None to search everything.
+    Returns:
+        A new history list; the list passed in is left untouched.
+    """
+    # Copy so the list handed over by Gradio is never mutated in place.
+    updated = list(history or [])
+    if not message or not message.strip():
+        updated.append({"role": "assistant", "content": "Message is empty."})
+        return updated
+    best_matches = find_best_matches(message, category)
+    if best_matches:
+        responses = [f"[{cat}] {quote}" for _, quote, cat in best_matches]
+    else:
+        responses = [f"No data about {message}."]
+    updated.append({"role": "user", "content": message})
+    updated.extend({"role": "assistant", "content": resp} for resp in responses)
+    return updated
+def stage_response(message, category):
+    """
+    Stage a message into a category in dataset.
+    Args:
+        message: text to store as a new {"quote": message} entry.
+        category: dataset key to append to; created when it does not exist.
+    Returns:
+        A status string describing what happened.
+    """
+    if not message or not message.strip():
+        return "Message is empty."
+    if not category:
+        # With nothing selected in the dropdown the category is empty;
+        # refuse instead of creating a None/"" key in the dataset.
+        return "No category selected."
+    dataset.setdefault(category, []).append({"quote": message})
+    return f"Message staged to category '{category}'."
+def download_json():
+    """
+    Write the dataset to "dataset.json" and return that file's path.
+    The result feeds a gr.File output, which takes a path to a file rather
+    than the serialized JSON text, so the data is written to disk first
+    (same approach as download_csv).
+    """
+    json_file = "dataset.json"
+    with open(json_file, "w", encoding="utf-8") as f:
+        json.dump(dataset, f, indent=2, ensure_ascii=False)
+    return json_file
+def download_csv():
+    """
+    Export every quote as a (Category, Quote) row to "dataset.csv".
+    The "staged_responses" key is left out. Returns the path of the CSV file.
+    """
+    out_path = "dataset.csv"
+    with open(out_path, "w", newline="", encoding="utf-8") as f:
         writer = csv.writer(f)
+        writer.writerow(["Category", "Quote"])
+        # One row per entry, in dataset order.
+        writer.writerows(
+            [name, entry.get("quote", "")]
+            for name, entries in dataset.items()
+            if name != "staged_responses"
+            for entry in entries
+        )
+    return out_path
+def clear_history():
+    """Return a fresh empty history, which resets the chat display."""
+    return list()
 # -----------------------------
 # UI
 # -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# 🎓 Campus Experience Chatbot")
+    chatbot = gr.Chatbot(label="Conversation", type="messages")
+    msg = gr.Textbox(label="Type your question here...", placeholder="Ask me anything about campus life", lines=2)
+    category = gr.Dropdown(choices=[c for c in dataset if c != "staged_responses"], label="Select Category")
+    send = gr.Button("Send")
+    stage_btn = gr.Button("Stage conversation to category")
+    download_json_btn = gr.Button("Download JSON")
+    download_csv_btn = gr.Button("Download CSV")
+    clear = gr.Button("Clear Conversation")
+    # Read-only box for the status string returned by stage_response; with
+    # outputs=None that string was discarded and staging gave no feedback.
+    stage_status = gr.Textbox(label="Staging status", interactive=False)
+    # Event wiring: the button and Enter in the textbox share one handler.
+    send.click(chatbot_response, inputs=[msg, chatbot, category], outputs=chatbot)
+    msg.submit(chatbot_response, inputs=[msg, chatbot, category], outputs=chatbot)
+    stage_btn.click(stage_response, inputs=[msg, category], outputs=stage_status)
+    download_json_btn.click(download_json, outputs=gr.File())
+    download_csv_btn.click(download_csv, outputs=gr.File())
+    clear.click(clear_history, outputs=chatbot)
 if __name__ == "__main__":
     demo.launch()