Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import re | |
| import csv | |
| import tempfile | |
| from rapidfuzz import fuzz | |
| import datetime | |
| import gradio as gr | |
| # ----------------------------- | |
| # Config / data loading | |
| # ----------------------------- | |
| DATA_PATH = "quotes.json" | |
| def load_quotes(): | |
| if os.path.exists(DATA_PATH): | |
| try: | |
| with open(DATA_PATH, "r", encoding="utf-8") as f: | |
| data = json.load(f) | |
| if isinstance(data, dict): | |
| print(f"Loaded dataset from {DATA_PATH} with {len(data)} categories.") | |
| return data | |
| except Exception as e: | |
| print(f"Failed to load {DATA_PATH}: {e}") | |
| print("No dataset file found. Upload one via the UI.") | |
| return {} | |
| QUOTES = load_quotes() | |
| # ----------------------------- | |
| # Text helpers | |
| # ----------------------------- | |
| STOPWORDS = { | |
| "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for", | |
| "is","are","was","were","be","being","been","it","that","this","these","those","with", | |
| "as","by","from","about","into","over","after","before","up","down","out" | |
| } | |
| POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"} | |
| NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"} | |
| punct_re = re.compile(r"[{}]".format(re.escape("""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""))) | |
| def normalize(text: str) -> str: | |
| return punct_re.sub(" ", (text or "").lower()) | |
| def tokenize(text: str): | |
| return [t for t in normalize(text).split() if t and t not in STOPWORDS] | |
| def infer_sentiment(user_text: str) -> str: | |
| tl = normalize(user_text) | |
| has_pos = any(w in tl for w in POS_HINTS) | |
| has_neg = any(w in tl for w in NEG_HINTS) | |
| if has_pos and not has_neg: | |
| return "positive" | |
| if has_neg and not has_pos: | |
| return "negative" | |
| return "positive" | |
| # ----------------------------- | |
| # Retrieval | |
| # ----------------------------- | |
| def best_match_quote(user_text: str) -> str: | |
| best_score = 0 | |
| best_quote = None | |
| for cat, quotes_list in QUOTES.items(): | |
| for quote_entry in quotes_list: | |
| quote = quote_entry.get("quote", "") | |
| if not quote.strip(): | |
| continue | |
| score = fuzz.partial_ratio(user_text.lower(), quote.lower()) | |
| if score > best_score: | |
| best_score = score | |
| best_quote = quote | |
| if best_score < 30 or best_quote is None: | |
| return f"No data about '{user_text}'" | |
| return best_quote | |
| # ----------------------------- | |
| # Gradio callbacks | |
| # ----------------------------- | |
| def respond(message, history, category): | |
| if not QUOTES: | |
| bot = "No dataset loaded. Please upload a JSON file first." | |
| history.append({"role": "user", "content": message}) | |
| history.append({"role": "assistant", "content": bot}) | |
| return "", history | |
| if not category: | |
| bot = "Please select a category." | |
| history.append({"role": "user", "content": message}) | |
| history.append({"role": "assistant", "content": bot}) | |
| return "", history | |
| quote = best_match_quote(message) | |
| # 3-fold response | |
| summary = quote.split(". ")[0] + "." if "." in quote else quote | |
| detail = quote | |
| unknown = "" | |
| if "No data about" in quote: | |
| unknown = quote | |
| bot_text = f"Summary:\n{summary}\n\nWhat real people say:\n{detail}" | |
| if unknown: | |
| bot_text += f"\n\n{unknown}" | |
| history.append({"role": "user", "content": message}) | |
| history.append({"role": "assistant", "content": bot_text}) | |
| return "", history | |
| def clear_chat(): | |
| return None | |
| def upload_json(filepath): | |
| global QUOTES, DATA_PATH | |
| try: | |
| with open(filepath, "r", encoding="utf-8") as f: | |
| data = json.load(f) | |
| if not isinstance(data, dict): | |
| return gr.update(value="Upload failed: JSON root must be an object."), gr.update(choices=[]) | |
| QUOTES = data | |
| DATA_PATH = os.path.basename(filepath) | |
| cats = sorted(list(QUOTES.keys())) | |
| status = f"Loaded {len(cats)} categories from {DATA_PATH}." | |
| return status, gr.update(choices=cats, value=(cats[0] if cats else None)) | |
| except Exception as e: | |
| return f"Error loading file: {e}", gr.update(choices=[]) | |
| def download_current_json(): | |
| tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json") | |
| with open(tmp_file.name, "w", encoding="utf-8") as f: | |
| json.dump(QUOTES, f, indent=2, ensure_ascii=False) | |
| return tmp_file.name | |
| def download_conversation_csv(history): | |
| tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") | |
| with open(tmp_file.name, "w", newline="", encoding="utf-8") as f: | |
| writer = csv.writer(f) | |
| writer.writerow(["role", "message"]) | |
| for msg in history: | |
| writer.writerow([msg.get("role"), msg.get("content")]) | |
| return tmp_file.name | |
| # ----------------------------- | |
| # UI | |
| # ----------------------------- | |
| with gr.Blocks() as demo: | |
| gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, Fuzzy Matching") | |
| initial_categories = sorted(list(QUOTES.keys())) | |
| with gr.Row(): | |
| category = gr.Dropdown( | |
| label="Category", | |
| choices=initial_categories, | |
| value=(initial_categories[0] if initial_categories else None) | |
| ) | |
| chatbot = gr.Chatbot(label="Conversation", height=360, type="messages") | |
| msg = gr.Textbox(label="Your message", placeholder="Ask something like: 'Is food good in college?'", autofocus=True) | |
| send = gr.Button("Send") | |
| clear = gr.Button("Clear") | |
| with gr.Row(): | |
| upload_btn = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath") | |
| upload_status = gr.Textbox(label="Upload status", interactive=False) | |
| # New download system | |
| with gr.Row(): | |
| download_json_btn = gr.Button("Download current dataset (.json)") | |
| download_csv_btn = gr.Button("Export conversation to CSV") | |
| download_json_file = gr.File(label="JSON download") | |
| download_csv_file = gr.File(label="CSV download") | |
| # Events | |
| msg.submit(respond, [msg, chatbot, category], [msg, chatbot]) | |
| send.click(respond, [msg, chatbot, category], [msg, chatbot]) | |
| clear.click(clear_chat, None, chatbot, queue=False) | |
| upload_btn.upload(upload_json, upload_btn, [upload_status, category]) | |
| # Fixed download events using Button -> File | |
| download_json_btn.click(fn=download_current_json, inputs=None, outputs=download_json_file) | |
| download_csv_btn.click(fn=download_conversation_csv, inputs=chatbot, outputs=download_csv_file) | |
| # ----------------------------- | |
| # Startup log | |
| # ----------------------------- | |
| print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====") | |
| if QUOTES: | |
| for cat, entries in QUOTES.items(): | |
| print(f" - {cat}: {len(entries)} entries") | |
| if __name__ == "__main__": | |
| demo.launch(server_name="0.0.0.0", server_port=7860) |