Spaces:

chitech2026
/

sandbox_dev

Sleeping

App Files Files Community

igortech committed on Sep 3, 2025

Commit

a18d57c

verified ·

1 Parent(s): 8b86f79

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -83

app.py CHANGED Viewed

@@ -1,104 +1,217 @@
 import json
 import os
-import difflib
 import gradio as gr
-# Path to your JSON data
 DATA_PATH = "quotes.json"
-# Load quotes
 def load_quotes():
     if os.path.exists(DATA_PATH):
-        with open(DATA_PATH, "r", encoding="utf-8") as f:
-            return json.load(f)
     return {}
 QUOTES = load_quotes()
-# Fuzzy match logic (improved with tokenization)
-def fuzzy_match(query, choices):
-    tokens = query.lower().split()
-    best_score = 0
-    best_match = None
-    for choice in choices:
-        score = difflib.SequenceMatcher(None, query.lower(), choice.lower()).ratio()
-        token_overlap = len(set(tokens) & set(choice.lower().split())) / max(len(tokens), 1)
-        combined_score = (score + token_overlap) / 2
-        if combined_score > best_score:
-            best_score = combined_score
-            best_match = choice
-    return best_match, best_score
-# Handle user query
-def handle_query(user_message, category, sentiment, url_restrict):
-    responses = []
-    # Validate category
     if category not in QUOTES:
-        return [{"role": "assistant", "content": "Category not found in dataset."}]
-    # Collect relevant quotes
-    choices = []
-    if sentiment == "auto":
-        for sent in QUOTES[category]:
-            choices.extend(QUOTES[category][sent])
-    else:
-        choices = QUOTES[category].get(sentiment, [])
-    if not choices:
-        return [{"role": "assistant", "content": "No quotes available for this category/sentiment."}]
-    # Fuzzy match query against quotes
-    best_match, score = fuzzy_match(user_message, choices)
-    # === 3-tier response ===
-    # Tier 1: Summary (simple echo of category/topic)
-    summary = f"**Topic Summary:** This question seems related to *{category.replace('_', ' ')}*."
-    # Tier 2: "What real people say"
-    if best_match and score > 0.3:
-        details = f"**What real people say:**\n{best_match}"
-    else:
-        details = "**What real people say:**\nSorry, no close match found."
-    # Tier 3: External article reference (stub)
-    if url_restrict.strip():
-        external = f"**Similar articles (restricted to {url_restrict}):**\n[Search results on {url_restrict}](https://www.google.com/search?q={user_message}+site:{url_restrict})"
     else:
-        external = "**Similar articles:**\n[Search on Google](https://www.google.com/search?q=" + user_message.replace(" ", "+") + ")"
-    responses.extend([
-        {"role": "assistant", "content": summary},
-        {"role": "assistant", "content": details},
-        {"role": "assistant", "content": external}
-    ])
-    return responses
-# Build Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎓 College Life Q&A Chatbot")
-    with gr.Row():
-        category = gr.Dropdown(choices=list(QUOTES.keys()), label="Category", interactive=True)
-        sentiment = gr.Dropdown(choices=["auto", "positive", "negative"], value="auto", label="Sentiment", interactive=True)
-    url_restrict = gr.Textbox(label="Restrict search to domain (optional)", placeholder="e.g., nytimes.com")
-    chatbot = gr.Chatbot(label="Conversation", type="messages", height=400)
-    msg = gr.Textbox(label="Ask a question")
     with gr.Row():
-        clear_btn = gr.Button("Clear Chat")
-    def respond(message, chat_history, category, sentiment, url_restrict):
-        bot_msgs = handle_query(message, category, sentiment, url_restrict)
-        chat_history.append({"role": "user", "content": message})
-        chat_history.extend(bot_msgs)
-        return "", chat_history
-    msg.submit(respond, [msg, chatbot, category, sentiment, url_restrict], [msg, chatbot])
-    clear_btn.click(lambda: [], None, chatbot)
 if __name__ == "__main__":
-    demo.launch()

 import json
+import random
+import re
+import string
 import os
+import datetime
+import csv
 import gradio as gr
+from difflib import SequenceMatcher
+# -----------------------------
+# Config / data loading
+# -----------------------------
 DATA_PATH = "quotes.json"
 def load_quotes():
     if os.path.exists(DATA_PATH):
+        try:
+            with open(DATA_PATH, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if isinstance(data, dict):
+                print(f"Loaded dataset from {DATA_PATH} with {len(data)} categories.")
+                return data
+        except Exception as e:
+            print(f"Failed to load {DATA_PATH}: {e}")
+    print("No dataset file found. Upload one via the UI.")
     return {}
 QUOTES = load_quotes()
+# -----------------------------
+# Text helpers
+# -----------------------------
STOPWORDS = {
    "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for",
    "is","are","was","were","be","being","been","it","that","this","these","those","with",
    "as","by","from","about","into","over","after","before","up","down","out"
}
POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}
punct_re = re.compile(f"[{re.escape(string.punctuation)}]")


def normalize(text: str) -> str:
    """Lower-case *text* and replace ASCII punctuation with spaces.

    ``None`` or an empty value is treated as the empty string.
    """
    return punct_re.sub(" ", (text or "").lower())


def tokenize(text: str):
    """Return the whitespace-separated words of normalized *text*, minus STOPWORDS."""
    return [t for t in normalize(text).split() if t and t not in STOPWORDS]


def _has_hint(words, hints) -> bool:
    """Return True when any word begins with one of the hint stems."""
    return any(word.startswith(hint) for word in words for hint in hints)


def infer_sentiment(user_text: str) -> str:
    """Guess whether *user_text* reads as "positive" or "negative".

    Hints are matched against the start of each word rather than anywhere
    in the raw text. Plain substring matching made "dislike" and "unlike"
    count as the positive hint "like", which turned clearly negative
    messages into the ambiguous (and therefore positive) case. Matching
    word prefixes still catches inflections such as "hated" or "loved".

    Returns:
        "negative" only when a negative hint is present and no positive
        hint is; "positive" otherwise (including no hints or both kinds).
    """
    words = normalize(user_text).split()
    has_pos = _has_hint(words, POS_HINTS)
    has_neg = _has_hint(words, NEG_HINTS)
    if has_neg and not has_pos:
        return "negative"
    return "positive"  # positive-only, mixed, or no hints at all
+# -----------------------------
+# Retrieval (with fuzzy match)
+# -----------------------------
def similarity(a, b):
    """Return the difflib similarity ratio between sequences *a* and *b*."""
    return SequenceMatcher(None, a, b).ratio()


def best_match_quote(category: str, sentiment: str, user_text: str) -> str:
    """Pick the stored quote most similar to *user_text*.

    The dataset can come from an uploaded file whose nested structure is
    never validated, so malformed entries (a category that is not a
    mapping, a pool that is not a list, non-string quotes) are treated as
    missing data instead of raising.

    Args:
        category: Top-level key in ``QUOTES``.
        sentiment: Key inside the category mapping.
        user_text: The user's message; ``None`` is treated as empty.

    Returns:
        The best-matching quote, or an explanatory message when the
        category/sentiment is unknown, the pool has no usable quotes, or
        the best similarity is below 0.3.
    """
    if category not in QUOTES:
        return f"No quotes found for category '{category}'."
    by_sentiment = QUOTES[category]
    if not isinstance(by_sentiment, dict) or sentiment not in by_sentiment:
        return f"No quotes found for sentiment '{sentiment}' in category '{category}'."
    pool = by_sentiment[sentiment]
    candidates = [q for q in pool if isinstance(q, str)] if isinstance(pool, list) else []
    if not candidates:
        return f"No quotes available in '{category}' → '{sentiment}'."
    query = (user_text or "").lower()
    scored = [(similarity(query, quote.lower()), quote) for quote in candidates]
    # max() keeps the first of equal scores, same as a strict ">" scan.
    best_score, best_quote = max(scored, key=lambda pair: pair[0])
    if best_score < 0.3:  # fuzzy threshold
        return "I don’t have data on that specific question."
    return best_quote
+# -----------------------------
+# Gradio callbacks
+# -----------------------------
conversation_log = []  # keep all turns for export


def respond(message, history, category, sentiment_choice):
    """Handle one chat turn and return the cleared textbox plus updated history.

    Args:
        message: Text the user submitted.
        history: List of ``(user, bot)`` tuples; ``None`` is treated as
            an empty conversation instead of crashing on ``.append``.
        category: Selected dataset category (falsy when none is selected).
        sentiment_choice: "auto" to infer the sentiment from the message,
            otherwise the sentiment key to use as-is.

    Returns:
        ``("", history)`` — the empty string resets the input box.
    """
    if history is None:
        history = []
    # Blank submissions add nothing useful to the chat or to the export log.
    if not (message or "").strip():
        return "", history
    if not QUOTES:
        bot = "No dataset loaded. Please upload a JSON file first."
        history.append((message, bot))
        return "", history
    if not category:
        bot = "Please select a category."
        history.append((message, bot))
        return "", history
    sent = infer_sentiment(message) if sentiment_choice == "auto" else sentiment_choice
    bot = best_match_quote(category, sent, message)
    history.append((message, bot))
    # log turn for export
    conversation_log.append({
        "timestamp": datetime.datetime.now().isoformat(),
        "category": category,
        "sentiment": sent,
        "user_message": message,
        "bot_response": bot,
    })
    return "", history
def clear_chat() -> None:
    """Return ``None``, the value used to reset the conversation display."""
    cleared = None
    return cleared
def upload_json(filepath):
    """Replace the in-memory dataset with the contents of an uploaded JSON file.

    Args:
        filepath: Path of the uploaded file.

    Returns:
        A ``(status, dropdown_update)`` pair: a status string, and an
        update for the category dropdown. On success the dropdown gets
        the new sorted category names with the first one selected. On any
        failure ``QUOTES`` is left as it was and the dropdown is left
        untouched, so the UI keeps matching the dataset still in memory
        (the previous code emptied the dropdown while keeping the old data).
    """
    global QUOTES
    if not filepath:
        return "No file selected.", gr.update()
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        return f"Error loading file: {e}", gr.update()
    if not isinstance(data, dict):
        return "Upload failed: JSON root must be an object.", gr.update()
    QUOTES = data
    cats = sorted(QUOTES)
    status = f"Loaded {len(cats)} categories."
    return status, gr.update(choices=cats, value=(cats[0] if cats else None))
def download_current():
    """Write the dataset currently in memory to a JSON file and return its path.

    The export always serializes ``QUOTES``. Returning the startup file
    from disk would hand back stale data once a different dataset has been
    uploaded, because uploads only replace the in-memory copy.

    Returns:
        The path of the written file, or ``None`` if writing failed.
    """
    export_path = "quotes_export.json"
    try:
        with open(export_path, "w", encoding="utf-8") as f:
            json.dump(QUOTES, f, indent=2, ensure_ascii=False)
    except (OSError, TypeError, ValueError) as e:
        print(f"Error exporting dataset: {e}")
        return None
    return export_path
def export_conversation():
    """Write every logged turn in ``conversation_log`` to a CSV file.

    Returns:
        The CSV file name on success, or ``None`` when writing fails (the
        error is printed).
    """
    target = "conversation_log.csv"
    columns = ["timestamp", "category", "sentiment", "user_message", "bot_response"]
    try:
        with open(target, "w", newline="", encoding="utf-8") as handle:
            out = csv.DictWriter(handle, fieldnames=columns)
            out.writeheader()
            for turn in conversation_log:
                out.writerow(turn)
    except Exception as e:
        print(f"Error exporting CSV: {e}")
        return None
    return target
+# -----------------------------
+# UI
+# -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, Similarity Matching")
+    initial_categories = sorted(list(QUOTES.keys()))
     with gr.Row():
+        category = gr.Dropdown(
+            label="Category",
+            choices=initial_categories,
+            value=(initial_categories[0] if initial_categories else None)
+        )
+        sentiment = gr.Dropdown(
+            label="Sentiment",
+            choices=["auto", "positive", "negative"],
+            value="auto"
+        )
+    chatbot = gr.Chatbot(label="Conversation", height=360, type="tuples")
+    msg = gr.Textbox(label="Your message", placeholder="Ask something like: 'Is food good in college?'", autofocus=True)
+    send = gr.Button("Send")
+    clear = gr.Button("Clear")
+    with gr.Row():
+        uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
+        upload_status = gr.Textbox(label="Upload status", interactive=False)
+        downloader = gr.File(label="Download current dataset")
+        csv_exporter = gr.File(label="Export conversation (.csv)")
+    # Wire events
+    msg.submit(respond, [msg, chatbot, category, sentiment], [msg, chatbot])
+    send.click(respond, [msg, chatbot, category, sentiment], [msg, chatbot])
+    clear.click(clear_chat, None, chatbot, queue=False)
+    uploader.upload(upload_json, uploader, [upload_status, category])
+    downloader.download(download_current)
+    csv_exporter.download(export_conversation)
+# -----------------------------
+# Startup log
+# -----------------------------
# Print a per-category summary of the dataset loaded at startup. The file
# content is not schema-checked, so unexpected shapes are reported instead
# of raising here, which would stop the app from starting.
print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
for cat, sents in QUOTES.items():
    if not isinstance(sents, dict):
        print(f" - {cat}: unexpected entry of type {type(sents).__name__}, skipped")
        continue
    pos_pool = sents.get("positive")
    neg_pool = sents.get("negative")
    p = len(pos_pool) if isinstance(pos_pool, list) else 0
    n = len(neg_pool) if isinstance(neg_pool, list) else 0
    print(f" - {cat}: {p} positive, {n} negative")

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)