Spaces:

chitech2026
/

sandbox_dev

Sleeping

App Files Files Community

igortech committed on Sep 4, 2025

Commit

f008056

verified ·

1 Parent(s): db4315a

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -170

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import string
 import os
 import datetime
 import difflib
 import gradio as gr
 # -----------------------------
@@ -12,204 +13,169 @@ import gradio as gr
 # -----------------------------
 DATA_PATH = "quotes.json"
-def load_quotes():
     if os.path.exists(DATA_PATH):
-        try:
-            with open(DATA_PATH, "r", encoding="utf-8") as f:
-                data = json.load(f)
-            # Ensure staged_responses bucket always exists
-            if "staged_responses" not in data:
-                data["staged_responses"] = []
-            print(f"Loaded dataset from {DATA_PATH} with {len(data)} categories.")
-            return data, DATA_PATH
-        except Exception as e:
-            print(f"Failed to load {DATA_PATH}: {e}")
-    # fallback: empty
-    return {"staged_responses": []}, None
-QUOTES, DATA_PATH = load_quotes()
 # -----------------------------
-# Text helpers
 # -----------------------------
-STOPWORDS = {
-    "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for",
-    "is","are","was","were","be","being","been","it","that","this","these","those","with",
-    "as","by","from","about","into","over","after","before","up","down","out"
-}
-POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
-NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}
-punct_re = re.compile(f"[{re.escape(string.punctuation)}]")
-def normalize(text: str) -> str:
-    return punct_re.sub(" ", (text or "").lower())
-def tokenize(text: str):
-    return [t for t in normalize(text).split() if t and t not in STOPWORDS]
-def infer_sentiment(user_text: str) -> str:
-    tl = normalize(user_text)
-    has_pos = any(w in tl for w in POS_HINTS)
-    has_neg = any(w in tl for w in NEG_HINTS)
-    if has_pos and not has_neg:
-        return "positive"
-    if has_neg and not has_pos:
-        return "negative"
-    return "positive"  # default
-# -----------------------------
-# Retrieval with fuzzy matching
-# -----------------------------
-def best_match_quote(category: str, sentiment: str, user_text: str) -> str:
-    if category not in QUOTES:
-        return f"No quotes found for category '{category}'."
-    pool = QUOTES[category]
-    if not pool:
-        return f"No quotes available in '{category}'."
-    q_tokens = set(tokenize(user_text))
-    best_score = -1
-    best_quote = None
-    for entry in pool:
-        qtoks = set(tokenize(entry.get("quote", "")))
-        score = len(q_tokens & qtoks)
-        # fuzzy matching fallback
-        if score == 0:
-            for word in q_tokens:
-                matches = difflib.get_close_matches(word, qtoks, n=1, cutoff=0.8)
-                if matches:
-                    score += 1
-        if score > best_score:
-            best_score = score
-            best_quote = entry.get("quote", "")
-    if not best_quote:
-        return random.choice([e.get("quote","") for e in pool if "quote" in e])
-    return best_quote
 # -----------------------------
-# Gradio callbacks
 # -----------------------------
-def respond(message, history, category, sentiment_choice):
-    if not QUOTES:
-        bot = "No dataset loaded. Please upload a JSON file first."
-        history.append((message, bot))
-        return "", history
-    if not category:
-        bot = "Please select a category."
-        history.append((message, bot))
-        return "", history
-    # sentiment not really used with interview-style data, but kept for compatibility
-    if sentiment_choice == "auto":
-        sent = infer_sentiment(message)
-    else:
-        sent = sentiment_choice
-    bot = best_match_quote(category, sent, message)
-    history.append((message, bot))
-    return "", history
-def clear_chat():
-    return None
-def upload_json(filepath):
-    global QUOTES, DATA_PATH
-    try:
-        with open(filepath, "r", encoding="utf-8") as f:
-            data = json.load(f)
-        if not isinstance(data, dict):
-            return gr.update(value="Upload failed: JSON root must be an object."), gr.update(choices=[])
-        if "staged_responses" not in data:
-            data["staged_responses"] = []
-        QUOTES = data
-        DATA_PATH = os.path.basename(filepath)
-        cats = sorted(list(QUOTES.keys()))
-        status = f"Loaded {len(cats)} categories from {DATA_PATH}."
-        return status, gr.update(choices=cats, value=(cats[0] if cats else None))
-    except Exception as e:
-        return f"Error loading file: {e}", gr.update(choices=[])
-def download_current():
-    """Download dataset including staged responses."""
-    out_name = DATA_PATH or "quotes_export.json"
-    try:
-        tmp = "quotes_export.json"
-        with open(tmp, "w", encoding="utf-8") as f:
-            json.dump(QUOTES, f, indent=2, ensure_ascii=False)
-        return tmp
-    except Exception:
         return None
-def stage_conversation(history, category):
-    if not category:
-        return "Please select a category to stage into."
-    staged = QUOTES.get("staged_responses", [])
-    for msg, bot in history:
-        staged.append({
-            "category": category,
-            "user": msg,
-            "bot": bot
-        })
-    QUOTES["staged_responses"] = staged
-    return f"Staged {len(history)} exchanges into 'staged_responses'."
 # -----------------------------
 # UI
 # -----------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, Similarity Matching")
-    initial_categories = sorted(list(QUOTES.keys()))
     with gr.Row():
-        category = gr.Dropdown(
-            label="Category",
-            choices=initial_categories,
-            value=(initial_categories[0] if initial_categories else None)
-        )
-        sentiment = gr.Dropdown(
-            label="Sentiment",
-            choices=["auto", "positive", "negative"],
-            value="auto"
         )
-    chatbot = gr.Chatbot(label="Conversation", height=360, type="tuples")
-    msg = gr.Textbox(label="Your message", placeholder="Ask something like: 'Is food good in college?'", autofocus=True)
-    send = gr.Button("Send")
-    clear = gr.Button("Clear")
     with gr.Row():
-        stage_btn = gr.Button("Stage Conversation to Category")
-        stage_status = gr.Textbox(label="Stage status", interactive=False)
     with gr.Row():
-        uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
-        upload_status = gr.Textbox(label="Upload status", interactive=False)
-        downloader = gr.File(label="Download current dataset")
-    # Wire events
-    msg.submit(respond, [msg, chatbot, category, sentiment], [msg, chatbot])
-    send.click(respond, [msg, chatbot, category, sentiment], [msg, chatbot])
-    clear.click(clear_chat, None, chatbot, queue=False)
-    stage_btn.click(stage_conversation, [chatbot, category], stage_status)
-    uploader.upload(upload_json, uploader, [upload_status, category])
-    downloader.download(download_current)
-# -----------------------------
-# Startup log
-# -----------------------------
-print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
-if QUOTES:
-    for cat, items in QUOTES.items():
-        if isinstance(items, list):
-            print(f" - {cat}: {len(items)} entries")

 import os
 import datetime
 import difflib
+import csv
 import gradio as gr
 # -----------------------------
 # -----------------------------
 DATA_PATH = "quotes.json"
def load_dataset(path=None):
    """Load the quotes dataset from a JSON file.

    Args:
        path: File to read. Defaults to the module-level ``DATA_PATH``.

    Returns:
        The parsed JSON object as a dict. An empty dict is returned when the
        file is missing, cannot be read, is not valid JSON, or its root is
        not a JSON object.
    """
    target = DATA_PATH if path is None else path
    if not os.path.exists(target):
        return {}
    try:
        # Explicit UTF-8: the platform default encoding may fail on
        # non-ASCII quote text.
        with open(target, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; a broken file should not abort application startup.
        print(f"Failed to load {target}: {exc}")
        return {}
    # The rest of the module looks categories up by key, so only a JSON
    # object is usable as a dataset.
    return data if isinstance(data, dict) else {}
def save_dataset(data, path=None):
    """Write the dataset to disk as indented JSON.

    The data is first written to a sibling temporary file and then moved over
    the destination, so a failed serialization does not truncate an existing
    dataset file.

    Args:
        data: JSON-serializable dataset to store.
        path: Destination file. Defaults to the module-level ``DATA_PATH``.

    Raises:
        TypeError: If ``data`` contains values that ``json`` cannot serialize.
        OSError: If the file cannot be written.
    """
    target = DATA_PATH if path is None else path
    tmp_path = f"{target}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            # Keep non-ASCII text readable instead of \uXXXX escapes.
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, target)
    finally:
        # Only present here if the dump or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
+dataset = load_dataset()
 # -----------------------------
+# Conversation state
+# -----------------------------
+conversation_history = []
 # -----------------------------
+# Response logic
+# -----------------------------
def find_best_quote(category, user_input, data=None):
    """Return the quote in a category that is most similar to the user input.

    Similarity is the ``difflib.SequenceMatcher`` ratio between the
    lower-cased input and each lower-cased quote. The first quote with the
    highest non-zero ratio wins.

    Args:
        category: Key of the category to search.
        user_input: Text to compare against each quote.
        data: Mapping of category name to a list of entries. Defaults to the
            module-level ``dataset``.

    Returns:
        The best matching quote text, or None when the category is unknown or
        empty, the input is empty, or no quote has any similarity to it.
    """
    source = dataset if data is None else data
    quotes = source.get(category)
    if not quotes or not user_input:
        return None

    needle = user_input.lower()
    best_match = None
    best_score = 0.0
    for entry in quotes:
        # Skip entries that carry no usable "quote" text instead of raising.
        quote_text = entry.get("quote") if isinstance(entry, dict) else None
        if not isinstance(quote_text, str) or not quote_text:
            continue
        score = difflib.SequenceMatcher(None, needle, quote_text.lower()).ratio()
        if score > best_score:
            best_score = score
            best_match = quote_text
    return best_match
def respond(user_message, category, url_domain):
    """Generate a structured 3-part response and record the exchange.

    Args:
        user_message: The question typed by the user.
        category: Selected dataset category; may be None when nothing is
            selected.
        url_domain: Optional site/domain to suggest for further searching.

    Returns:
        The module-level ``conversation_history`` list of
        ``{"role", "content"}`` dicts, including the new user and assistant
        messages. Blank messages leave the history unchanged.
    """
    # NOTE(review): conversation_history is module-level state, so every
    # session served by this process reads and writes the same transcript.
    # Per-session state would need gr.State wired through the events.
    if not user_message or not user_message.strip():
        return conversation_history

    if category:
        summary = f"It seems you're asking about {category.lower()}."
        fusion = find_best_quote(category, user_message)
    else:
        # The dropdown has no value when the dataset has no categories.
        summary = "No category is selected."
        fusion = None
    if not fusion:
        fusion = "No matching experiences were found in this category."

    url_part = "No domain specified."
    if url_domain and url_domain.strip():
        url_part = f"Try searching this site for more: {url_domain.strip()}"

    response = (
        f"**What people say:**\n{summary}\n\n"
        f"**Combined insight:**\n{fusion}\n\n"
        f"**Related link:**\n{url_part}"
    )

    # Append to history (also the source for the CSV export)
    conversation_history.extend(
        [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": response},
        ]
    )
    return conversation_history
 # -----------------------------
+# Utility: export conversation
 # -----------------------------
def export_conversation_csv():
    """Write the recorded conversation to a timestamped CSV file.

    Returns:
        The name of the CSV file created in the current working directory,
        or None when no messages have been recorded.
    """
    if not conversation_history:
        return None
    filename = f"conversation_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    # Explicit UTF-8 so non-ASCII messages do not fail under a narrower
    # platform default encoding; newline="" is what the csv module expects.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Role", "Message"])
        writer.writerows(
            (msg["role"], msg["content"]) for msg in conversation_history
        )
    return filename
+# -----------------------------
+# Stage / save dataset
+# -----------------------------
def stage_response_to_category(category, message):
    """Add a message as a new quote entry under a category of the dataset.

    The change is made to the in-memory ``dataset`` only; this function does
    not write anything to disk.

    Args:
        category: Category key to add the quote to; created if absent.
        message: Text to store. Surrounding whitespace is stripped.

    Returns:
        A status string describing the outcome.
    """
    text = (message or "").strip()
    if not text:
        return "Message is empty."
    if not category:
        # Without this guard a missing selection would be stored under None.
        return "Please select a category."
    dataset.setdefault(category, []).append({"quote": text})
    return f"Staged response saved under '{category}'."
def download_current_dataset():
    """Dump the in-memory dataset to a timestamped JSON file.

    Returns:
        The name of the JSON file created in the current working directory.
    """
    filename = f"dataset_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    # UTF-8 with ensure_ascii=False keeps non-ASCII quotes readable and does
    # not depend on the platform default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=2, ensure_ascii=False)
    return filename
 # -----------------------------
 # UI
 # -----------------------------
def clear_conversation():
    """Reset the stored conversation and return an empty message list.

    Returns:
        A new empty list; the clear button's event wiring sends it to the
        chatbot component.
    """
    global conversation_history
    # Rebind the module-level name to a fresh list rather than mutating the
    # existing one in place.
    conversation_history = []
    return []
# Layout and event wiring for the chatbot app.
with gr.Blocks() as demo:
    gr.Markdown("## College Life Q&A Chatbot")

    categories = list(dataset)
    with gr.Row():
        category_dropdown = gr.Dropdown(
            choices=categories,
            label="Select Category",
            value=categories[0] if categories else None,
        )
        url_input = gr.Textbox(label="Limit search to domain (optional)")

    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")

    with gr.Row():
        msg = gr.Textbox(
            label="Your question",
            placeholder="Ask something about college life...",
        )
        send_btn = gr.Button("Send")

    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")
        export_btn = gr.Button("Export Conversation to CSV")

    with gr.Row():
        stage_box = gr.Textbox(label="Stage a response to selected category")
        stage_btn = gr.Button("Stage Conversation to Category")
        save_btn = gr.Button("Download Current Dataset")

    # Separate read-only status field: writing the status back into stage_box
    # would replace the user's text, and a second click would then stage the
    # status message itself as a quote.
    stage_status = gr.Textbox(label="Stage status", interactive=False)

    # Declare the download targets as part of the layout instead of creating
    # them inline inside the event wiring.
    with gr.Row():
        conversation_file = gr.File(label="Download Conversation CSV")
        dataset_file = gr.File(label="Download Dataset JSON")

    # --- Events ---
    respond_inputs = [msg, category_dropdown, url_input]
    send_btn.click(respond, inputs=respond_inputs, outputs=[chatbot])
    # Pressing Enter in the question box behaves like the Send button.
    msg.submit(respond, inputs=respond_inputs, outputs=[chatbot])

    clear_btn.click(clear_conversation, outputs=[chatbot])
    export_btn.click(export_conversation_csv, outputs=[conversation_file])
    stage_btn.click(
        stage_response_to_category,
        inputs=[category_dropdown, stage_box],
        outputs=[stage_status],
    )
    save_btn.click(download_current_dataset, outputs=[dataset_file])
+if __name__ == "__main__":
+    demo.launch()