Spaces:

chitech2026
/

sandbox_dev

Sleeping

App Files Files Community

igortech committed on Sep 16, 2025

Commit

35eb385

verified ·

1 Parent(s): 92c739f

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -95

app.py CHANGED Viewed

@@ -3,136 +3,194 @@ import os
 import re
 import csv
 import tempfile
-import datetime
 from difflib import SequenceMatcher
 import gradio as gr
 # -----------------------------
 # Config / data loading
 # -----------------------------
 DATA_PATH = "quotes.json"
-def load_dataset():
     if os.path.exists(DATA_PATH):
-        with open(DATA_PATH, "r", encoding="utf-8") as f:
-            return json.load(f)
-    return {"staged_responses": []}
-def save_dataset(data):
-    with open(DATA_PATH, "w", encoding="utf-8") as f:
-        json.dump(data, f, indent=2, ensure_ascii=False)
-dataset = load_dataset()
 # -----------------------------
-# Core logic
 # -----------------------------
-def find_best_matches(user_input, dataset, top_n=3, threshold=0.3):
-    matches = []
-    for category, quotes in dataset.items():
-        if category == "staged_responses":
-            continue
-        for entry in quotes:
-            quote = entry["quote"]
-            score = SequenceMatcher(None, user_input.lower(), quote.lower()).ratio()
-            matches.append((score, category, quote))
-    matches.sort(key=lambda x: x[0], reverse=True)
-    return [m for m in matches if m[0] >= threshold][:top_n]
-def generate_response(message, history):
-    matches = find_best_matches(message, dataset)
-    if not matches:
-        return (
-            history
-            + [{"role": "assistant", "content": f"No data about {message}."}]
-        )
-    responses = []
-    for score, category, quote in matches:
-        responses.append(f"Category: {category}\nWhat real people say:\n{quote}")
-    reply = "\n\n".join(responses)
-    return history + [{"role": "assistant", "content": reply}]
-# -----------------------------
-# Conversation & staging
-# -----------------------------
-def stage_conversation(history, category):
-    if not history:
-        return "No conversation to stage."
-    convo_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in history])
-    new_entry = {"quote": convo_text}
-    if "staged_responses" not in dataset:
-        dataset["staged_responses"] = []
-    dataset["staged_responses"].append(new_entry)
-    save_dataset(dataset)
-    return f"Conversation staged under {category}."
 # -----------------------------
-# Download helpers
 # -----------------------------
 def download_conversation_csv(history):
     if not history:
-        return None
-    tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8")
-    writer = csv.writer(tmpfile)
-    writer.writerow(["role", "content"])
-    for msg in history:
-        writer.writerow([msg["role"], msg["content"]])
-    tmpfile.close()
-    return tmpfile.name
-def download_dataset():
-    tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding="utf-8")
-    json.dump(dataset, tmpfile, indent=2, ensure_ascii=False)
-    tmpfile.close()
-    return tmpfile.name
 # -----------------------------
-# Gradio UI
 # -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# Campus Conversation Bot")
-    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
-    msg = gr.Textbox(label="Type your question", placeholder="Ask me something...", container=True)
     with gr.Row():
-        clear_btn = gr.Button("Clear")
-        export_csv_btn = gr.Button("Export Conversation to CSV")
-        download_json_btn = gr.Button("Download Current Dataset")
-    with gr.Row():
-        category_dropdown = gr.Dropdown(choices=list(dataset.keys()), label="Choose category to stage", interactive=True)
-        stage_btn = gr.Button("Stage Conversation to Category")
-    # Events
-    msg.submit(generate_response, [msg, chatbot], chatbot)
-    msg.submit(lambda: "", None, msg)  # clear textbox on Enter
-    clear_btn.click(lambda: [], None, chatbot)
-    export_csv_file = gr.File(label="Download Conversation CSV")
-    export_csv_btn.click(download_conversation_csv, chatbot, export_csv_file)
-    download_json_file = gr.File(label="Download Dataset JSON")
-    download_json_btn.click(download_dataset, None, download_json_file)
-    stage_btn.click(stage_conversation, [chatbot, category_dropdown], None)
 if __name__ == "__main__":
-    demo.launch()

 import re
 import csv
 import tempfile
 from difflib import SequenceMatcher
+import datetime
 import gradio as gr
+from rapidfuzz import fuzz
 # -----------------------------
 # Config / data loading
 # -----------------------------
 DATA_PATH = "quotes.json"
+def load_quotes():
     if os.path.exists(DATA_PATH):
+        try:
+            with open(DATA_PATH, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if isinstance(data, dict):
+                print(f"Loaded dataset from {DATA_PATH} with {len(data)} categories.")
+                return data
+        except Exception as e:
+            print(f"Failed to load {DATA_PATH}: {e}")
+    print("No dataset file found. Upload one via the UI.")
+    return {}
+QUOTES = load_quotes()
 # -----------------------------
+# Text helpers
 # -----------------------------
+STOPWORDS = {
+    "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for",
+    "is","are","was","were","be","being","been","it","that","this","these","those","with",
+    "as","by","from","about","into","over","after","before","up","down","out"
+}
+POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
+NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}
+punct_re = re.compile(f"[{re.escape('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')}]")
+def normalize(text: str) -> str:
+    return punct_re.sub(" ", (text or "").lower())
+def tokenize(text: str):
+    return [t for t in normalize(text).split() if t and t not in STOPWORDS]
+def infer_sentiment(user_text: str) -> str:
+    tl = normalize(user_text)
+    has_pos = any(w in tl for w in POS_HINTS)
+    has_neg = any(w in tl for w in NEG_HINTS)
+    if has_pos and not has_neg:
+        return "positive"
+    if has_neg and not has_pos:
+        return "negative"
+    return "positive"
+# -----------------------------
+# Retrieval
+# -----------------------------
+def best_match_quote(user_text: str) -> str:
+    """Search across all categories with fuzzy matching and return best quote."""
+    max_score = 0
+    best_quote = None
+    for cat_quotes in QUOTES.values():
+        for q_obj in cat_quotes:
+            q_text = q_obj.get("quote", "")
+            score = fuzz.token_set_ratio(user_text.lower(), q_text.lower())
+            if score > max_score:
+                max_score = score
+                best_quote = q_text
+    if max_score < 30:  # threshold; anything below treated as unknown
+        return f"No data about '{user_text}'"
+    return best_quote
+# -----------------------------
+# 3-fold response generation
+# -----------------------------
+def generate_three_fold_response(user_text: str):
+    quote = best_match_quote(user_text)
+    if quote.startswith("No data"):
+        return [quote, "", ""]
+    # Very simple 3-fold split
+    sentences = [s.strip() for s in quote.split('.') if s.strip()]
+    first = sentences[0] if len(sentences) > 0 else ""
+    second = " ".join(sentences[1:3]) if len(sentences) > 2 else (sentences[1] if len(sentences)>1 else "")
+    third = " ".join(sentences[3:]) if len(sentences) > 3 else ""
+    return [first, second, third]
 # -----------------------------
+# Gradio callbacks
 # -----------------------------
+def respond(message, history, category):
+    if not message:
+        return "", history
+    responses = generate_three_fold_response(message)
+    bot_response = [
+        {"label": "Summary", "text": responses[0]},
+        {"label": "Details", "text": responses[1]},
+        {"label": "What real people say", "text": responses[2]}
+    ]
+    history.append((message, bot_response))
+    return "", history
+def clear_chat():
+    return None
+def upload_json(filepath):
+    global QUOTES, DATA_PATH
+    try:
+        with open(filepath, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        if not isinstance(data, dict):
+            return gr.update(value="Upload failed: JSON root must be an object."), gr.update(choices=[])
+        QUOTES = data
+        DATA_PATH = os.path.basename(filepath)
+        cats = sorted(list(QUOTES.keys()))
+        status = f"Loaded {len(cats)} categories from {DATA_PATH}."
+        return status, gr.update(choices=cats, value=(cats[0] if cats else None))
+    except Exception as e:
+        return f"Error loading file: {e}", gr.update(choices=[])
+def download_current_json():
+    tmp = DATA_PATH or "quotes_export.json"
+    with open(tmp, "w", encoding="utf-8") as f:
+        json.dump(QUOTES, f, indent=2, ensure_ascii=False)
+    return tmp
 def download_conversation_csv(history):
     if not history:
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
+        tmp.close()
+        return tmp.name
+    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode='w', newline='', encoding='utf-8')
+    writer = csv.writer(tmp)
+    writer.writerow(["User Message", "Summary", "Details", "What real people say"])
+    for msg, bot_resp in history:
+        summary = bot_resp[0]['text']
+        details = bot_resp[1]['text']
+        real_people = bot_resp[2]['text']
+        writer.writerow([msg, summary, details, real_people])
+    tmp.close()
+    return tmp.name
 # -----------------------------
+# UI
 # -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, 3-Fold Responses")
+    # Category list from loaded data (may be empty until upload)
+    initial_categories = sorted(list(QUOTES.keys()))
     with gr.Row():
+        category = gr.Dropdown(
+            label="Category",
+            choices=initial_categories,
+            value=(initial_categories[0] if initial_categories else None)
+        )
+    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
+    msg = gr.Textbox(label="Your message", placeholder="Ask something like: 'Is food good in college?'", autofocus=True)
+    send = gr.Button("Send")
+    clear = gr.Button("Clear")
+    with gr.Row():
+        uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
+        upload_status = gr.Textbox(label="Upload status", interactive=False)
+        download_json_btn = gr.File(label="Download dataset")
+        download_csv_btn = gr.File(label="Export conversation to CSV")
+    # Wire events
+    msg.submit(respond, [msg, chatbot, category], [msg, chatbot])
+    send.click(respond, [msg, chatbot, category], [msg, chatbot])
+    clear.click(clear_chat, None, chatbot, queue=False)
+    uploader.upload(upload_json, uploader, [upload_status, category])
+    download_json_btn.download(download_current_json)
+    download_csv_btn.click(download_conversation_csv, chatbot, download_csv_btn)
+# -----------------------------
+# Startup log
+# -----------------------------
+print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
+if QUOTES:
+    for cat, qlist in QUOTES.items():
+        print(f" - {cat}: {len(qlist)} entries")
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)