Spaces:

hparten
/

elementary_mathstudent_chatbot

Paused

App Files Files Community

hparten committed on Oct 19, 2025

Commit

ef396ec

1 Parent(s): 9ce3c16

updated logging

Browse files

Files changed (1) hide show

app.py +135 -19

app.py CHANGED Viewed

@@ -15,6 +15,9 @@ from transformers import (
     StoppingCriteriaList,
 )
 from peft import PeftModel
 # =========================
 # ⚙️ Config
@@ -85,21 +88,21 @@ def build_system_block(problem_prefix, strategy):
 # =========================
 # 🧾 Logging
 # =========================
-CSV_HEADERS = ["timestamp", "session_id", "username", "strategy", "teacher", "student"]
-def _append_csv(path, row):
-    with FileLock(LOCK_PATH):
-        file_exists = os.path.exists(path)
-        with open(path, "a", newline="", encoding="utf-8") as f:
-            w = csv.writer(f)
-            if not file_exists:
-                w.writerow(CSV_HEADERS)
-            w.writerow(row)
-def log_turn(session_id, username, strategy, teacher_msg, student_msg):
-    row = [datetime.now().isoformat(timespec="seconds"), session_id, username, strategy, teacher_msg, student_msg]
-    per_session = os.path.join(LOG_DIR, f"chat_{session_id}.csv")
-    _append_csv(per_session, row)
 # =========================
 # 🧩 Prompt builder
@@ -144,6 +147,64 @@ bad_words_ids = make_bad_words_ids(
 )
 eos_id = tokenizer.convert_tokens_to_ids("</student>")
 # =========================
 # 🤖 Generation
 # =========================
@@ -174,9 +235,67 @@ def generate_response(teacher_question, username, history, session_id, strategy)
     student_reply = student_reply.split(".")[0].strip() + "."
     history.append((teacher_question, student_reply))
-    log_turn(session_id, username, strategy, teacher_question, student_reply)
     return student_reply, history
 # =========================
 # 🖥 Gradio UI
 # =========================
@@ -216,7 +335,6 @@ def on_reset(chat, history, teacher_q, session_id):
         except Exception as e:
             print(f"[manual flush error] {session_id}: {e}")
-    # return cleared state and a new session id
     return [], [], "", uuid.uuid4().hex[:12]
 # =========================
@@ -257,8 +375,6 @@ with gr.Blocks(title="Elementary Math Student Chatbot") as demo:
     outputs=[chat, state_history, teacher_q, state_session],
 )
-if __name__ == "__main__":
-    demo.launch()
 if __name__ == "__main__":
     demo.queue()

     StoppingCriteriaList,
 )
 from peft import PeftModel
+import tempfile
+import pandas as pd
+from datasets import load_dataset
 # =========================
 # ⚙️ Config
 # =========================
 # 🧾 Logging
 # =========================
+#CSV_HEADERS = ["timestamp", "session_id", "username", "strategy", "teacher", "student"]
+#
+#def _append_csv(path, row):
+#    with FileLock(LOCK_PATH):
+#        file_exists = os.path.exists(path)
+#        with open(path, "a", newline="", encoding="utf-8") as f:
+#            w = csv.writer(f)
+#            if not file_exists:
+#                w.writerow(CSV_HEADERS)
+#            w.writerow(row)
+#
+#def log_turn(session_id, username, strategy, teacher_msg, student_msg):
+#    row = [datetime.now().isoformat(timespec="seconds"), session_id, username, strategy, #teacher_msg, student_msg]
+#    per_session = os.path.join(LOG_DIR, f"chat_{session_id}.csv")
+#    _append_csv(per_session, row)
 # =========================
 # 🧩 Prompt builder
 )
 eos_id = tokenizer.convert_tokens_to_ids("</student>")
+# =========================
+# ☁️ In-Memory Logging + HF Upload
+# =========================
+# Hub dataset repo that receives the chat logs (passed as repo_id with repo_type="dataset").
+HF_DATASET_REPO = "hparten/math_chatbot_logs"
+# Hub client used by flush_session_to_hub() to upload the Parquet file.
+api = HfApi()
+session_logs = {}       # session_id -> list of turn dicts buffered until the next flush
+last_activity = {}      # session_id -> time.time() value recorded by update_activity()
+def add_turn_to_memory(session_id, username, strategy, teacher_msg, student_msg):
+    """Record a single teacher/student exchange in the in-memory session log.
+
+    The turn is appended to ``session_logs[session_id]`` (the list is created
+    on first use) and the session's last-activity time is refreshed.
+    """
+    from datetime import datetime
+
+    stamp = datetime.now().isoformat(timespec="seconds")
+    turns = session_logs.setdefault(session_id, [])
+    turns.append(
+        dict(
+            timestamp=stamp,
+            session_id=session_id,
+            username=username,
+            strategy=strategy,
+            teacher=teacher_msg,
+            student=student_msg,
+        )
+    )
+    # Keep the inactivity watcher from flushing a session that is still in use.
+    update_activity(session_id)
+def update_activity(session_id):
+    """Stamp ``session_id`` in ``last_activity`` with the current ``time.time()``."""
+    from time import time as _now
+
+    last_activity[session_id] = _now()
+def flush_session_to_hub(session_id):
+    """Upload one session's buffered turns to the Hugging Face dataset repo.
+
+    The session's rows are taken out of ``session_logs``, appended to whatever
+    ``load_dataset`` returns for ``HF_DATASET_REPO`` and written back to the
+    repo as a single ``chat_logs.parquet`` file.
+
+    If writing or uploading fails, the rows are put back into
+    ``session_logs`` (ahead of any turns recorded in the meantime) and the
+    exception is re-raised, so a caller that retries later does not lose data.
+    The temporary Parquet file is removed whether or not the upload succeeds.
+    """
+    # Take ownership of the rows; turns added while we upload start a new list.
+    rows = session_logs.pop(session_id, None)
+    if not rows:
+        print(f"[flush] No logs found for session {session_id}")
+        return
+    df = pd.DataFrame(rows)
+    tmp_path = None
+    try:
+        try:
+            ds = load_dataset(HF_DATASET_REPO, split="train", token=HF_TOKEN)
+            combined = pd.concat([ds.to_pandas(), df], ignore_index=True)
+        except Exception as e:
+            # Best effort: with nothing to merge (e.g. the very first upload)
+            # fall back to this session alone.
+            # NOTE(review): a transient download error also lands here, and the
+            # upload below then replaces chat_logs.parquet with only this
+            # session's rows -- consider narrowing this handler.
+            print(f"[flush] Could not load existing logs ({e}); uploading this session only.")
+            combined = df
+        # Reserve a path, then close the handle before pandas writes to it;
+        # writing to a still-open NamedTemporaryFile by name fails on Windows.
+        with tempfile.NamedTemporaryFile("wb", delete=False, suffix=".parquet") as tmp:
+            tmp_path = tmp.name
+        combined.to_parquet(tmp_path, index=False)
+        api.upload_file(
+            path_or_fileobj=tmp_path,
+            path_in_repo="chat_logs.parquet",
+            repo_id=HF_DATASET_REPO,
+            repo_type="dataset",
+            token=HF_TOKEN,
+        )
+    except Exception:
+        # Restore the rows in front of anything logged since the pop above.
+        session_logs.setdefault(session_id, [])[:0] = rows
+        raise
+    finally:
+        if tmp_path is not None and os.path.exists(tmp_path):
+            os.remove(tmp_path)
+    print(f"[flush] Uploaded session {session_id} to HF dataset.")
 # =========================
 # 🤖 Generation
 # =========================
     student_reply = student_reply.split(".")[0].strip() + "."
     history.append((teacher_question, student_reply))
+    add_turn_to_memory(session_id, username, strategy, teacher_question, student_reply)
     return student_reply, history
+# =========================
+# ☁️ Flush session logs to Hugging Face Hub
+# =========================
+#
+#def flush_session_to_hub(session_id):
+#    """Append this session to one Parquet file in the private HF dataset."""
+#    if session_id not in session_logs or not session_logs[session_id]:
+#        print(f"[flush] No logs found for session {session_id}")
+#        return
+#
+#    df = pd.DataFrame(session_logs[session_id])
+#    del session_logs[session_id]
+#
+#    try:
+#        ds = load_dataset(HF_DATASET_REPO, split="train", token=HF_TOKEN)
+#        existing = ds.to_pandas()
+#        combined = pd.concat([existing, df], ignore_index=True)
+#    except Exception:
+#        combined = df
+#
+#    with tempfile.NamedTemporaryFile("wb", delete=False, suffix=".parquet") as tmp:
+#        combined.to_parquet(tmp.name, index=False)
+#        tmp_path = tmp.name
+#
+#    api.upload_file(
+#        path_or_fileobj=tmp_path,
+#        path_in_repo="chat_logs.parquet",
+#        repo_id=HF_DATASET_REPO,
+#        repo_type="dataset",
+#        token=HF_TOKEN,
+#    )
+#
+#    os.remove(tmp_path)
+#    print(f"[flush] Uploaded session {session_id} to HF dataset.")
+# =========================
+# Inactivity flush
+# =========================
+import threading, time
+INACTIVITY_LIMIT = 600  # 10 minutes
+def check_inactivity_loop():
+    """Flush sessions that have been idle longer than ``INACTIVITY_LIMIT``.
+
+    Runs forever: once a minute it looks for sessions whose last recorded
+    activity is more than ``INACTIVITY_LIMIT`` seconds old, flushes each one
+    with ``flush_session_to_hub`` and then forgets its activity timestamp.
+    A session whose flush raises keeps its timestamp, so it is tried again
+    on the next pass. Never returns.
+    """
+    while True:
+        now = time.time()
+        # Iterate over a snapshot: update_activity() may insert into
+        # last_activity from another thread, and iterating the live dict
+        # could then raise RuntimeError outside the try below and end this
+        # thread for good.
+        snapshot = list(last_activity.items())
+        inactive = [sid for sid, ts in snapshot if now - ts > INACTIVITY_LIMIT]
+        for sid in inactive:
+            try:
+                flush_session_to_hub(sid)
+                # pop() instead of del: the key may already be gone.
+                last_activity.pop(sid, None)
+            except Exception as e:
+                print(f"[auto-flush-error] {sid}: {e}")
+        time.sleep(60)
+# Start the inactivity watcher in the background; daemon=True means the thread
+# does not keep the process alive at shutdown.
+threading.Thread(target=check_inactivity_loop, daemon=True).start()
 # =========================
 # 🖥 Gradio UI
 # =========================
         except Exception as e:
             print(f"[manual flush error] {session_id}: {e}")
     return [], [], "", uuid.uuid4().hex[:12]
 # =========================
     outputs=[chat, state_history, teacher_q, state_session],
 )
 if __name__ == "__main__":
     demo.queue()