"""
share_trace.py — Run a live PaperProf session and push the agent trace to HF Hub.

Records each LLM step (question generation, answer evaluation, MCQ generation)
as a structured dataset so the community can see how PaperProf works end-to-end.

Usage:
    python share_trace.py

Output:
    Dataset pushed to build-small-hackathon/PaperProf-traces
"""

import json
import os
import sys
import time
import uuid
from datetime import datetime, timezone

# Put this script's directory first on sys.path so the function-local
# ``core.*`` / ``model.*`` imports below resolve. ``abspath`` guards against a
# relative ``__file__``, whose dirname would otherwise be the empty string.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

TRACE_REPO = "build-small-hackathon/PaperProf-traces"

# Three chunks from different domains — covers the full diversity of PaperProf use cases
DEMO_CHUNKS = [
    {
        "topic": "Operating Systems — Virtual Memory",
        "chunk": (
            "Virtual memory is a memory management technique that gives each process the "
            "illusion of having access to a large, contiguous block of memory. The OS maps "
            "virtual addresses used by programs to physical addresses in RAM using a page table. "
            "When a process accesses a page not currently in RAM, a page fault occurs and the OS "
            "loads the required page from disk (swap space). This allows systems to run programs "
            "larger than physical RAM and provides memory isolation between processes."
        ),
        "student_answers": {
            "open": "Virtual memory allows programs to use more memory than physically available by mapping virtual addresses to physical ones using a page table.",
            "wrong": "Virtual memory is just another name for RAM, it speeds up the CPU cache.",
        },
    },
    {
        "topic": "Machine Learning — Gradient Descent",
        "chunk": (
            "Gradient descent is an iterative optimization algorithm used to minimize a loss "
            "function by updating model parameters in the direction opposite to the gradient. "
            "In each iteration, the gradient of the loss with respect to the parameters is "
            "computed, and the parameters are updated as θ = θ − α∇L(θ), where α is the "
            "learning rate. Too large a learning rate causes divergence; too small slows "
            "convergence. Stochastic gradient descent (SGD) approximates the true gradient "
            "using a random mini-batch at each step, making it scalable to large datasets."
        ),
        "student_answers": {
            "open": "Gradient descent minimizes the loss by repeatedly moving parameters opposite to the gradient, scaled by the learning rate.",
            "wrong": "Gradient descent always finds the global minimum of any function.",
        },
    },
    {
        "topic": "Networking — TCP Three-Way Handshake",
        "chunk": (
            "The TCP three-way handshake establishes a reliable connection between a client "
            "and server before data transfer begins. The client sends a SYN segment, the server "
            "responds with SYN-ACK, and the client completes the handshake with an ACK. Each "
            "side advertises its initial sequence number during this exchange, which is used to "
            "order and acknowledge packets throughout the connection. This ensures both parties "
            "are ready to send and receive before any application data flows."
        ),
        "student_answers": {
            "open": "The TCP handshake uses SYN, SYN-ACK, and ACK to synchronize sequence numbers and confirm both sides are ready to communicate.",
            "wrong": "TCP uses a two-way handshake: SYN from client and ACK from server.",
        },
    },
]


def timed(fn, *args, **kwargs):
    """Call ``fn(*args, **kwargs)`` and measure how long the call takes.

    Args:
        fn: Callable to invoke.
        *args: Positional arguments forwarded to ``fn``.
        **kwargs: Keyword arguments forwarded to ``fn``.

    Returns:
        A ``(result, seconds)`` tuple, where ``seconds`` is the elapsed time
        rounded to two decimals.
    """
    # perf_counter is monotonic: unlike time.time(), a system clock adjustment
    # during the call cannot skew the duration or make it negative.
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    return result, round(time.perf_counter() - start, 2)


def _step_record(
    session_id: str,
    step: int,
    step_type: str,
    topic: str,
    inputs: dict,
    outputs: dict,
    duration_s: float,
    model_id: str,
) -> dict:
    """Build one trace row, stamped with the current UTC time."""
    # Key order is kept stable because it determines the field order of the
    # serialized JSONL rows.
    return {
        "session_id": session_id,
        "step": step,
        "type": step_type,
        "topic": topic,
        "input": inputs,
        "output": outputs,
        "duration_s": duration_s,
        "model": model_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }


def run_session(chunk_info: dict, session_id: str, model_id: str) -> list[dict]:
    """Run the four-step demo session on one chunk and return its trace rows.

    The steps are: open-question generation, evaluation of the reference
    answer, evaluation of the deliberately wrong answer, and MCQ generation.
    Progress is printed to stdout as each step completes.

    Args:
        chunk_info: One ``DEMO_CHUNKS`` entry; must provide the keys
            ``"chunk"``, ``"topic"`` and ``"student_answers"`` (the latter
            with ``"open"`` and ``"wrong"`` entries).
        session_id: Identifier written into every row of this session.
        model_id: Model identifier written into every row.

    Returns:
        The four step records, in execution order.
    """
    # Function-local imports: the project modules are only needed once a
    # session actually runs.
    from core.questioner import generate_question, generate_mcq
    from core.evaluator import evaluate_answer

    chunk = chunk_info["chunk"]
    topic = chunk_info["topic"]
    answers = chunk_info["student_answers"]
    steps = []

    def record(step, step_type, inputs, outputs, duration_s):
        # Bind the per-session constants once so each step only states what varies.
        return _step_record(
            session_id, step, step_type, topic, inputs, outputs, duration_s, model_id
        )

    print(f"\n{'='*60}")
    print(f"Topic: {topic}")
    print(f"{'='*60}")

    # Step 1 — Open question generation
    print("[1/4] Generating open question…")
    question, dur = timed(generate_question, chunk, language="English", difficulty="Normal")
    print(f" Q: {question} ({dur}s)")
    steps.append(record(
        1,
        "question_generation",
        {"chunk": chunk, "difficulty": "Normal", "language": "English"},
        {"question": question},
        dur,
    ))

    # Steps 2 and 3 — Evaluate a correct answer, then a wrong one.
    # Columns: step number, key in `answers`, expected quality, console label.
    evaluations = (
        (2, "open", "correct", "correct"),
        (3, "wrong", "incorrect", "wrong"),
    )
    for step, answer_key, quality, label in evaluations:
        student_answer = answers[answer_key]
        print(f"[{step}/4] Evaluating {quality} answer…")
        feedback, dur = timed(
            evaluate_answer, question, chunk, student_answer, language="English"
        )
        # str() keeps the preview safe if the evaluator returns something that
        # is not sliceable; the MCQ preview below does the same.
        print(f" Feedback ({label}): {str(feedback)[:80]}… ({dur}s)")
        steps.append(record(
            step,
            "answer_evaluation",
            {
                "chunk": chunk,
                "question": question,
                "student_answer": student_answer,
                "expected_quality": quality,
            },
            {"feedback": feedback},
            dur,
        ))

    # Step 4 — MCQ generation
    print("[4/4] Generating MCQ…")
    mcq, dur = timed(generate_mcq, chunk, language="English")
    print(f" MCQ question: {str(mcq.get('question',''))[:80]} ({dur}s)")
    steps.append(record(
        4,
        "mcq_generation",
        {"chunk": chunk, "language": "English"},
        {"mcq": mcq},
        dur,
    ))

    return steps


def _to_jsonl(all_steps: list[dict]) -> str:
    """Serialize step records as JSON Lines, one newline-terminated row each."""
    # Terminating every row (including the last) keeps the file well-formed for
    # line-oriented readers and for appending; an empty list yields "".
    return "".join(json.dumps(step, ensure_ascii=False) + "\n" for step in all_steps)


def _dataset_card(model_id: str) -> str:
    """Render the README.md dataset card, embedding ``model_id``."""
    return f"""---
license: apache-2.0
task_categories:
- question-answering
- text-generation
language:
- en
tags:
- agent-trace
- education
- paperprof
- build-small-hackathon
---

# PaperProf Agent Trace

Step-by-step trace of [PaperProf](https://huggingface.co/spaces/build-small-hackathon/PaperProf),
an AI study buddy that turns course PDFs into interactive quiz sessions.

## What's in this dataset

Each row in `paperprof_trace.jsonl` is one LLM call. Fields:

| Field | Description |
|---|---|
| `session_id` | Groups steps from the same session |
| `step` | Step index within the session (1–4) |
| `type` | `question_generation` / `answer_evaluation` / `mcq_generation` |
| `topic` | Domain of the source chunk |
| `input` | Exact input sent to the model (chunk, question, student answer…) |
| `output` | Raw model output |
| `duration_s` | Wall-clock inference time |
| `model` | Model ID used |

## Session structure

Each session runs 4 steps on one text chunk:

1. **Open question generation** — the model writes a focused exam question
2. **Correct answer evaluation** — structured tutor feedback on a good answer
3. **Wrong answer evaluation** — structured tutor feedback on a bad answer
4. **MCQ generation** — 4-option question with per-option explanations

Three sessions are included, covering: Operating Systems, Machine Learning, and Networking.

## Model

`{model_id}`

Built for the Build Small Hackathon, June 2026, by Team PaperProf (EPITA).
"""


def push_trace(all_steps: list[dict], model_id: str) -> None:
    """Upload the trace and its dataset card to the ``TRACE_REPO`` dataset.

    Creates the public dataset repo if it does not exist, then uploads
    ``paperprof_trace.jsonl`` and ``README.md`` as two separate commits.

    Args:
        all_steps: Step records to serialize, one JSONL row per record.
        model_id: Model identifier shown in the dataset card.
    """
    from huggingface_hub import HfApi

    # If HF_TOKEN is unset this passes token=None; presumably huggingface_hub
    # then uses its own stored credentials — confirm against the HfApi docs.
    api = HfApi(token=os.environ.get("HF_TOKEN"))
    api.create_repo(TRACE_REPO, repo_type="dataset", exist_ok=True, private=False)

    # JSONL trace file
    api.upload_file(
        path_or_fileobj=_to_jsonl(all_steps).encode("utf-8"),
        path_in_repo="paperprof_trace.jsonl",
        repo_id=TRACE_REPO,
        repo_type="dataset",
        commit_message="chore: upload PaperProf agent trace",
    )

    # Dataset card
    api.upload_file(
        path_or_fileobj=_dataset_card(model_id).encode("utf-8"),
        path_in_repo="README.md",
        repo_id=TRACE_REPO,
        repo_type="dataset",
        commit_message="chore: add dataset card",
    )

    print(f"\n✅ Trace pushed → https://huggingface.co/datasets/{TRACE_REPO}")


def main() -> None:
    """Warm up the model, run one session per demo chunk, and push the trace."""
    from model.llm import get_llm, DEFAULT_MODEL_ID

    print("Loading model (first call may take 60–90s locally)…")
    get_llm()  # warm up

    # The recorded model id comes from PAPERPROF_MODEL when set, otherwise
    # from the project default.
    model_id = os.environ.get("PAPERPROF_MODEL", DEFAULT_MODEL_ID)

    all_steps = []
    for chunk_info in DEMO_CHUNKS:
        # First 8 characters of a random UUID: short, readable session tag.
        session_id = str(uuid.uuid4())[:8]
        all_steps.extend(run_session(chunk_info, session_id, model_id))

    print(f"\n[push] {len(all_steps)} steps captured across {len(DEMO_CHUNKS)} sessions…")
    push_trace(all_steps, model_id)


if __name__ == "__main__":
    main()