Spaces:
Running on Zero
Running on Zero
| """ | |
| share_trace.py — Run a live PaperProf session and push the agent trace to HF Hub. | |
| Records each LLM step (question generation, answer evaluation, MCQ generation) | |
| as a structured dataset so the community can see how PaperProf works end-to-end. | |
| Usage: | |
| python share_trace.py | |
| Output: | |
| Dataset pushed to build-small-hackathon/PaperProf-traces | |
| """ | |
| import json | |
| import time | |
| import uuid | |
| import os | |
| import sys | |
| from datetime import datetime, timezone | |
# Put this script's own directory first on the import path so the
# function-level imports further down (core.questioner, core.evaluator,
# model.llm) can be resolved. NOTE(review): presumably the script lives at the
# project root next to those packages — confirm.
sys.path.insert(0, os.path.dirname(__file__))

# Hub dataset repository that receives the trace file and its README.
TRACE_REPO = "build-small-hackathon/PaperProf-traces"
# Demo material: three chunks from different domains, one session per entry.
#
# Each entry is a dict with:
#   "topic"            label printed in the console banner and stored in every
#                      trace record of the session
#   "chunk"            source passage handed to question generation, answer
#                      evaluation and MCQ generation
#   "student_answers"  canned answers to evaluate: "open" is submitted as the
#                      correct answer, "wrong" as the incorrect one
DEMO_CHUNKS = [
    {
        "topic": "Operating Systems — Virtual Memory",
        "chunk": (
            "Virtual memory is a memory management technique that gives each process the "
            "illusion of having access to a large, contiguous block of memory. The OS maps "
            "virtual addresses used by programs to physical addresses in RAM using a page table. "
            "When a process accesses a page not currently in RAM, a page fault occurs and the OS "
            "loads the required page from disk (swap space). This allows systems to run programs "
            "larger than physical RAM and provides memory isolation between processes."
        ),
        "student_answers": {
            "open": "Virtual memory allows programs to use more memory than physically available by mapping virtual addresses to physical ones using a page table.",
            "wrong": "Virtual memory is just another name for RAM, it speeds up the CPU cache.",
        },
    },
    {
        "topic": "Machine Learning — Gradient Descent",
        "chunk": (
            "Gradient descent is an iterative optimization algorithm used to minimize a loss "
            "function by updating model parameters in the direction opposite to the gradient. "
            "In each iteration, the gradient of the loss with respect to the parameters is "
            "computed, and the parameters are updated as θ = θ − α∇L(θ), where α is the "
            "learning rate. Too large a learning rate causes divergence; too small slows "
            "convergence. Stochastic gradient descent (SGD) approximates the true gradient "
            "using a random mini-batch at each step, making it scalable to large datasets."
        ),
        "student_answers": {
            "open": "Gradient descent minimizes the loss by repeatedly moving parameters opposite to the gradient, scaled by the learning rate.",
            "wrong": "Gradient descent always finds the global minimum of any function.",
        },
    },
    {
        "topic": "Networking — TCP Three-Way Handshake",
        "chunk": (
            "The TCP three-way handshake establishes a reliable connection between a client "
            "and server before data transfer begins. The client sends a SYN segment, the server "
            "responds with SYN-ACK, and the client completes the handshake with an ACK. Each "
            "side advertises its initial sequence number during this exchange, which is used to "
            "order and acknowledge packets throughout the connection. This ensures both parties "
            "are ready to send and receive before any application data flows."
        ),
        "student_answers": {
            "open": "The TCP handshake uses SYN, SYN-ACK, and ACK to synchronize sequence numbers and confirm both sides are ready to communicate.",
            "wrong": "TCP uses a two-way handshake: SYN from client and ACK from server.",
        },
    },
]
def timed(fn, *args, **kwargs):
    """Call ``fn(*args, **kwargs)`` and measure how long the call takes.

    The interval is read from ``time.perf_counter()``, a monotonic
    high-resolution clock. ``time.time()`` follows the system wall clock, so a
    clock adjustment during a long model call could produce a skewed or even
    negative duration.

    Args:
        fn: Callable to invoke.
        *args: Positional arguments forwarded to ``fn``.
        **kwargs: Keyword arguments forwarded to ``fn``.

    Returns:
        A ``(result, seconds)`` tuple: the return value of ``fn`` and the
        elapsed time in seconds, rounded to two decimals.

    Raises:
        Any exception raised by ``fn`` propagates unchanged.
    """
    started = time.perf_counter()
    result = fn(*args, **kwargs)
    return result, round(time.perf_counter() - started, 2)
def run_session(chunk_info: dict, session_id: str, model_id: str) -> list[dict]:
    """Run the four-step demo on one chunk and return its trace records.

    The steps are, in order: open question generation, evaluation of the
    canned correct answer, evaluation of the canned wrong answer, and MCQ
    generation. Every model call goes through ``timed`` and progress is
    printed to stdout.

    Args:
        chunk_info: Demo entry providing the keys ``"chunk"``, ``"topic"`` and
            ``"student_answers"`` (the latter with ``"open"`` and ``"wrong"``).
        session_id: Identifier copied into every record of this session.
        model_id: Model identifier copied into every record.

    Returns:
        Four dicts, one per step, each carrying ``session_id``, ``step``,
        ``type``, ``topic``, ``input``, ``output``, ``duration_s``, ``model``
        and a UTC ISO-8601 ``timestamp``.
    """
    from core.questioner import generate_question, generate_mcq
    from core.evaluator import evaluate_answer

    records = []
    passage = chunk_info["chunk"]
    subject = chunk_info["topic"]
    canned = chunk_info["student_answers"]

    def log_step(index, kind, sent, received, seconds):
        # One record per model call; keys are written in the same order for
        # every step, and the timestamp is taken when the record is logged.
        records.append({
            "session_id": session_id,
            "step": index,
            "type": kind,
            "topic": subject,
            "input": sent,
            "output": received,
            "duration_s": seconds,
            "model": model_id,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        })

    banner = "=" * 60
    print(f"\n{banner}")
    print(f"Topic: {subject}")
    print(banner)

    # Step 1 — open question generated from the passage.
    print("[1/4] Generating open question…")
    question, seconds = timed(
        generate_question, passage, language="English", difficulty="Normal"
    )
    print(f" Q: {question} ({seconds}s)")
    log_step(
        1,
        "question_generation",
        {"chunk": passage, "difficulty": "Normal", "language": "English"},
        {"question": question},
        seconds,
    )

    # Step 2 — feedback on the canned correct answer.
    print("[2/4] Evaluating correct answer…")
    good_answer = canned["open"]
    good_feedback, seconds = timed(
        evaluate_answer, question, passage, good_answer, language="English"
    )
    print(f" Feedback (correct): {good_feedback[:80]}… ({seconds}s)")
    log_step(
        2,
        "answer_evaluation",
        {
            "chunk": passage,
            "question": question,
            "student_answer": good_answer,
            "expected_quality": "correct",
        },
        {"feedback": good_feedback},
        seconds,
    )

    # Step 3 — feedback on the canned wrong answer, same question.
    print("[3/4] Evaluating incorrect answer…")
    bad_answer = canned["wrong"]
    bad_feedback, seconds = timed(
        evaluate_answer, question, passage, bad_answer, language="English"
    )
    print(f" Feedback (wrong): {bad_feedback[:80]}… ({seconds}s)")
    log_step(
        3,
        "answer_evaluation",
        {
            "chunk": passage,
            "question": question,
            "student_answer": bad_answer,
            "expected_quality": "incorrect",
        },
        {"feedback": bad_feedback},
        seconds,
    )

    # Step 4 — multiple-choice question generated from the same passage.
    print("[4/4] Generating MCQ…")
    mcq, seconds = timed(generate_mcq, passage, language="English")
    print(f" MCQ question: {str(mcq.get('question',''))[:80]} ({seconds}s)")
    log_step(
        4,
        "mcq_generation",
        {"chunk": passage, "language": "English"},
        {"mcq": mcq},
        seconds,
    )

    return records
def _build_dataset_card(model_id: str) -> str:
    """Return the README.md (dataset card) text for the trace dataset.

    Blank lines separate the Markdown blocks on purpose: without them the
    paragraph after the numbered list is parsed as a continuation of item 4
    and the model ID line merges with the closing credit line.
    """
    return f"""---
license: apache-2.0
task_categories:
- question-answering
- text-generation
language:
- en
tags:
- agent-trace
- education
- paperprof
- build-small-hackathon
---

# PaperProf Agent Trace

Step-by-step trace of [PaperProf](https://huggingface.co/spaces/build-small-hackathon/PaperProf),
an AI study buddy that turns course PDFs into interactive quiz sessions.

## What's in this dataset

Each row in `paperprof_trace.jsonl` is one LLM call. Fields:

| Field | Description |
|---|---|
| `session_id` | Groups steps from the same session |
| `step` | Step index within the session (1–4) |
| `type` | `question_generation` / `answer_evaluation` / `mcq_generation` |
| `topic` | Domain of the source chunk |
| `input` | Exact input sent to the model (chunk, question, student answer…) |
| `output` | Raw model output |
| `duration_s` | Wall-clock inference time |
| `model` | Model ID used |

## Session structure

Each session runs 4 steps on one text chunk:

1. **Open question generation** — the model writes a focused exam question
2. **Correct answer evaluation** — structured tutor feedback on a good answer
3. **Wrong answer evaluation** — structured tutor feedback on a bad answer
4. **MCQ generation** — 4-option question with per-option explanations

Three sessions are included, covering: Operating Systems, Machine Learning, and Networking.

## Model

`{model_id}`

Built for the Build Small Hackathon, June 2026, by Team PaperProf (EPITA).
"""


def push_trace(all_steps: list[dict], model_id: str):
    """Upload the trace and its dataset card to the ``TRACE_REPO`` dataset.

    Creates the public dataset repository if it does not exist yet, then
    uploads two files in two commits: ``paperprof_trace.jsonl`` (one JSON
    object per step) and ``README.md``.

    Args:
        all_steps: Trace records to serialize, one JSONL row each. Every
            record must be JSON-serializable.
        model_id: Model identifier shown in the dataset card.

    Raises:
        TypeError: If a record contains a value ``json.dumps`` cannot encode.
        Errors raised by ``huggingface_hub`` (authentication, network, …)
        propagate unchanged.
    """
    from huggingface_hub import HfApi

    # The token comes from the HF_TOKEN environment variable; when it is unset
    # None is passed through and credential resolution is left to HfApi.
    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)
    api.create_repo(TRACE_REPO, repo_type="dataset", exist_ok=True, private=False)

    # JSONL trace file: every record ends with a newline, so the file can be
    # appended to or concatenated without gluing two records onto one line.
    # ensure_ascii=False keeps non-ASCII text readable, hence the explicit
    # UTF-8 encoding.
    jsonl = "".join(json.dumps(step, ensure_ascii=False) + "\n" for step in all_steps)
    api.upload_file(
        path_or_fileobj=jsonl.encode("utf-8"),
        path_in_repo="paperprof_trace.jsonl",
        repo_id=TRACE_REPO,
        repo_type="dataset",
        commit_message="chore: upload PaperProf agent trace",
    )

    readme = _build_dataset_card(model_id)
    api.upload_file(
        path_or_fileobj=readme.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=TRACE_REPO,
        repo_type="dataset",
        commit_message="chore: add dataset card",
    )
    print(f"\n✅ Trace pushed → https://huggingface.co/datasets/{TRACE_REPO}")
def main():
    """Run one demo session per entry in ``DEMO_CHUNKS`` and push the trace.

    The model is warmed up first. The model ID recorded in the trace is taken
    from the ``PAPERPROF_MODEL`` environment variable, falling back to
    ``DEFAULT_MODEL_ID``. Each session gets a fresh 8-character ID taken from
    a random UUID.
    """
    from model.llm import get_llm, DEFAULT_MODEL_ID

    print("Loading model (first call may take 60–90s locally)…")
    get_llm()  # warm up

    model_id = os.environ.get("PAPERPROF_MODEL", DEFAULT_MODEL_ID)

    collected = []
    for demo in DEMO_CHUNKS:
        # First eight hex digits of a random UUID.
        short_id = uuid.uuid4().hex[:8]
        collected += run_session(demo, short_id, model_id)

    print(f"\n[push] {len(collected)} steps captured across {len(DEMO_CHUNKS)} sessions…")
    push_trace(collected, model_id)
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()