Spaces:

build-small-hackathon
/

PaperProf

Running on Zero

File size: 9,818 Bytes

3f8b85e

"""
share_trace.py — Run a live PaperProf session and push the agent trace to HF Hub.

Records each LLM step (question generation, answer evaluation, MCQ generation)
as a structured dataset so the community can see how PaperProf works end-to-end.

Usage:
    python share_trace.py

Output:
    Dataset pushed to build-small-hackathon/PaperProf-traces
"""

import json
import time
import uuid
import os
import sys
from datetime import datetime, timezone

# Put this file's directory at the front of the import path. The function-level
# imports further down (core.*, model.*) presumably resolve to packages that sit
# next to this script — confirm against the repository layout.
sys.path.insert(0, os.path.dirname(__file__))

# Hub dataset repository that push_trace() creates (if missing) and uploads
# the JSONL trace and its README to.
TRACE_REPO = "build-small-hackathon/PaperProf-traces"

# Demo inputs: three text chunks from different domains (operating systems,
# machine learning, networking). main() runs one session per entry.
# Each entry carries:
#   "topic"           — label copied into every trace step of that session
#   "chunk"           — source passage handed to the question/MCQ generators
#                       and to the answer evaluator
#   "student_answers" — canned answers used by run_session(): "open" is recorded
#                       with expected_quality "correct", "wrong" with "incorrect"
DEMO_CHUNKS = [
    {
        "topic": "Operating Systems — Virtual Memory",
        "chunk": (
            "Virtual memory is a memory management technique that gives each process the "
            "illusion of having access to a large, contiguous block of memory. The OS maps "
            "virtual addresses used by programs to physical addresses in RAM using a page table. "
            "When a process accesses a page not currently in RAM, a page fault occurs and the OS "
            "loads the required page from disk (swap space). This allows systems to run programs "
            "larger than physical RAM and provides memory isolation between processes."
        ),
        "student_answers": {
            "open": "Virtual memory allows programs to use more memory than physically available by mapping virtual addresses to physical ones using a page table.",
            "wrong": "Virtual memory is just another name for RAM, it speeds up the CPU cache.",
        },
    },
    {
        "topic": "Machine Learning — Gradient Descent",
        "chunk": (
            "Gradient descent is an iterative optimization algorithm used to minimize a loss "
            "function by updating model parameters in the direction opposite to the gradient. "
            "In each iteration, the gradient of the loss with respect to the parameters is "
            "computed, and the parameters are updated as θ = θ − α∇L(θ), where α is the "
            "learning rate. Too large a learning rate causes divergence; too small slows "
            "convergence. Stochastic gradient descent (SGD) approximates the true gradient "
            "using a random mini-batch at each step, making it scalable to large datasets."
        ),
        "student_answers": {
            "open": "Gradient descent minimizes the loss by repeatedly moving parameters opposite to the gradient, scaled by the learning rate.",
            "wrong": "Gradient descent always finds the global minimum of any function.",
        },
    },
    {
        "topic": "Networking — TCP Three-Way Handshake",
        "chunk": (
            "The TCP three-way handshake establishes a reliable connection between a client "
            "and server before data transfer begins. The client sends a SYN segment, the server "
            "responds with SYN-ACK, and the client completes the handshake with an ACK. Each "
            "side advertises its initial sequence number during this exchange, which is used to "
            "order and acknowledge packets throughout the connection. This ensures both parties "
            "are ready to send and receive before any application data flows."
        ),
        "student_answers": {
            "open": "The TCP handshake uses SYN, SYN-ACK, and ACK to synchronize sequence numbers and confirm both sides are ready to communicate.",
            "wrong": "TCP uses a two-way handshake: SYN from client and ACK from server.",
        },
    },
]


def timed(fn, *args, **kwargs):
    """Call ``fn(*args, **kwargs)`` and measure how long the call takes.

    The duration is taken from ``time.perf_counter()``, a monotonic clock, so
    it cannot go negative or jump if the system clock is adjusted while ``fn``
    is running (which ``time.time()`` does not guarantee).

    Args:
        fn: Callable to invoke.
        *args: Positional arguments forwarded to ``fn``.
        **kwargs: Keyword arguments forwarded to ``fn``.

    Returns:
        A ``(result, seconds)`` tuple: whatever ``fn`` returned, and the
        elapsed time in seconds rounded to two decimals.

    Any exception raised by ``fn`` propagates unchanged; no duration is
    reported in that case.
    """
    started = time.perf_counter()
    result = fn(*args, **kwargs)
    return result, round(time.perf_counter() - started, 2)


def _preview(value, limit: int = 80) -> str:
    """Return at most ``limit`` characters of ``str(value)`` for console output."""
    return str(value)[:limit]


def _trace_record(session_id, step, step_type, topic, inputs, outputs, duration_s, model_id) -> dict:
    """Build one trace row, stamped with the current UTC time in ISO 8601 form."""
    return {
        "session_id": session_id,
        "step": step,
        "type": step_type,
        "topic": topic,
        "input": inputs,
        "output": outputs,
        "duration_s": duration_s,
        "model": model_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }


def run_session(chunk_info: dict, session_id: str, model_id: str) -> list[dict]:
    """Run the four-step demo on one chunk and return its trace rows.

    Steps, in order:
        1. ``generate_question`` on the chunk (English, "Normal" difficulty).
        2. ``evaluate_answer`` on the canned ``"open"`` answer
           (recorded with ``expected_quality`` "correct").
        3. ``evaluate_answer`` on the canned ``"wrong"`` answer
           (recorded with ``expected_quality`` "incorrect").
        4. ``generate_mcq`` on the chunk (English).

    Each call is timed with ``timed`` and progress is printed to stdout.

    Args:
        chunk_info: Mapping with the keys ``"chunk"``, ``"topic"`` and
            ``"student_answers"`` (the latter holding ``"open"`` and
            ``"wrong"``). A missing key raises ``KeyError``.
        session_id: Identifier copied into every row of this session.
        model_id: Model identifier copied into every row; it is only recorded
            here, not used to select a model.

    Returns:
        A list of four dicts with the keys ``session_id``, ``step``, ``type``,
        ``topic``, ``input``, ``output``, ``duration_s``, ``model`` and
        ``timestamp``.

    NOTE(review): the return types of the ``core`` functions are not visible
    from this file. The console previews therefore stringify defensively so an
    unexpected type cannot abort the session; the values stored in the trace
    rows are the raw return values, untouched.
    """
    # Imported lazily so that importing this module does not pull in the model stack.
    from core.questioner import generate_question, generate_mcq
    from core.evaluator import evaluate_answer

    chunk = chunk_info["chunk"]
    topic = chunk_info["topic"]
    answers = chunk_info["student_answers"]
    steps = []

    print(f"\n{'='*60}")
    print(f"Topic: {topic}")
    print(f"{'='*60}")

    # Step 1 — Open question generation
    print("[1/4] Generating open question…")
    question, dur = timed(generate_question, chunk, language="English", difficulty="Normal")
    print(f"      Q: {question}  ({dur}s)")
    steps.append(_trace_record(
        session_id, 1, "question_generation", topic,
        {"chunk": chunk, "difficulty": "Normal", "language": "English"},
        {"question": question},
        dur, model_id,
    ))

    # Step 2 — Evaluate a correct answer
    print("[2/4] Evaluating correct answer…")
    feedback_ok, dur = timed(evaluate_answer, question, chunk, answers["open"], language="English")
    print(f"      Feedback (correct): {_preview(feedback_ok)}…  ({dur}s)")
    steps.append(_trace_record(
        session_id, 2, "answer_evaluation", topic,
        {
            "chunk": chunk,
            "question": question,
            "student_answer": answers["open"],
            "expected_quality": "correct",
        },
        {"feedback": feedback_ok},
        dur, model_id,
    ))

    # Step 3 — Evaluate a wrong answer (same question as step 2)
    print("[3/4] Evaluating incorrect answer…")
    feedback_bad, dur = timed(evaluate_answer, question, chunk, answers["wrong"], language="English")
    print(f"      Feedback (wrong):   {_preview(feedback_bad)}…  ({dur}s)")
    steps.append(_trace_record(
        session_id, 3, "answer_evaluation", topic,
        {
            "chunk": chunk,
            "question": question,
            "student_answer": answers["wrong"],
            "expected_quality": "incorrect",
        },
        {"feedback": feedback_bad},
        dur, model_id,
    ))

    # Step 4 — MCQ generation
    print("[4/4] Generating MCQ…")
    mcq, dur = timed(generate_mcq, chunk, language="English")
    # Only look up "question" when the result offers .get(); otherwise preview
    # the raw value instead of crashing on the print.
    getter = getattr(mcq, "get", None)
    mcq_question = getter("question", "") if callable(getter) else mcq
    print(f"      MCQ question: {_preview(mcq_question)}  ({dur}s)")
    steps.append(_trace_record(
        session_id, 4, "mcq_generation", topic,
        {"chunk": chunk, "language": "English"},
        {"mcq": mcq},
        dur, model_id,
    ))

    return steps


def _render_dataset_card(model_id: str) -> str:
    """Return the README.md (dataset card) text with ``model_id`` filled in."""
    return f"""---
license: apache-2.0
task_categories:
  - question-answering
  - text-generation
language:
  - en
tags:
  - agent-trace
  - education
  - paperprof
  - build-small-hackathon
---

# PaperProf Agent Trace

Step-by-step trace of [PaperProf](https://huggingface.co/spaces/build-small-hackathon/PaperProf),
an AI study buddy that turns course PDFs into interactive quiz sessions.

## What's in this dataset

Each row in `paperprof_trace.jsonl` is one LLM call. Fields:

| Field | Description |
|---|---|
| `session_id` | Groups steps from the same session |
| `step` | Step index within the session (1–4) |
| `type` | `question_generation` / `answer_evaluation` / `mcq_generation` |
| `topic` | Domain of the source chunk |
| `input` | Exact input sent to the model (chunk, question, student answer…) |
| `output` | Raw model output |
| `duration_s` | Wall-clock inference time |
| `model` | Model ID used |

## Session structure

Each session runs 4 steps on one text chunk:
1. **Open question generation** — the model writes a focused exam question
2. **Correct answer evaluation** — structured tutor feedback on a good answer
3. **Wrong answer evaluation** — structured tutor feedback on a bad answer
4. **MCQ generation** — 4-option question with per-option explanations

Three sessions are included, covering: Operating Systems, Machine Learning, and Networking.

## Model

`{model_id}`

Built for the Build Small Hackathon, June 2026, by Team PaperProf (EPITA).
"""


def push_trace(all_steps: list[dict], model_id: str):
    """Upload the collected trace and its dataset card to ``TRACE_REPO``.

    Creates the public dataset repository if it does not exist yet, then
    uploads two files in two separate commits:

    * ``paperprof_trace.jsonl`` — one JSON object per line, one per step
    * ``README.md`` — the dataset card, which names ``model_id``

    Args:
        all_steps: Trace rows to publish. Every row must be JSON-serialisable.
        model_id: Model identifier shown in the dataset card.

    Raises:
        ValueError: If ``all_steps`` is empty. Checked before any network
            call, so an empty run can never overwrite a previously published
            trace with an empty file.

    The token is read from the ``HF_TOKEN`` environment variable; when it is
    unset, ``None`` is passed to ``HfApi`` (which, per the huggingface_hub
    docs, falls back to a locally saved login — confirm for the pinned
    version). Errors from huggingface_hub propagate to the caller.
    """
    if not all_steps:
        raise ValueError(
            "no trace steps to push; refusing to overwrite the published trace with an empty file"
        )

    # Imported lazily: only needed when something is actually pushed.
    from huggingface_hub import HfApi

    api = HfApi(token=os.environ.get("HF_TOKEN"))
    api.create_repo(TRACE_REPO, repo_type="dataset", exist_ok=True, private=False)

    # JSONL trace file: every record, including the last, ends with a newline.
    # ensure_ascii=False keeps non-ASCII text readable, hence the explicit UTF-8.
    records = [json.dumps(step, ensure_ascii=False) for step in all_steps]
    trace_bytes = ("\n".join(records) + "\n").encode("utf-8")

    api.upload_file(
        path_or_fileobj=trace_bytes,
        path_in_repo="paperprof_trace.jsonl",
        repo_id=TRACE_REPO,
        repo_type="dataset",
        commit_message="chore: upload PaperProf agent trace",
    )

    api.upload_file(
        path_or_fileobj=_render_dataset_card(model_id).encode("utf-8"),
        path_in_repo="README.md",
        repo_id=TRACE_REPO,
        repo_type="dataset",
        commit_message="chore: add dataset card",
    )

    print(f"\n✅ Trace pushed → https://huggingface.co/datasets/{TRACE_REPO}")


def main():
    """Run every demo session and publish the combined trace.

    Warms up the model via ``get_llm()`` (its return value is discarded),
    resolves the model ID from the ``PAPERPROF_MODEL`` environment variable
    with ``DEFAULT_MODEL_ID`` as fallback, runs ``run_session`` once per entry
    of ``DEMO_CHUNKS`` under a fresh 8-character session ID, and hands all
    collected steps to ``push_trace``.
    """
    from model.llm import get_llm, DEFAULT_MODEL_ID

    print("Loading model (first call may take 60–90s locally)…")
    get_llm()  # warm up
    model_id = os.environ.get("PAPERPROF_MODEL", DEFAULT_MODEL_ID)

    collected = []
    for demo in DEMO_CHUNKS:
        # First 8 hex digits of a random UUID: short, readable session label.
        short_id = uuid.uuid4().hex[:8]
        collected += run_session(demo, short_id, model_id)

    session_count = len(DEMO_CHUNKS)
    print(f"\n[push] {len(collected)} steps captured across {session_count} sessions…")
    push_trace(collected, model_id)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()