Spaces:

build-small-hackathon
/

PaperProf

Running on Zero

PaperProf / share_trace.py

Mehdi

feat: add share_trace.py — run session + push agent trace to HF Hub dataset

3f8b85e 13 days ago

9.82 kB

	"""
	share_trace.py — Run a live PaperProf session and push the agent trace to HF Hub.

	Records each LLM step (question generation, answer evaluation, MCQ generation)
	as a structured dataset so the community can see how PaperProf works end-to-end.

	Usage:
	python share_trace.py

	Output:
	Dataset pushed to build-small-hackathon/PaperProf-traces
	"""

	import json
	import time
	import uuid
	import os
	import sys
	from datetime import datetime, timezone

	# Put this script's own directory first on the import path so the lazy
	# `core.*` and `model.*` imports done inside the functions below can be
	# resolved from there (presumably those packages sit next to this file —
	# verify against the repository layout).
	sys.path.insert(0, os.path.dirname(__file__))

	# Hub dataset repo id that push_trace() creates (if missing) and uploads
	# the JSONL trace and the README dataset card to.
	TRACE_REPO = "build-small-hackathon/PaperProf-traces"

	# Demo material for the traced sessions: one entry per subject area.
	# Each entry carries the source passage ("chunk"), a display "topic", and two
	# canned student answers — "open" is used as the good answer and "wrong" as
	# the bad one when the evaluator is exercised in run_session().
	DEMO_CHUNKS = [
	    # --- Operating systems -------------------------------------------------
	    {
	        "topic": "Operating Systems — Virtual Memory",
	        "chunk": (
	            "Virtual memory is a memory management technique that gives each process the "
	            "illusion of having access to a large, contiguous block of memory. The OS maps "
	            "virtual addresses used by programs to physical addresses in RAM using a page table. "
	            "When a process accesses a page not currently in RAM, a page fault occurs and the OS "
	            "loads the required page from disk (swap space). This allows systems to run programs "
	            "larger than physical RAM and provides memory isolation between processes."
	        ),
	        "student_answers": {
	            "open": (
	                "Virtual memory allows programs to use more memory than physically "
	                "available by mapping virtual addresses to physical ones using a page table."
	            ),
	            "wrong": "Virtual memory is just another name for RAM, it speeds up the CPU cache.",
	        },
	    },
	    # --- Machine learning --------------------------------------------------
	    {
	        "topic": "Machine Learning — Gradient Descent",
	        "chunk": (
	            "Gradient descent is an iterative optimization algorithm used to minimize a loss "
	            "function by updating model parameters in the direction opposite to the gradient. "
	            "In each iteration, the gradient of the loss with respect to the parameters is "
	            "computed, and the parameters are updated as θ = θ − α∇L(θ), where α is the "
	            "learning rate. Too large a learning rate causes divergence; too small slows "
	            "convergence. Stochastic gradient descent (SGD) approximates the true gradient "
	            "using a random mini-batch at each step, making it scalable to large datasets."
	        ),
	        "student_answers": {
	            "open": (
	                "Gradient descent minimizes the loss by repeatedly moving parameters "
	                "opposite to the gradient, scaled by the learning rate."
	            ),
	            "wrong": "Gradient descent always finds the global minimum of any function.",
	        },
	    },
	    # --- Networking --------------------------------------------------------
	    {
	        "topic": "Networking — TCP Three-Way Handshake",
	        "chunk": (
	            "The TCP three-way handshake establishes a reliable connection between a client "
	            "and server before data transfer begins. The client sends a SYN segment, the server "
	            "responds with SYN-ACK, and the client completes the handshake with an ACK. Each "
	            "side advertises its initial sequence number during this exchange, which is used to "
	            "order and acknowledge packets throughout the connection. This ensures both parties "
	            "are ready to send and receive before any application data flows."
	        ),
	        "student_answers": {
	            "open": (
	                "The TCP handshake uses SYN, SYN-ACK, and ACK to synchronize sequence "
	                "numbers and confirm both sides are ready to communicate."
	            ),
	            "wrong": "TCP uses a two-way handshake: SYN from client and ACK from server.",
	        },
	    },
	]


	def timed(fn, *args, **kwargs):
	    """Call ``fn(*args, **kwargs)`` and measure how long the call takes.

	    Args:
	        fn: Callable to invoke.
	        *args: Positional arguments forwarded to ``fn`` unchanged.
	        **kwargs: Keyword arguments forwarded to ``fn`` unchanged.

	    Returns:
	        A ``(result, seconds)`` tuple: whatever ``fn`` returned, and the
	        elapsed wall-clock time rounded to two decimals.

	    Any exception raised by ``fn`` propagates to the caller.
	    """
	    # perf_counter() is monotonic, so the interval cannot go negative if the
	    # system clock is adjusted while fn is running.
	    started = time.perf_counter()
	    result = fn(*args, **kwargs)
	    return result, round(time.perf_counter() - started, 2)


	def run_session(chunk_info: dict, session_id: str, model_id: str) -> list[dict]:
	    """Run the four traced LLM steps for one demo chunk.

	    The steps are: open-question generation, evaluation of the canned good
	    answer, evaluation of the canned bad answer (both against the generated
	    question), and MCQ generation. Progress is printed to stdout.

	    Args:
	        chunk_info: Mapping with ``"chunk"``, ``"topic"`` and
	            ``"student_answers"`` (itself holding ``"open"`` and ``"wrong"``).
	        session_id: Identifier copied into every row of this session.
	        model_id: Model identifier copied into every row.

	    Returns:
	        Four row dicts, in step order, each with the keys ``session_id``,
	        ``step``, ``type``, ``topic``, ``input``, ``output``, ``duration_s``,
	        ``model`` and ``timestamp`` (UTC, ISO-8601).
	    """
	    # Imported lazily, at call time rather than at module import.
	    from core.questioner import generate_question, generate_mcq
	    from core.evaluator import evaluate_answer

	    rows = []
	    passage = chunk_info["chunk"]
	    subject = chunk_info["topic"]
	    canned = chunk_info["student_answers"]

	    def record(number, kind, sent, received, elapsed):
	        # Key order is kept fixed because it becomes the JSON field order.
	        rows.append({
	            "session_id": session_id,
	            "step": number,
	            "type": kind,
	            "topic": subject,
	            "input": sent,
	            "output": received,
	            "duration_s": elapsed,
	            "model": model_id,
	            "timestamp": datetime.now(timezone.utc).isoformat(),
	        })

	    rule = "=" * 60
	    print(f"\n{rule}")
	    print(f"Topic: {subject}")
	    print(f"{rule}")

	    # Step 1 — open question
	    print("[1/4] Generating open question…")
	    question, elapsed = timed(
	        generate_question, passage, language="English", difficulty="Normal"
	    )
	    print(f" Q: {question} ({elapsed}s)")
	    record(
	        1,
	        "question_generation",
	        {"chunk": passage, "difficulty": "Normal", "language": "English"},
	        {"question": question},
	        elapsed,
	    )

	    # Steps 2 and 3 — grade the good answer, then the bad one.
	    # (step number, key into the canned answers, expected quality, log label)
	    grading_plan = (
	        (2, "open", "correct", "correct"),
	        (3, "wrong", "incorrect", "wrong"),
	    )
	    for number, answer_key, quality, label in grading_plan:
	        print(f"[{number}/4] Evaluating {quality} answer…")
	        student_answer = canned[answer_key]
	        feedback, elapsed = timed(
	            evaluate_answer, question, passage, student_answer, language="English"
	        )
	        # The slice assumes the evaluator returns a string — TODO confirm.
	        print(f" Feedback ({label}): {feedback[:80]}… ({elapsed}s)")
	        record(
	            number,
	            "answer_evaluation",
	            {
	                "chunk": passage,
	                "question": question,
	                "student_answer": student_answer,
	                "expected_quality": quality,
	            },
	            {"feedback": feedback},
	            elapsed,
	        )

	    # Step 4 — multiple-choice question
	    print("[4/4] Generating MCQ…")
	    mcq, elapsed = timed(generate_mcq, passage, language="English")
	    # `.get` assumes the MCQ comes back as a mapping — TODO confirm.
	    preview = str(mcq.get("question", ""))[:80]
	    print(f" MCQ question: {preview} ({elapsed}s)")
	    record(
	        4,
	        "mcq_generation",
	        {"chunk": passage, "language": "English"},
	        {"mcq": mcq},
	        elapsed,
	    )

	    return rows


	def _build_dataset_card(model_id: str) -> str:
	    """Return the README.md (dataset card) text for the trace dataset."""
	    # Built line by line rather than from an indented triple-quoted literal:
	    # the YAML front matter delimiters and keys must start at column 0, and
	    # the Markdown table needs plain `|` separators (no backslash escapes).
	    card_lines = [
	        "---",
	        "license: apache-2.0",
	        "task_categories:",
	        "- question-answering",
	        "- text-generation",
	        "language:",
	        "- en",
	        "tags:",
	        "- agent-trace",
	        "- education",
	        "- paperprof",
	        "- build-small-hackathon",
	        "---",
	        "",
	        "# PaperProf Agent Trace",
	        "",
	        "Step-by-step trace of [PaperProf](https://huggingface.co/spaces/build-small-hackathon/PaperProf),",
	        "an AI study buddy that turns course PDFs into interactive quiz sessions.",
	        "",
	        "## What's in this dataset",
	        "",
	        "Each row in `paperprof_trace.jsonl` is one LLM call. Fields:",
	        "",
	        "| Field | Description |",
	        "|---|---|",
	        "| `session_id` | Groups steps from the same session |",
	        "| `step` | Step index within the session (1–4) |",
	        "| `type` | `question_generation` / `answer_evaluation` / `mcq_generation` |",
	        "| `topic` | Domain of the source chunk |",
	        "| `input` | Exact input sent to the model (chunk, question, student answer…) |",
	        "| `output` | Raw model output |",
	        "| `duration_s` | Wall-clock inference time |",
	        "| `model` | Model ID used |",
	        "| `timestamp` | UTC time the step was recorded (ISO 8601) |",
	        "",
	        "## Session structure",
	        "",
	        "Each session runs 4 steps on one text chunk:",
	        "1. Open question generation — the model writes a focused exam question",
	        "2. Correct answer evaluation — structured tutor feedback on a good answer",
	        "3. Wrong answer evaluation — structured tutor feedback on a bad answer",
	        "4. MCQ generation — 4-option question with per-option explanations",
	        "",
	        "Three sessions are included, covering: Operating Systems, Machine Learning, and Networking.",
	        "",
	        "## Model",
	        "",
	        f"`{model_id}`",
	        "",
	        "Built for the Build Small Hackathon, June 2026, by Team PaperProf (EPITA).",
	    ]
	    return "\n".join(card_lines) + "\n"


	def push_trace(all_steps: list[dict], model_id: str):
	    """Upload the collected trace and its dataset card to the Hub.

	    Creates the public dataset repo named by ``TRACE_REPO`` if it does not
	    exist, then uploads two files in two separate commits:
	    ``paperprof_trace.jsonl`` (one JSON object per step) and ``README.md``.

	    Args:
	        all_steps: Step dicts to serialize; each must be JSON-serializable.
	        model_id: Model identifier shown in the dataset card.

	    Errors from ``json.dumps`` or from the Hub client are not caught here.
	    """
	    from huggingface_hub import HfApi

	    # HF_TOKEN may be unset, in which case None is passed through;
	    # presumably the client then falls back to locally stored credentials —
	    # confirm against the huggingface_hub docs.
	    token = os.environ.get("HF_TOKEN")
	    api = HfApi(token=token)

	    api.create_repo(TRACE_REPO, repo_type="dataset", exist_ok=True, private=False)

	    # JSONL trace file; ensure_ascii=False keeps non-ASCII text readable.
	    jsonl = "\n".join(json.dumps(step, ensure_ascii=False) for step in all_steps)
	    api.upload_file(
	        path_or_fileobj=jsonl.encode("utf-8"),
	        path_in_repo="paperprof_trace.jsonl",
	        repo_id=TRACE_REPO,
	        repo_type="dataset",
	        commit_message="chore: upload PaperProf agent trace",
	    )

	    api.upload_file(
	        path_or_fileobj=_build_dataset_card(model_id).encode("utf-8"),
	        path_in_repo="README.md",
	        repo_id=TRACE_REPO,
	        repo_type="dataset",
	        commit_message="chore: add dataset card",
	    )

	    print(f"\n✅ Trace pushed → https://huggingface.co/datasets/{TRACE_REPO}")


	def main():
	    """Warm up the model, run every demo session, and push the combined trace.

	    The model id recorded in the trace is read from the ``PAPERPROF_MODEL``
	    environment variable, falling back to ``DEFAULT_MODEL_ID``.
	    """
	    from model.llm import get_llm, DEFAULT_MODEL_ID

	    print("Loading model (first call may take 60–90s locally)…")
	    get_llm()  # warm up; the returned object is not used here

	    # NOTE(review): this id is what gets written into the trace; whether
	    # get_llm() loads the same model is not visible from this file — confirm.
	    active_model = os.environ.get("PAPERPROF_MODEL", DEFAULT_MODEL_ID)

	    collected = []
	    for demo in DEMO_CHUNKS:
	        # Short session tag: the first 8 hex digits of a random UUID.
	        tag = uuid.uuid4().hex[:8]
	        collected.extend(run_session(demo, tag, active_model))

	    print(f"\n[push] {len(collected)} steps captured across {len(DEMO_CHUNKS)} sessions…")
	    push_trace(collected, active_model)


	# Run the demo only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()