Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

App Files Files Community

hackathon-advisor / tests /test_lora_training_kit.py

JacobLinCool

feat: build retrieval index with llama cpp

e12a049 verified 4 days ago

raw

history blame contribute delete

3.54 kB

	import json
	import subprocess
	import sys
	from io import BytesIO
	from pathlib import Path

	from tests.helpers import load_test_index
	from zipfile import ZipFile

	from hackathon_advisor.agent import AdvisorEngine
	from hackathon_advisor.data import ProjectIndex
	from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
	from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
	from hackathon_advisor.lora_training_kit import (
	build_lora_training_kit_zip,
	parse_lora_dataset_jsonl,
	)
	from hackathon_advisor.prize_ledger import prize_ledger
	from hackathon_advisor.trace_export import trace_metadata


	def test_lora_training_kit_contains_recipe_and_model_card() -> None:
	    """Build the LoRA training kit zip and verify the files packed inside it.

	    The kit is built from the demo rehearsal session, the trace metadata
	    (extended with the project count) and the prize ledger. The test then
	    checks the archive's member list and selected fields of the manifest,
	    training recipe, model card and train command.
	    """
	    project_index = load_test_index()
	    advisor = AdvisorEngine(project_index)

	    # Copy the trace metadata and add the project count on top of it.
	    kit_metadata = dict(trace_metadata(project_index))
	    kit_metadata["project_count"] = len(project_index.projects)

	    rehearsal = build_demo_rehearsal(advisor)
	    ledger = prize_ledger(advisor.runtime_status())
	    kit_bytes = build_lora_training_kit_zip(rehearsal["session"], kit_metadata, ledger)

	    # Read everything needed while the archive is open; assert afterwards.
	    with ZipFile(BytesIO(kit_bytes)) as kit:
	        members = set(kit.namelist())
	        manifest = json.loads(kit.read("manifest.json"))
	        recipe = json.loads(kit.read("training-recipe.json"))
	        model_card = kit.read("adapter-model-card.md").decode("utf-8")
	        train_command = kit.read("train-command.txt").decode("utf-8")

	    expected_members = {
	        "manifest.json",
	        "lora-sft.jsonl",
	        "training-recipe.json",
	        "adapter-model-card.md",
	        "train-command.txt",
	        "README.md",
	    }
	    assert members == expected_members

	    # Manifest and recipe must agree with each other and name the expected targets.
	    assert manifest["type"] == "lora_training_kit_manifest"
	    assert manifest["publish_status"] == "published"
	    assert recipe["base_model"] == "openbmb/MiniCPM5-1B"
	    assert recipe["adapter_repo"] == "build-small-hackathon/hackathon-advisor-minicpm5-lora"
	    assert recipe["example_count"] == manifest["example_count"]

	    # Human-readable artifacts: the model card text and the training command line.
	    assert "PEFT LoRA adapter is trained" in model_card
	    assert "scripts/train_minicpm_lora.py" in train_command
	    assert "--push-to-hub" in train_command
	    assert "--hub-repo-id build-small-hackathon/hackathon-advisor-minicpm5-lora" in train_command


	def test_parse_lora_dataset_jsonl_rejects_empty_payload() -> None:
	    """Parsing an empty string must raise a ValueError whose message mentions "empty"."""
	    rejection = None
	    try:
	        parse_lora_dataset_jsonl("")
	    except ValueError as caught:
	        # Keep the exception so the message can be checked outside the handler.
	        rejection = caught

	    if rejection is None:
	        raise AssertionError("empty dataset should be rejected")
	    assert "empty" in str(rejection)


	def test_train_minicpm_lora_dry_run_writes_recipe(tmp_path: Path) -> None:
	    """Run the training script with ``--dry-run`` and check the files it writes.

	    A LoRA dataset built from the demo rehearsal session is written to
	    ``tmp_path``, then ``scripts/train_minicpm_lora.py`` is launched as a
	    subprocess. The test checks the script's stdout, the recipe JSON it
	    writes, and that a train command file exists.

	    Args:
	        tmp_path: pytest-provided temporary directory for the dataset and output.
	    """
	    # This file lives in <repo>/tests/, so the repository root is one level up.
	    # Assumes scripts/ sits at that root next to tests/ -- TODO confirm.
	    repo_root = Path(__file__).resolve().parents[1]

	    index = load_test_index()
	    engine = AdvisorEngine(index)
	    metadata = {
	        **trace_metadata(index),
	        "project_count": len(index.projects),
	    }
	    dataset_path = tmp_path / "lora-sft.jsonl"
	    output_dir = tmp_path / "dry-run"
	    dataset_path.write_text(
	        build_lora_dataset_jsonl(build_demo_rehearsal(engine)["session"], metadata),
	        encoding="utf-8",
	    )

	    result = subprocess.run(
	        [
	            sys.executable,
	            "scripts/train_minicpm_lora.py",
	            "--dataset",
	            str(dataset_path),
	            "--output-dir",
	            str(output_dir),
	            "--max-steps",
	            "7",
	            "--dry-run",
	        ],
	        # Pin the working directory so the relative script path resolves no
	        # matter where pytest was started from.
	        cwd=repo_root,
	        # Checked explicitly below so a failure shows the script's own output.
	        check=False,
	        capture_output=True,
	        text=True,
	    )
	    assert result.returncode == 0, (
	        f"train_minicpm_lora.py --dry-run exited with {result.returncode}\n"
	        f"stdout:\n{result.stdout}\n"
	        f"stderr:\n{result.stderr}"
	    )

	    recipe = json.loads((output_dir / "training-recipe.json").read_text(encoding="utf-8"))

	    assert "dry-run ok" in result.stdout
	    assert recipe["example_count"] > 0
	    # Must echo the --max-steps value passed on the command line.
	    assert recipe["max_steps"] == 7
	    assert (output_dir / "train-command.txt").is_file()