Spaces:
Running on Zero
Running on Zero
File size: 3,535 Bytes
import json
import subprocess
import sys
from io import BytesIO
from pathlib import Path
from tests.helpers import load_test_index
from zipfile import ZipFile
from hackathon_advisor.agent import AdvisorEngine
from hackathon_advisor.data import ProjectIndex
from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
from hackathon_advisor.lora_training_kit import (
build_lora_training_kit_zip,
parse_lora_dataset_jsonl,
)
from hackathon_advisor.prize_ledger import prize_ledger
from hackathon_advisor.trace_export import trace_metadata
def test_lora_training_kit_contains_recipe_and_model_card() -> None:
    """Build the LoRA training-kit zip and inspect what it contains.

    The kit is produced from the demo rehearsal session, the trace metadata
    (extended with a ``project_count`` entry) and the prize ledger derived
    from the engine's runtime status. The test then checks the exact set of
    archive member names and spot-checks the manifest, the training recipe,
    the adapter model card and the train command text.
    """
    project_index = load_test_index()
    advisor = AdvisorEngine(project_index)

    kit_metadata = dict(trace_metadata(project_index))
    kit_metadata["project_count"] = len(project_index.projects)

    rehearsal = build_demo_rehearsal(advisor)
    session = rehearsal["session"]
    ledger = prize_ledger(advisor.runtime_status())
    kit_bytes = build_lora_training_kit_zip(session, kit_metadata, ledger)

    # The kit is returned as raw bytes, so it is opened from an in-memory buffer.
    with ZipFile(BytesIO(kit_bytes)) as kit:
        member_names = set(kit.namelist())
        manifest = json.loads(kit.read("manifest.json"))
        recipe = json.loads(kit.read("training-recipe.json"))
        model_card = str(kit.read("adapter-model-card.md"), "utf-8")
        command = str(kit.read("train-command.txt"), "utf-8")

    # Exact equality: a missing member and an unexpected extra member both fail.
    expected_members = {
        "manifest.json",
        "lora-sft.jsonl",
        "training-recipe.json",
        "adapter-model-card.md",
        "train-command.txt",
        "README.md",
    }
    assert member_names == expected_members

    assert manifest["type"] == "lora_training_kit_manifest"
    assert manifest["publish_status"] == "published"

    assert recipe["base_model"] == "openbmb/MiniCPM5-1B"
    assert recipe["adapter_repo"] == "build-small-hackathon/hackathon-advisor-minicpm5-lora"
    # The recipe and the manifest must report the same number of examples.
    assert recipe["example_count"] == manifest["example_count"]

    assert "PEFT LoRA adapter is trained" in model_card

    assert "scripts/train_minicpm_lora.py" in command
    assert "--push-to-hub" in command
    assert "--hub-repo-id build-small-hackathon/hackathon-advisor-minicpm5-lora" in command
def test_parse_lora_dataset_jsonl_rejects_empty_payload() -> None:
    """Check that parsing an empty string raises ``ValueError`` mentioning "empty"."""
    rejection = None
    try:
        parse_lora_dataset_jsonl("")
    except ValueError as exc:
        # Keep the exception so its message can be inspected after the handler.
        rejection = exc

    # No ValueError at all means the empty payload was silently accepted.
    if rejection is None:
        raise AssertionError("empty dataset should be rejected")
    assert "empty" in str(rejection)
def test_train_minicpm_lora_dry_run_writes_recipe(tmp_path: Path) -> None:
    """Run the training script with ``--dry-run`` and check the files it writes.

    A LoRA dataset built from the demo rehearsal session is written under
    ``tmp_path``, then ``scripts/train_minicpm_lora.py`` is run in a child
    interpreter with ``--max-steps 7``. The test expects "dry-run ok" on
    stdout, a ``training-recipe.json`` in the output directory that reports a
    positive example count and the requested step limit, and a
    ``train-command.txt`` next to it.
    """
    project_index = load_test_index()
    advisor = AdvisorEngine(project_index)

    dataset_metadata = dict(trace_metadata(project_index))
    dataset_metadata["project_count"] = len(project_index.projects)

    dataset_file = tmp_path / "lora-sft.jsonl"
    dry_run_dir = tmp_path / "dry-run"

    session = build_demo_rehearsal(advisor)["session"]
    jsonl_text = build_lora_dataset_jsonl(session, dataset_metadata)
    dataset_file.write_text(jsonl_text, encoding="utf-8")

    # The script path is relative, so it is resolved against the working
    # directory the test run was started from.
    argv = [sys.executable, "scripts/train_minicpm_lora.py"]
    argv += ["--dataset", str(dataset_file)]
    argv += ["--output-dir", str(dry_run_dir)]
    argv += ["--max-steps", "7"]
    argv += ["--dry-run"]

    # check=True turns a non-zero exit status into CalledProcessError.
    completed = subprocess.run(argv, check=True, capture_output=True, text=True)

    recipe_file = dry_run_dir / "training-recipe.json"
    recipe = json.loads(recipe_file.read_text(encoding="utf-8"))

    assert "dry-run ok" in completed.stdout
    assert recipe["example_count"] > 0
    assert recipe["max_steps"] == 7
    assert (dry_run_dir / "train-command.txt").is_file()
|