Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

File size: 3,535 Bytes

e0cdb73
 
 
 
 
e12a049
 
e0cdb73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e12a049
e0cdb73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fe3bd5
e0cdb73
3fe3bd5
e0cdb73
3fe3bd5
e0cdb73
3fe3bd5
 
e0cdb73
 
 
 
 
 
 
 
 
 
 
 
e12a049
e0cdb73

import json
import subprocess
import sys
from io import BytesIO
from pathlib import Path

from tests.helpers import load_test_index
from zipfile import ZipFile

from hackathon_advisor.agent import AdvisorEngine
from hackathon_advisor.data import ProjectIndex
from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
from hackathon_advisor.lora_training_kit import (
    build_lora_training_kit_zip,
    parse_lora_dataset_jsonl,
)
from hackathon_advisor.prize_ledger import prize_ledger
from hackathon_advisor.trace_export import trace_metadata


def test_lora_training_kit_contains_recipe_and_model_card() -> None:
    """Build the LoRA training kit zip from the demo rehearsal and inspect it.

    The kit is built in memory, opened as a zip archive, and checked for the
    exact set of member files. The manifest, training recipe, adapter model
    card and train command are then spot-checked for the expected values.
    """
    project_index = load_test_index()
    advisor = AdvisorEngine(project_index)
    # Trace metadata plus the project count (the count wins on a key clash).
    kit_metadata = dict(
        trace_metadata(project_index),
        project_count=len(project_index.projects),
    )
    rehearsal = build_demo_rehearsal(advisor)
    session = rehearsal["session"]
    ledger = prize_ledger(advisor.runtime_status())
    kit_bytes = build_lora_training_kit_zip(session, kit_metadata, ledger)

    # Pull everything we need out of the archive before it is closed.
    with ZipFile(BytesIO(kit_bytes)) as kit:
        members = set(kit.namelist())
        manifest = json.loads(kit.read("manifest.json"))
        recipe = json.loads(kit.read("training-recipe.json"))
        model_card = str(kit.read("adapter-model-card.md"), "utf-8")
        train_command = str(kit.read("train-command.txt"), "utf-8")

    expected_members = {
        "manifest.json",
        "lora-sft.jsonl",
        "training-recipe.json",
        "adapter-model-card.md",
        "train-command.txt",
        "README.md",
    }
    assert members == expected_members

    assert manifest["type"] == "lora_training_kit_manifest"
    assert manifest["publish_status"] == "published"

    assert recipe["base_model"] == "openbmb/MiniCPM5-1B"
    assert recipe["adapter_repo"] == "build-small-hackathon/hackathon-advisor-minicpm5-lora"
    # The recipe and the manifest must agree on the number of examples.
    assert recipe["example_count"] == manifest["example_count"]

    assert "PEFT LoRA adapter is trained" in model_card

    for fragment in (
        "scripts/train_minicpm_lora.py",
        "--push-to-hub",
        "--hub-repo-id build-small-hackathon/hackathon-advisor-minicpm5-lora",
    ):
        assert fragment in train_command


def test_parse_lora_dataset_jsonl_rejects_empty_payload() -> None:
    """An empty JSONL payload must raise ``ValueError`` mentioning "empty"."""
    rejection = None
    try:
        parse_lora_dataset_jsonl("")
    except ValueError as error:
        # Keep the exception around; it is inspected after the try block.
        rejection = error

    if rejection is None:
        raise AssertionError("empty dataset should be rejected")
    assert "empty" in str(rejection)


def test_train_minicpm_lora_dry_run_writes_recipe(tmp_path: Path) -> None:
    """Run the training script with ``--dry-run`` and check what it writes.

    A LoRA dataset built from the demo rehearsal is written to ``tmp_path``,
    then ``scripts/train_minicpm_lora.py`` is run in a child interpreter
    against it. The test expects ``dry-run ok`` on stdout, a
    ``training-recipe.json`` that reports at least one example and echoes the
    requested ``--max-steps`` value, and a ``train-command.txt`` file.

    Args:
        tmp_path: pytest-provided temporary directory holding the dataset
            file and the script's output directory.
    """
    index = load_test_index()
    engine = AdvisorEngine(index)
    metadata = {
        **trace_metadata(index),
        "project_count": len(index.projects),
    }
    dataset_path = tmp_path / "lora-sft.jsonl"
    output_dir = tmp_path / "dry-run"
    dataset_path.write_text(
        build_lora_dataset_jsonl(build_demo_rehearsal(engine)["session"], metadata),
        encoding="utf-8",
    )

    # NOTE: the script path is relative and no cwd is passed, so it is
    # resolved against the working directory the test run was started from.
    #
    # check=False on purpose: with check=True a failing script raises
    # CalledProcessError, whose message omits the captured stdout/stderr.
    # Asserting on the return code ourselves puts both streams in the report.
    result = subprocess.run(
        [
            sys.executable,
            "scripts/train_minicpm_lora.py",
            "--dataset",
            str(dataset_path),
            "--output-dir",
            str(output_dir),
            "--max-steps",
            "7",
            "--dry-run",
        ],
        check=False,
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0, (
        f"train_minicpm_lora.py --dry-run exited with {result.returncode}\n"
        f"stdout:\n{result.stdout}\n"
        f"stderr:\n{result.stderr}"
    )
    recipe = json.loads((output_dir / "training-recipe.json").read_text(encoding="utf-8"))

    assert "dry-run ok" in result.stdout
    assert recipe["example_count"] > 0
    # The recipe must echo the value passed via --max-steps above.
    assert recipe["max_steps"] == 7
    assert (output_dir / "train-command.txt").is_file()