File size: 2,457 Bytes
2b2e65d
 
 
e12a049
 
2b2e65d
 
 
 
 
 
 
e12a049
2b2e65d
9eec184
2b2e65d
 
 
 
 
 
 
 
 
 
 
 
e12a049
2b2e65d
 
9eec184
2b2e65d
 
 
beeebb1
 
 
9eec184
beeebb1
2b2e65d
 
 
 
 
 
e12a049
2b2e65d
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import json
from pathlib import Path

from tests.helpers import load_test_index

from hackathon_advisor.agent import AdvisorEngine
from hackathon_advisor.data import ProjectIndex
from hackathon_advisor.lora_dataset import BASE_MODEL, build_lora_dataset_jsonl
from hackathon_advisor.trace_export import trace_metadata


def test_lora_dataset_exports_tool_call_and_response_examples() -> None:
    """Drive two advisor turns and check the exported LoRA JSONL payload.

    The first JSONL line is treated as the manifest; every following line
    is treated as a training example.
    """
    selected_goals = ("Well-Tuned", "Field Notes")
    prompts = (
        "A local-first archive cartographer for family photos",
        "make a build plan",
    )

    index = load_test_index()
    engine = AdvisorEngine(index)
    state = {"goals": list(selected_goals)}
    for prompt in prompts:
        state = engine.turn(prompt, state).state

    payload = build_lora_dataset_jsonl(state, trace_metadata(index))
    records = [json.loads(raw) for raw in payload.splitlines()]
    header = records[0]
    samples = records[1:]

    # Manifest line: identifies the dataset and summarizes what follows.
    assert header["type"] == "lora_sft_manifest"
    assert header["base_model"] == BASE_MODEL
    assert header["record_kinds"] == ["tool_call", "advisor_response"]
    assert header["example_count"] == len(samples)
    assert header["included_turn_count"] == 2
    assert header["index"]["algorithm"] == "llama-cpp-embedding-v1"

    # Both record kinds must appear among the exported examples.
    seen_kinds = {sample["example_kind"] for sample in samples}
    assert seen_kinds == {"tool_call", "advisor_response"}

    # First example: third message starts with the save_idea function call.
    tool_sample = samples[0]
    assert tool_sample["messages"][2]["content"].startswith('<function name="save_idea">')
    assert tool_sample["goals"] == list(selected_goals)

    # Second example: second message carries the prompt plus tool observations,
    # and the third message must be non-empty.
    response_sample = samples[1]
    prompt_text = response_sample["messages"][1]["content"]
    assert prompt_text.startswith("A local-first archive")
    assert "Tool observations:" in prompt_text
    assert response_sample["messages"][2]["content"]

    # First message of every example, joined, must avoid certain phrases.
    leading_contents = [sample["messages"][0]["content"] for sample in samples]
    system_messages = "\n".join(leading_contents)
    assert "Mothback" not in system_messages
    assert "Build Small" not in system_messages
    # NOTE(review): the literal is assembled from pieces, presumably so the
    # phrase never appears verbatim in this file -- confirm before merging it.
    assert "prize " + "tar" + "gets" not in system_messages
    assert "selected goals" in system_messages


def test_empty_lora_dataset_only_exports_manifest() -> None:
    """Export from an empty state and expect a single zero-count manifest line."""
    metadata = {
        "index_algorithm": "llama-cpp-embedding-v1",
        "snapshot_generated_at": "2026-06-06T00:00:00+00:00",
        "index_generated_at": "2026-06-06T01:00:00+00:00",
        "snapshot_digest": "abc",
    }

    exported = build_lora_dataset_jsonl({}, metadata)
    records = [json.loads(raw) for raw in exported.splitlines()]

    # Exactly one JSONL record is expected: the manifest, with no examples.
    assert len(records) == 1
    header = records[0]
    assert header["example_count"] == 0
    assert header["turn_count"] == 0