VLAarchtests4 / code /VLAarchtests2_code /VLAarchtests /tests /test_public_benchmark_package_tracks.py
lsnu's picture
Add files using upload-large-folder tool
b3a756d verified
from eval.public_benchmark_package import (
ANCHOR_ROLE,
TARGET_ROLE,
build_public_eval_protocol,
build_target_training_spec,
default_public_benchmark_manifest,
expected_eval_modes,
public_benchmark_tracks,
public_protocol_identity_signature,
training_fairness_signature,
)
def test_public_benchmark_package_contains_expected_tracks():
    """Check the default manifest's track id lists and its anchor tolerance."""
    expected_target_ids = ["bag_track", "occlusion_track", "cloth_track"]
    expected_anchor_ids = ["anchor_track"]

    package_manifest = default_public_benchmark_manifest()

    # List comparison: both the members and their order must match.
    assert package_manifest["target_track_ids"] == expected_target_ids
    assert package_manifest["anchor_track_ids"] == expected_anchor_ids
    assert package_manifest["thresholds"]["anchor_tolerance"] == 0.02
def test_public_target_protocol_identity_is_mode_invariant():
    """Every expected eval mode of ``bag_track`` must yield the same identity signature."""
    track_id = "bag_track"

    # A set collapses equal signatures, so exactly one entry must remain.
    distinct_signatures = set()
    for eval_mode in expected_eval_modes(track_id):
        protocol = build_public_eval_protocol(track_id=track_id, eval_mode=eval_mode, seed=17)
        distinct_signatures.add(public_protocol_identity_signature(protocol))

    assert len(distinct_signatures) == 1
def test_public_anchor_protocol_identity_is_mode_invariant():
    """Every expected eval mode of ``anchor_track`` must yield the same identity signature."""
    track_id = "anchor_track"

    # A set collapses equal signatures, so exactly one entry must remain.
    distinct_signatures = set()
    for eval_mode in expected_eval_modes(track_id):
        protocol = build_public_eval_protocol(track_id=track_id, eval_mode=eval_mode, seed=17)
        distinct_signatures.add(public_protocol_identity_signature(protocol))

    assert len(distinct_signatures) == 1
def test_training_fairness_signature_matches_for_trunk_and_adapter():
    """The two ``cloth_track`` model variants must share one training fairness signature."""
    model_variants = ("trunk_only_ft", "adapter_active_ft")

    # Build both specs before computing any signature.
    training_specs = [
        build_target_training_spec(track_id="cloth_track", model_variant=variant, seed=17)
        for variant in model_variants
    ]
    trunk_signature, adapter_signature = map(training_fairness_signature, training_specs)

    assert trunk_signature == adapter_signature
def test_public_track_roles_are_partitioned():
    """Filtering tracks by role must return exactly the expected ids, each tagged with that role."""

    def roles_by_track_id(role):
        # Map each track returned for ``role`` to the role it reports.
        mapping = {}
        for track in public_benchmark_tracks(role):
            mapping[track.track_id] = track.role
        return mapping

    target_roles = roles_by_track_id(TARGET_ROLE)
    anchor_roles = roles_by_track_id(ANCHOR_ROLE)

    expected_target_roles = {
        "bag_track": TARGET_ROLE,
        "occlusion_track": TARGET_ROLE,
        "cloth_track": TARGET_ROLE,
    }
    expected_anchor_roles = {"anchor_track": ANCHOR_ROLE}

    assert target_roles == expected_target_roles
    assert anchor_roles == expected_anchor_roles