VLAarchtests4 / code /VLAarchtests2_code /VLAarchtests /tests /test_public_benchmark_package_tracks.py
| from eval.public_benchmark_package import ( | |
| ANCHOR_ROLE, | |
| TARGET_ROLE, | |
| build_public_eval_protocol, | |
| build_target_training_spec, | |
| default_public_benchmark_manifest, | |
| expected_eval_modes, | |
| public_benchmark_tracks, | |
| public_protocol_identity_signature, | |
| training_fairness_signature, | |
| ) | |
def test_public_benchmark_package_contains_expected_tracks():
    """Check the default manifest's track id lists and its anchor tolerance threshold."""
    expected_target_ids = ["bag_track", "occlusion_track", "cloth_track"]
    expected_anchor_ids = ["anchor_track"]

    package = default_public_benchmark_manifest()

    # The comparisons are against lists, so element order is part of the check.
    assert package["target_track_ids"] == expected_target_ids
    assert package["anchor_track_ids"] == expected_anchor_ids

    thresholds = package["thresholds"]
    assert thresholds["anchor_tolerance"] == 0.02
def test_public_target_protocol_identity_is_mode_invariant():
    """Check that every eval mode of ``bag_track`` yields the same protocol identity signature.

    One protocol is built per mode returned by ``expected_eval_modes("bag_track")``
    (always with ``seed=17``); the distinct signatures must collapse to exactly one.
    """
    distinct_signatures = set()
    for eval_mode in expected_eval_modes("bag_track"):
        protocol = build_public_eval_protocol(
            track_id="bag_track",
            eval_mode=eval_mode,
            seed=17,
        )
        distinct_signatures.add(public_protocol_identity_signature(protocol))

    # Exactly one: also fails if no eval modes were returned at all.
    assert len(distinct_signatures) == 1
def test_public_anchor_protocol_identity_is_mode_invariant():
    """Check that every eval mode of ``anchor_track`` yields the same protocol identity signature.

    One protocol is built per mode returned by ``expected_eval_modes("anchor_track")``
    (always with ``seed=17``); the distinct signatures must collapse to exactly one.
    """
    signatures_seen = set()
    for anchor_mode in expected_eval_modes("anchor_track"):
        anchor_protocol = build_public_eval_protocol(
            track_id="anchor_track",
            eval_mode=anchor_mode,
            seed=17,
        )
        signatures_seen.add(public_protocol_identity_signature(anchor_protocol))

    # Exactly one: also fails if no eval modes were returned at all.
    assert len(signatures_seen) == 1
def test_training_fairness_signature_matches_for_trunk_and_adapter():
    """Check that the trunk-only and adapter-active specs share one fairness signature.

    Both specs are built for ``cloth_track`` with ``seed=17``; only ``model_variant``
    differs between them.
    """
    # Build both specs first, then derive both signatures, in that order.
    trunk_spec, adapter_spec = [
        build_target_training_spec(track_id="cloth_track", model_variant=variant, seed=17)
        for variant in ("trunk_only_ft", "adapter_active_ft")
    ]

    trunk_signature = training_fairness_signature(trunk_spec)
    adapter_signature = training_fairness_signature(adapter_spec)
    assert trunk_signature == adapter_signature
def test_public_track_roles_are_partitioned():
    """Check that querying tracks by role returns exactly the expected ids, each with that role.

    ``public_benchmark_tracks(TARGET_ROLE)`` must yield the three target tracks and
    ``public_benchmark_tracks(ANCHOR_ROLE)`` only ``anchor_track``.
    """
    roles_for_targets = {}
    for track in public_benchmark_tracks(TARGET_ROLE):
        track_id = track.track_id
        roles_for_targets[track_id] = track.role

    roles_for_anchors = {}
    for track in public_benchmark_tracks(ANCHOR_ROLE):
        track_id = track.track_id
        roles_for_anchors[track_id] = track.role

    expected_targets = dict.fromkeys(
        ("bag_track", "occlusion_track", "cloth_track"),
        TARGET_ROLE,
    )
    assert roles_for_targets == expected_targets
    assert roles_for_anchors == {"anchor_track": ANCHOR_ROLE}