"""Allow-list of (dataset, example_id) pairs that have a precomputed model_answer_short attribution available for the small Qwen 3 4B model. For these examples, Public Mode renders a side-by-side dual heatmap ("vs Ground Truth" + "vs Model Answer (Wrong)"). All other examples render the existing single-heatmap layout unchanged. The compact attributions only exist at the (geomean_jointprob, word) combination on disk, so `has_wrong_answer_view` returns False for any other scalarizer or feature level. """ from typing import Set, Tuple WRONG_ANSWER_EXAMPLES: Set[Tuple[str, str]] = { ("esnli", f"example_{i}") for i in range(1, 11) } | { ("snarks", f"example_{i}") for i in (3, 5, 6, 8, 9, 10) } | { ("fever", f"example_{i}") for i in (2, 7, 8, 9, 10) } | { ("medical_qa", f"example_{i}") for i in (1, 3, 5, 9) } | { ("bbq_disamb", f"example_{i}") for i in (2, 4, 9) } | { ("causal_judgment", "example_1"), ("bar_exam", "example_3"), } assert len(WRONG_ANSWER_EXAMPLES) == 30, ( f"WRONG_ANSWER_EXAMPLES expected 30 entries, got {len(WRONG_ANSWER_EXAMPLES)}" ) def has_wrong_answer_view( dataset: str, example_id: str, scalarizer: str, feature_level: str, ) -> bool: if scalarizer != "geomean_jointprob" or feature_level != "word": return False return (dataset, example_id) in WRONG_ANSWER_EXAMPLES