| """Allow-list of (dataset, example_id) pairs that have a precomputed |
| model_answer_short attribution available for the small Qwen 3 4B model. |
| |
| For these examples, Public Mode renders a side-by-side dual heatmap |
| ("vs Ground Truth" + "vs Model Answer (Wrong)"). All other examples render |
| the existing single-heatmap layout unchanged. |
| |
| The compact attributions only exist at the (geomean_jointprob, word) |
| combination on disk, so `has_wrong_answer_view` returns False for any other |
| scalarizer or feature level. |
| """ |
|
|
| from typing import Set, Tuple |
|
|
# Allow-list of (dataset, example_id) pairs. Each row below names a dataset
# and the example numbers included for it; the comprehension expands every
# number into its "example_<n>" identifier.
WRONG_ANSWER_EXAMPLES: Set[Tuple[str, str]] = {
    (dataset_name, f"example_{number}")
    for dataset_name, numbers in (
        ("esnli", range(1, 11)),
        ("snarks", (3, 5, 6, 8, 9, 10)),
        ("fever", (2, 7, 8, 9, 10)),
        ("medical_qa", (1, 3, 5, 9)),
        ("bbq_disamb", (2, 4, 9)),
        ("causal_judgment", (1,)),
        ("bar_exam", (3,)),
    )
    for number in numbers
}


# Import-time sanity check: the table above must expand to exactly 30
# distinct pairs, so an accidental addition or removal is caught early.
assert len(WRONG_ANSWER_EXAMPLES) == 30, (
    f"WRONG_ANSWER_EXAMPLES expected 30 entries, got {len(WRONG_ANSWER_EXAMPLES)}"
)
|
|
|
|
def has_wrong_answer_view(
    dataset: str,
    example_id: str,
    scalarizer: str,
    feature_level: str,
) -> bool:
    """Report whether the wrong-answer view is available for an example.

    Args:
        dataset: Dataset name, e.g. ``"esnli"``.
        example_id: Example identifier, e.g. ``"example_3"``.
        scalarizer: Requested scalarizer name.
        feature_level: Requested feature level.

    Returns:
        True only when the scalarizer is ``"geomean_jointprob"``, the
        feature level is ``"word"``, and ``(dataset, example_id)`` is in
        ``WRONG_ANSWER_EXAMPLES``; False in every other case.
    """
    # The configuration checks come first so that an unsupported
    # scalarizer or feature level short-circuits before the allow-list
    # lookup is evaluated.
    return (
        scalarizer == "geomean_jointprob"
        and feature_level == "word"
        and (dataset, example_id) in WRONG_ANSWER_EXAMPLES
    )
|
|