"""Allow-list of (dataset, example_id) pairs that have a precomputed
model_answer_short attribution available for the small Qwen 3 4B model.

For these examples, Public Mode renders a side-by-side dual heatmap
("vs Ground Truth" + "vs Model Answer (Wrong)"). All other examples render
the existing single-heatmap layout unchanged.

The compact attributions only exist at the (geomean_jointprob, word)
combination on disk, so `has_wrong_answer_view` returns False for any other
scalarizer or feature level.
"""
from typing import Set, Tuple
# Each row pairs a dataset name with the example numbers that are allow-listed
# for it; every (dataset, number) expands to a (dataset, "example_<number>") key.
WRONG_ANSWER_EXAMPLES: Set[Tuple[str, str]] = {
    (dataset_name, f"example_{number}")
    for dataset_name, numbers in (
        ("esnli", range(1, 11)),
        ("snarks", (3, 5, 6, 8, 9, 10)),
        ("fever", (2, 7, 8, 9, 10)),
        ("medical_qa", (1, 3, 5, 9)),
        ("bbq_disamb", (2, 4, 9)),
        ("causal_judgment", (1,)),
        ("bar_exam", (3,)),
    )
    for number in numbers
}

# Import-time sanity check: the table above must expand to exactly 30 distinct
# pairs, so an accidental addition or removal is caught as soon as the module
# is loaded.
assert len(WRONG_ANSWER_EXAMPLES) == 30, (
    f"WRONG_ANSWER_EXAMPLES expected 30 entries, got {len(WRONG_ANSWER_EXAMPLES)}"
)
def has_wrong_answer_view(
    dataset: str,
    example_id: str,
    scalarizer: str,
    feature_level: str,
) -> bool:
    """Return whether the wrong-answer view is available for an example.

    The result is True only when both of the following hold:

    * ``scalarizer`` is ``"geomean_jointprob"`` and ``feature_level`` is
      ``"word"``;
    * ``(dataset, example_id)`` is a member of ``WRONG_ANSWER_EXAMPLES``.

    Any other scalarizer or feature level yields False without consulting
    the allow-list.
    """
    supported_combination = (
        scalarizer == "geomean_jointprob" and feature_level == "word"
    )
    # Short-circuit keeps the allow-list lookup off the unsupported path.
    return supported_combination and (dataset, example_id) in WRONG_ANSWER_EXAMPLES