# AttrLLM/visualization/wrong_answer_examples.py
# Stephentao-30
# Public Mode: dual heatmap for 30 wrong-answer examples
# 48c2884
"""Allow-list of (dataset, example_id) pairs that have a precomputed
model_answer_short attribution available for the small Qwen 3 4B model.
For these examples, Public Mode renders a side-by-side dual heatmap
("vs Ground Truth" + "vs Model Answer (Wrong)"). All other examples render
the existing single-heatmap layout unchanged.
The compact attributions only exist at the (geomean_jointprob, word)
combination on disk, so `has_wrong_answer_view` returns False for any other
scalarizer or feature level.
"""
from typing import Set, Tuple
# Allow-list of (dataset, example_id) pairs, built from a per-dataset table of
# example numbers. Each number n becomes the id string "example_<n>".
WRONG_ANSWER_EXAMPLES: Set[Tuple[str, str]] = {
    (dataset_name, f"example_{number}")
    for dataset_name, numbers in (
        ("esnli", range(1, 11)),  # examples 1 through 10 inclusive
        ("snarks", (3, 5, 6, 8, 9, 10)),
        ("fever", (2, 7, 8, 9, 10)),
        ("medical_qa", (1, 3, 5, 9)),
        ("bbq_disamb", (2, 4, 9)),
        ("causal_judgment", (1,)),
        ("bar_exam", (3,)),
    )
    for number in numbers
}

# Import-time sanity check: the table above must yield exactly 30 distinct
# pairs, so an accidental duplicate or dropped entry fails loudly.
assert len(WRONG_ANSWER_EXAMPLES) == 30, (
    f"WRONG_ANSWER_EXAMPLES expected 30 entries, got {len(WRONG_ANSWER_EXAMPLES)}"
)
def has_wrong_answer_view(
    dataset: str,
    example_id: str,
    scalarizer: str,
    feature_level: str,
) -> bool:
    """Report whether the wrong-answer (dual heatmap) view applies.

    Args:
        dataset: Dataset name, e.g. ``"esnli"``.
        example_id: Example identifier, e.g. ``"example_3"``.
        scalarizer: Scalarizer name; only ``"geomean_jointprob"`` qualifies.
        feature_level: Feature granularity; only ``"word"`` qualifies.

    Returns:
        True only when the scalarizer is ``"geomean_jointprob"``, the feature
        level is ``"word"``, and ``(dataset, example_id)`` is a member of
        ``WRONG_ANSWER_EXAMPLES``; False otherwise.
    """
    supported_combination = (
        scalarizer == "geomean_jointprob" and feature_level == "word"
    )
    # Short-circuit: the allow-list is consulted only for the supported
    # (scalarizer, feature_level) combination.
    return supported_combination and (dataset, example_id) in WRONG_ANSWER_EXAMPLES