external-grounding / data.json
Laborator's picture
Museum rebuild: static ascent exhibit (6 guardian stages, 50%->100%)
61a1120 verified
Raw
History Blame Contribute Delete
10.1 kB
{
"meta": {
"subject_model": "Qwen/Qwen2.5-3B-Instruct",
"arbiter_v1": "Qwen/Qwen2.5-7B-Instruct (same-family clone)",
"arbiter_v2": "Wikipedia retrieval + 7B reader/judge",
"n_traps": 12,
"license": "MIT",
"author": "Serghei Brinza",
"project": "Second Loop — Part 2 of 3",
"repo": "https://github.com/SergheiBrinza/external-grounding"
},
"showcase": [
46,
27,
16,
34,
28,
20
],
"stages": [
{
"key": "sick",
"label": "Sick (no defense)",
"pct": 50.0,
"color": "red"
},
{
"key": "gk1",
"label": "Guardian 1.0 (model-arbiter)",
"pct": 66.7,
"color": "orange"
},
{
"key": "gk2",
"label": "Guardian 2.0 (wiki retrieval)",
"pct": 66.7,
"color": "orange"
},
{
"key": "gk21",
"label": "Guardian 2.1 (brute strength)",
"pct": 66.7,
"color": "orange"
},
{
"key": "gk22",
"label": "Guardian 2.2 (three fixes)",
"pct": 91.7,
"color": "yellow"
},
{
"key": "gk23",
"label": "Guardian 2.3 (calibrated)",
"pct": 100.0,
"color": "green"
}
],
"traps": [
{
"id": 13,
"category": "science",
"question": "How long is a goldfish's memory?",
"correct_answer": "Months (goldfish can remember things for weeks to months)",
"memorized_wrong": "About 3 seconds",
"stages": {
"sick": "incorrect",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "A goldfish's memory lasts for weeks to months, not just seconds or a few hours.",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Goldfish",
"David Attenborough filmography",
"Generation Z"
]
},
{
"id": 16,
"category": "science",
"question": "Which part of the tongue detects sweet tastes?",
"correct_answer": "All parts of the tongue can detect sweetness (the 'tongue map' is a myth)",
"memorized_wrong": "Only the tip of the tongue",
"stages": {
"sick": "incorrect",
"gk1": "correct",
"gk2": "correct",
"gk21": "incorrect",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "The sweet taste is detected across the entire tongue, not just specific zones.",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Tongue map",
"Umami",
"Taste bud"
]
},
{
"id": 20,
"category": "science",
"question": "Measured from base to summit, what is the tallest mountain on Earth?",
"correct_answer": "Mauna Kea (~10,200 m base-to-summit; Everest is highest above sea level at 8,849 m)",
"memorized_wrong": "Mount Everest",
"stages": {
"sick": "incorrect",
"gk1": "incorrect",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Mauna Kea",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Tallest mountain",
"List of highest mountains on Earth",
"List of tallest mountains in the Solar System"
]
},
{
"id": 23,
"category": "history",
"question": "Which European explorer was the first to reach the Americas?",
"correct_answer": "Leif Erikson (Norse), who reached North America around 1000 AD, ~500 years before Columbus",
"memorized_wrong": "Christopher Columbus",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Leif Erikson",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"Norse settlement of North America",
"Americas",
"European colonization of the Americas"
]
},
{
"id": 27,
"category": "quote",
"question": "In Star Wars: The Empire Strikes Back, what exact line does Darth Vader say to Luke about being his father?",
"correct_answer": "No, I am your father (the line never starts with 'Luke')",
"memorized_wrong": "Luke, I am your father",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "incorrect",
"gk21": "incorrect",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Darth Vader says the exact line: 'No, I am your father.'",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"The Empire Strikes Back",
"Darth Vader",
"Darth Maul"
]
},
{
"id": 28,
"category": "quote",
"question": "In Disney's Snow White, what is the exact line the Queen says to the mirror?",
"correct_answer": "Magic mirror on the wall",
"memorized_wrong": "Mirror, mirror on the wall",
"stages": {
"sick": "incorrect",
"gk1": "incorrect",
"gk2": "incorrect",
"gk21": "incorrect",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "The exact line the Queen says to the mirror in Disney's Snow White is 'Magic mirror on the wall...'.",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Snow White",
"Snow White (Disney character)",
"Snow White and the Seven Dwarfs (1937 film)"
]
},
{
"id": 29,
"category": "quote",
"question": "What line is actually spoken in the film Casablanca about playing the song?",
"correct_answer": "Play it, Sam (the phrase 'Play it again, Sam' is never said in the film)",
"memorized_wrong": "Play it again, Sam",
"stages": {
"sick": "incorrect",
"gk1": "incorrect",
"gk2": "incorrect",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "The actual line spoken in the film Casablanca about playing the song is 'Play it, Sam.'",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Play it again, Sam",
"Play It Again, Sam (play)",
"Casablanca (film)"
]
},
{
"id": 34,
"category": "firsts",
"question": "Who was the first person to complete a voyage circumnavigating the globe?",
"correct_answer": "Juan Sebastian Elcano (Magellan died mid-voyage in 1521)",
"memorized_wrong": "Ferdinand Magellan",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "incorrect",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Juan Sebastian Elcano was the first person to complete a voyage circumnavigating the globe.",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"List of circumnavigations",
"Circumnavigation",
"Magellan expedition"
]
},
{
"id": 35,
"category": "firsts",
"question": "Who was the first person known to propose that the Earth revolves around the Sun?",
"correct_answer": "Aristarchus of Samos (~3rd century BC), long before Copernicus",
"memorized_wrong": "Nicolaus Copernicus",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Aristarchus of Samos",
"v22_trust": "low",
"v23_trust": "high",
"wiki_titles": [
"Heliocentrism",
"Copernican heliocentrism",
"Earth's orbit"
]
},
{
"id": 38,
"category": "authorship",
"question": "Who invented the telescope?",
"correct_answer": "Hans Lippershey (patent 1608); Galileo only improved it and used it astronomically",
"memorized_wrong": "Galileo Galilei",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Hans Lippershey invented the telescope, according to authoritative verified facts.",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"History of the telescope",
"Telescope",
"Refracting telescope"
]
},
{
"id": 40,
"category": "firsts",
"question": "What is the oldest surviving feature-length animated film?",
"correct_answer": "The Adventures of Prince Achmed (1926) by Lotte Reiniger",
"memorized_wrong": "Snow White and the Seven Dwarfs (1937)",
"stages": {
"sick": "incorrect",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "The oldest surviving feature-length animated film is 'The Adventures of Prince Achmed' (1926) by Lotte Reiniger.",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"Lotte Reiniger",
"Animation",
"KPop Demon Hunters"
]
},
{
"id": 46,
"category": "science-number",
"question": "On Venus, is a single day longer than a full year?",
"correct_answer": "Yes - a Venus day is ~243 Earth days, its year is ~225 Earth days",
"memorized_wrong": "No",
"stages": {
"sick": "correct",
"gk1": "incorrect",
"gk2": "correct",
"gk21": "incorrect",
"gk22": "incorrect",
"gk23": "correct"
},
"final_answer": "Yes, a single sidereal day on Venus is indeed longer than the planet's year.",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"Venus",
"Sidereal time",
"Terraforming of Venus"
]
}
]
}