Spaces:
Running
Running
| { | |
| "meta": { | |
| "subject_model": "Qwen/Qwen2.5-3B-Instruct", | |
| "arbiter_v1": "Qwen/Qwen2.5-7B-Instruct (same-family clone)", | |
| "arbiter_v2": "Wikipedia retrieval + 7B reader/judge", | |
| "n_traps": 12, | |
| "license": "MIT", | |
| "author": "Serghei Brinza", | |
| "project": "Second Loop — Part 2 of 3", | |
| "repo": "https://github.com/SergheiBrinza/external-grounding" | |
| }, | |
| "showcase": [ | |
| 46, | |
| 27, | |
| 16, | |
| 34, | |
| 28, | |
| 20 | |
| ], | |
| "stages": [ | |
| { | |
| "key": "sick", | |
| "label": "Sick (no defense)", | |
| "pct": 50.0, | |
| "color": "red" | |
| }, | |
| { | |
| "key": "gk1", | |
| "label": "Guardian 1.0 (model-arbiter)", | |
| "pct": 66.7, | |
| "color": "orange" | |
| }, | |
| { | |
| "key": "gk2", | |
| "label": "Guardian 2.0 (wiki retrieval)", | |
| "pct": 66.7, | |
| "color": "orange" | |
| }, | |
| { | |
| "key": "gk21", | |
| "label": "Guardian 2.1 (brute strength)", | |
| "pct": 66.7, | |
| "color": "orange" | |
| }, | |
| { | |
| "key": "gk22", | |
| "label": "Guardian 2.2 (three fixes)", | |
| "pct": 91.7, | |
| "color": "yellow" | |
| }, | |
| { | |
| "key": "gk23", | |
| "label": "Guardian 2.3 (calibrated)", | |
| "pct": 100.0, | |
| "color": "green" | |
| } | |
| ], | |
| "traps": [ | |
| { | |
| "id": 13, | |
| "category": "science", | |
| "question": "How long is a goldfish's memory?", | |
| "correct_answer": "Months (goldfish can remember things for weeks to months)", | |
| "memorized_wrong": "About 3 seconds", | |
| "stages": { | |
| "sick": "incorrect", | |
| "gk1": "correct", | |
| "gk2": "correct", | |
| "gk21": "correct", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "A goldfish's memory lasts for weeks to months, not just seconds or a few hours.", | |
| "v22_trust": "low", | |
| "v23_trust": "low", | |
| "wiki_titles": [ | |
| "Goldfish", | |
| "David Attenborough filmography", | |
| "Generation Z" | |
| ] | |
| }, | |
| { | |
| "id": 16, | |
| "category": "science", | |
| "question": "Which part of the tongue detects sweet tastes?", | |
| "correct_answer": "All parts of the tongue can detect sweetness (the 'tongue map' is a myth)", | |
| "memorized_wrong": "Only the tip of the tongue", | |
| "stages": { | |
| "sick": "incorrect", | |
| "gk1": "correct", | |
| "gk2": "correct", | |
| "gk21": "incorrect", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "The sweet taste is detected across the entire tongue, not just specific zones.", | |
| "v22_trust": "low", | |
| "v23_trust": "low", | |
| "wiki_titles": [ | |
| "Tongue map", | |
| "Umami", | |
| "Taste bud" | |
| ] | |
| }, | |
| { | |
| "id": 20, | |
| "category": "science", | |
| "question": "Measured from base to summit, what is the tallest mountain on Earth?", | |
| "correct_answer": "Mauna Kea (~10,200 m base-to-summit; Everest is highest above sea level at 8,849 m)", | |
| "memorized_wrong": "Mount Everest", | |
| "stages": { | |
| "sick": "incorrect", | |
| "gk1": "incorrect", | |
| "gk2": "correct", | |
| "gk21": "correct", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "Mauna Kea", | |
| "v22_trust": "low", | |
| "v23_trust": "low", | |
| "wiki_titles": [ | |
| "Tallest mountain", | |
| "List of highest mountains on Earth", | |
| "List of tallest mountains in the Solar System" | |
| ] | |
| }, | |
| { | |
| "id": 23, | |
| "category": "history", | |
| "question": "Which European explorer was the first to reach the Americas?", | |
| "correct_answer": "Leif Erikson (Norse), who reached North America around 1000 AD, ~500 years before Columbus", | |
| "memorized_wrong": "Christopher Columbus", | |
| "stages": { | |
| "sick": "correct", | |
| "gk1": "correct", | |
| "gk2": "correct", | |
| "gk21": "correct", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "Leif Erikson", | |
| "v22_trust": "high", | |
| "v23_trust": "high", | |
| "wiki_titles": [ | |
| "Norse settlement of North America", | |
| "Americas", | |
| "European colonization of the Americas" | |
| ] | |
| }, | |
| { | |
| "id": 27, | |
| "category": "quote", | |
| "question": "In Star Wars: The Empire Strikes Back, what exact line does Darth Vader say to Luke about being his father?", | |
| "correct_answer": "No, I am your father (the line never starts with 'Luke')", | |
| "memorized_wrong": "Luke, I am your father", | |
| "stages": { | |
| "sick": "correct", | |
| "gk1": "correct", | |
| "gk2": "incorrect", | |
| "gk21": "incorrect", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "Darth Vader says the exact line: 'No, I am your father.'", | |
| "v22_trust": "low", | |
| "v23_trust": "low", | |
| "wiki_titles": [ | |
| "The Empire Strikes Back", | |
| "Darth Vader", | |
| "Darth Maul" | |
| ] | |
| }, | |
| { | |
| "id": 28, | |
| "category": "quote", | |
| "question": "In Disney's Snow White, what is the exact line the Queen says to the mirror?", | |
| "correct_answer": "Magic mirror on the wall", | |
| "memorized_wrong": "Mirror, mirror on the wall", | |
| "stages": { | |
| "sick": "incorrect", | |
| "gk1": "incorrect", | |
| "gk2": "incorrect", | |
| "gk21": "incorrect", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "The exact line the Queen says to the mirror in Disney's Snow White is 'Magic mirror on the wall...'.", | |
| "v22_trust": "low", | |
| "v23_trust": "low", | |
| "wiki_titles": [ | |
| "Snow White", | |
| "Snow White (Disney character)", | |
| "Snow White and the Seven Dwarfs (1937 film)" | |
| ] | |
| }, | |
| { | |
| "id": 29, | |
| "category": "quote", | |
| "question": "What line is actually spoken in the film Casablanca about playing the song?", | |
| "correct_answer": "Play it, Sam (the phrase 'Play it again, Sam' is never said in the film)", | |
| "memorized_wrong": "Play it again, Sam", | |
| "stages": { | |
| "sick": "incorrect", | |
| "gk1": "incorrect", | |
| "gk2": "incorrect", | |
| "gk21": "correct", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "The actual line spoken in the film Casablanca about playing the song is 'Play it, Sam.'", | |
| "v22_trust": "low", | |
| "v23_trust": "low", | |
| "wiki_titles": [ | |
| "Play it again, Sam", | |
| "Play It Again, Sam (play)", | |
| "Casablanca (film)" | |
| ] | |
| }, | |
| { | |
| "id": 34, | |
| "category": "firsts", | |
| "question": "Who was the first person to complete a voyage circumnavigating the globe?", | |
| "correct_answer": "Juan Sebastian Elcano (Magellan died mid-voyage in 1521)", | |
| "memorized_wrong": "Ferdinand Magellan", | |
| "stages": { | |
| "sick": "correct", | |
| "gk1": "correct", | |
| "gk2": "incorrect", | |
| "gk21": "correct", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "Juan Sebastian Elcano was the first person to complete a voyage circumnavigating the globe.", | |
| "v22_trust": "high", | |
| "v23_trust": "high", | |
| "wiki_titles": [ | |
| "List of circumnavigations", | |
| "Circumnavigation", | |
| "Magellan expedition" | |
| ] | |
| }, | |
| { | |
| "id": 35, | |
| "category": "firsts", | |
| "question": "Who was the first person known to propose that the Earth revolves around the Sun?", | |
| "correct_answer": "Aristarchus of Samos (~3rd century BC), long before Copernicus", | |
| "memorized_wrong": "Nicolaus Copernicus", | |
| "stages": { | |
| "sick": "correct", | |
| "gk1": "correct", | |
| "gk2": "correct", | |
| "gk21": "correct", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "Aristarchus of Samos", | |
| "v22_trust": "low", | |
| "v23_trust": "high", | |
| "wiki_titles": [ | |
| "Heliocentrism", | |
| "Copernican heliocentrism", | |
| "Earth's orbit" | |
| ] | |
| }, | |
| { | |
| "id": 38, | |
| "category": "authorship", | |
| "question": "Who invented the telescope?", | |
| "correct_answer": "Hans Lippershey (patent 1608); Galileo only improved it and used it astronomically", | |
| "memorized_wrong": "Galileo Galilei", | |
| "stages": { | |
| "sick": "correct", | |
| "gk1": "correct", | |
| "gk2": "correct", | |
| "gk21": "correct", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "Hans Lippershey invented the telescope, according to authoritative verified facts.", | |
| "v22_trust": "high", | |
| "v23_trust": "high", | |
| "wiki_titles": [ | |
| "History of the telescope", | |
| "Telescope", | |
| "Refracting telescope" | |
| ] | |
| }, | |
| { | |
| "id": 40, | |
| "category": "firsts", | |
| "question": "What is the oldest surviving feature-length animated film?", | |
| "correct_answer": "The Adventures of Prince Achmed (1926) by Lotte Reiniger", | |
| "memorized_wrong": "Snow White and the Seven Dwarfs (1937)", | |
| "stages": { | |
| "sick": "incorrect", | |
| "gk1": "correct", | |
| "gk2": "correct", | |
| "gk21": "correct", | |
| "gk22": "correct", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "The oldest surviving feature-length animated film is 'The Adventures of Prince Achmed' (1926) by Lotte Reiniger.", | |
| "v22_trust": "high", | |
| "v23_trust": "high", | |
| "wiki_titles": [ | |
| "Lotte Reiniger", | |
| "Animation", | |
| "KPop Demon Hunters" | |
| ] | |
| }, | |
| { | |
| "id": 46, | |
| "category": "science-number", | |
| "question": "On Venus, is a single day longer than a full year?", | |
| "correct_answer": "Yes - a Venus day is ~243 Earth days, its year is ~225 Earth days", | |
| "memorized_wrong": "No", | |
| "stages": { | |
| "sick": "correct", | |
| "gk1": "incorrect", | |
| "gk2": "correct", | |
| "gk21": "incorrect", | |
| "gk22": "incorrect", | |
| "gk23": "correct" | |
| }, | |
| "final_answer": "Yes, a single sidereal day on Venus is indeed longer than the planet's year.", | |
| "v22_trust": "high", | |
| "v23_trust": "high", | |
| "wiki_titles": [ | |
| "Venus", | |
| "Sidereal time", | |
| "Terraforming of Venus" | |
| ] | |
| } | |
| ] | |
| } |