Spaces:
Running
Running
File size: 10,099 Bytes
{
"meta": {
"subject_model": "Qwen/Qwen2.5-3B-Instruct",
"arbiter_v1": "Qwen/Qwen2.5-7B-Instruct (same-family clone)",
"arbiter_v2": "Wikipedia retrieval + 7B reader/judge",
"n_traps": 12,
"license": "MIT",
"author": "Serghei Brinza",
"project": "Second Loop — Part 2 of 3",
"repo": "https://github.com/SergheiBrinza/external-grounding"
},
"showcase": [
46,
27,
16,
34,
28,
20
],
"stages": [
{
"key": "sick",
"label": "Sick (no defense)",
"pct": 50.0,
"color": "red"
},
{
"key": "gk1",
"label": "Guardian 1.0 (model-arbiter)",
"pct": 66.7,
"color": "orange"
},
{
"key": "gk2",
"label": "Guardian 2.0 (wiki retrieval)",
"pct": 66.7,
"color": "orange"
},
{
"key": "gk21",
"label": "Guardian 2.1 (brute strength)",
"pct": 66.7,
"color": "orange"
},
{
"key": "gk22",
"label": "Guardian 2.2 (three fixes)",
"pct": 91.7,
"color": "yellow"
},
{
"key": "gk23",
"label": "Guardian 2.3 (calibrated)",
"pct": 100.0,
"color": "green"
}
],
"traps": [
{
"id": 13,
"category": "science",
"question": "How long is a goldfish's memory?",
"correct_answer": "Months (goldfish can remember things for weeks to months)",
"memorized_wrong": "About 3 seconds",
"stages": {
"sick": "incorrect",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "A goldfish's memory lasts for weeks to months, not just seconds or a few hours.",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Goldfish",
"David Attenborough filmography",
"Generation Z"
]
},
{
"id": 16,
"category": "science",
"question": "Which part of the tongue detects sweet tastes?",
"correct_answer": "All parts of the tongue can detect sweetness (the 'tongue map' is a myth)",
"memorized_wrong": "Only the tip of the tongue",
"stages": {
"sick": "incorrect",
"gk1": "correct",
"gk2": "correct",
"gk21": "incorrect",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "The sweet taste is detected across the entire tongue, not just specific zones.",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Tongue map",
"Umami",
"Taste bud"
]
},
{
"id": 20,
"category": "science",
"question": "Measured from base to summit, what is the tallest mountain on Earth?",
"correct_answer": "Mauna Kea (~10,200 m base-to-summit; Everest is highest above sea level at 8,849 m)",
"memorized_wrong": "Mount Everest",
"stages": {
"sick": "incorrect",
"gk1": "incorrect",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Mauna Kea",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Tallest mountain",
"List of highest mountains on Earth",
"List of tallest mountains in the Solar System"
]
},
{
"id": 23,
"category": "history",
"question": "Which European explorer was the first to reach the Americas?",
"correct_answer": "Leif Erikson (Norse), who reached North America around 1000 AD, ~500 years before Columbus",
"memorized_wrong": "Christopher Columbus",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Leif Erikson",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"Norse settlement of North America",
"Americas",
"European colonization of the Americas"
]
},
{
"id": 27,
"category": "quote",
"question": "In Star Wars: The Empire Strikes Back, what exact line does Darth Vader say to Luke about being his father?",
"correct_answer": "No, I am your father (the line never starts with 'Luke')",
"memorized_wrong": "Luke, I am your father",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "incorrect",
"gk21": "incorrect",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Darth Vader says the exact line: 'No, I am your father.'",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"The Empire Strikes Back",
"Darth Vader",
"Darth Maul"
]
},
{
"id": 28,
"category": "quote",
"question": "In Disney's Snow White, what is the exact line the Queen says to the mirror?",
"correct_answer": "Magic mirror on the wall",
"memorized_wrong": "Mirror, mirror on the wall",
"stages": {
"sick": "incorrect",
"gk1": "incorrect",
"gk2": "incorrect",
"gk21": "incorrect",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "The exact line the Queen says to the mirror in Disney's Snow White is 'Magic mirror on the wall...'.",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Snow White",
"Snow White (Disney character)",
"Snow White and the Seven Dwarfs (1937 film)"
]
},
{
"id": 29,
"category": "quote",
"question": "What line is actually spoken in the film Casablanca about playing the song?",
"correct_answer": "Play it, Sam (the phrase 'Play it again, Sam' is never said in the film)",
"memorized_wrong": "Play it again, Sam",
"stages": {
"sick": "incorrect",
"gk1": "incorrect",
"gk2": "incorrect",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "The actual line spoken in the film Casablanca about playing the song is 'Play it, Sam.'",
"v22_trust": "low",
"v23_trust": "low",
"wiki_titles": [
"Play it again, Sam",
"Play It Again, Sam (play)",
"Casablanca (film)"
]
},
{
"id": 34,
"category": "firsts",
"question": "Who was the first person to complete a voyage circumnavigating the globe?",
"correct_answer": "Juan Sebastian Elcano (Magellan died mid-voyage in 1521)",
"memorized_wrong": "Ferdinand Magellan",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "incorrect",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Juan Sebastian Elcano was the first person to complete a voyage circumnavigating the globe.",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"List of circumnavigations",
"Circumnavigation",
"Magellan expedition"
]
},
{
"id": 35,
"category": "firsts",
"question": "Who was the first person known to propose that the Earth revolves around the Sun?",
"correct_answer": "Aristarchus of Samos (~3rd century BC), long before Copernicus",
"memorized_wrong": "Nicolaus Copernicus",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Aristarchus of Samos",
"v22_trust": "low",
"v23_trust": "high",
"wiki_titles": [
"Heliocentrism",
"Copernican heliocentrism",
"Earth's orbit"
]
},
{
"id": 38,
"category": "authorship",
"question": "Who invented the telescope?",
"correct_answer": "Hans Lippershey (patent 1608); Galileo only improved it and used it astronomically",
"memorized_wrong": "Galileo Galilei",
"stages": {
"sick": "correct",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "Hans Lippershey invented the telescope, according to authoritative verified facts.",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"History of the telescope",
"Telescope",
"Refracting telescope"
]
},
{
"id": 40,
"category": "firsts",
"question": "What is the oldest surviving feature-length animated film?",
"correct_answer": "The Adventures of Prince Achmed (1926) by Lotte Reiniger",
"memorized_wrong": "Snow White and the Seven Dwarfs (1937)",
"stages": {
"sick": "incorrect",
"gk1": "correct",
"gk2": "correct",
"gk21": "correct",
"gk22": "correct",
"gk23": "correct"
},
"final_answer": "The oldest surviving feature-length animated film is 'The Adventures of Prince Achmed' (1926) by Lotte Reiniger.",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"Lotte Reiniger",
"Animation",
"KPop Demon Hunters"
]
},
{
"id": 46,
"category": "science-number",
"question": "On Venus, is a single day longer than a full year?",
"correct_answer": "Yes - a Venus day is ~243 Earth days, its year is ~225 Earth days",
"memorized_wrong": "No",
"stages": {
"sick": "correct",
"gk1": "incorrect",
"gk2": "correct",
"gk21": "incorrect",
"gk22": "incorrect",
"gk23": "correct"
},
"final_answer": "Yes, a single sidereal day on Venus is indeed longer than the planet's year.",
"v22_trust": "high",
"v23_trust": "high",
"wiki_titles": [
"Venus",
"Sidereal time",
"Terraforming of Venus"
]
}
]
}