VericodingEBM-demo / data /corruption_examples.json
guychuk's picture
deploy demo static site
92c6c3e verified
Raw
History Blame Contribute Delete
27.3 kB
[
{
"name": "adts_generics_offByOne",
"spec_id": "verus-real-adts_generics-39d41db00801",
"label": "ADT field: off-by-one assertion",
"blurb": "The FAIL impl asserts `id(p2).a >= 1` for a struct with field value 2; should be `>= 0` (or any smaller bound). The fix is a single-digit change in the assertion. Model should flag the failing assertion line.",
"spec": "\n struct P<A> {\n a: A,\n }\n\n #[verifier(opaque)] /* vattr */\n spec fn id<A>(a: A) -> A {\n a\n }\n",
"variants": [
{
"label": "Original FAIL (with `// FAILS` marker)",
"kind": "fail_original",
"note": "The failing implementation, as it appears in the corpus.",
"impl": " fn fp(p: P<u64>) {\n assert(p.a >= 0);\n let p2: P<u8> = P { a: 2 };\n assert(id(p).a >= 0);\n assert(id(p2).a >= 1); // FAILS\n }\n ",
"per_line_energies": [
-4.312364101409912,
-2.0651328563690186,
-3.724514961242676,
-1.357027530670166,
-3.741136312484741,
-4.280332088470459
],
"line_xys": [
[
-8.406805992126465,
-2.913304328918457
],
[
5.622659206390381,
2.4934420585632324
],
[
4.510895729064941,
0.38299620151519775
],
[
5.428738594055176,
2.885453462600708
],
[
9.393533706665039,
-2.2792484760284424
],
[
-0.6494276523590088,
10.90005111694336
]
],
"whole_impl_energy": 3.051603078842163,
"whole_impl_xy": [
5.2392191886901855,
-2.309102773666382
]
},
{
"label": "FAIL with the `// FAILS` marker stripped",
"kind": "fail_marker_stripped",
"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
"impl": " fn fp(p: P<u64>) {\n assert(p.a >= 0);\n let p2: P<u8> = P { a: 2 };\n assert(id(p).a >= 0);\n assert(id(p2).a >= 1);\n }\n ",
"per_line_energies": [
-4.312364101409912,
-2.0651328563690186,
-3.724514961242676,
-1.357027530670166,
-0.8481870889663696,
-3.9516422748565674
],
"line_xys": [
[
-8.406805992126465,
-2.913304328918457
],
[
5.622659206390381,
2.4934420585632324
],
[
4.510895729064941,
0.38299620151519775
],
[
5.428738594055176,
2.885453462600708
],
[
5.5369768142700195,
2.838397741317749
],
[
-0.19628357887268066,
10.417791366577148
]
],
"whole_impl_energy": -9.885170936584473,
"whole_impl_xy": [
5.45591926574707,
-2.4405746459960938
]
},
{
"label": "Sibling PASS (the corrected version)",
"kind": "pass_sibling",
"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
"impl": " fn fp(p: P<u64>) {\n assert(p.a >= 0);\n let p2: P<u8> = P { a: 2 };\n assert(id(p).a >= 0);\n assert(id(p2).a >= 0);\n }\n ",
"per_line_energies": [
-4.312364101409912,
-2.0651328563690186,
-3.724514961242676,
-1.357027530670166,
-1.154883623123169,
-4.013641357421875
],
"line_xys": [
[
-8.406805992126465,
-2.913304328918457
],
[
5.622659206390381,
2.4934420585632324
],
[
4.510895729064941,
0.38299620151519775
],
[
5.428738594055176,
2.885453462600708
],
[
5.454499244689941,
2.89972186088562
],
[
-0.25493931770324707,
10.51761245727539
]
],
"whole_impl_energy": -9.683773040771484,
"whole_impl_xy": [
5.505842208862305,
-2.4271240234375
]
}
]
},
{
"name": "match_assertOnBool",
"spec_id": "verus-real-match-38d4dd210af4",
"label": "Destructured bool \u2014 wrong assertion",
"blurb": "Pattern-binds `b: false` to `z`, then asserts `z` (which is false). The fix is to assert `!z`. Tiny code, obvious bug \u2014 perfect to corrupt either way.",
"spec": "\n struct Pair<A, B> { a: A, b: B }\n",
"variants": [
{
"label": "Original FAIL (with `// FAILS` marker)",
"kind": "fail_original",
"note": "The failing implementation, as it appears in the corpus.",
"impl": " fn test() {\n let Pair { b: z, .. } = Pair { a: true, b: false };\n assert(z); // FAILS\n }\n ",
"per_line_energies": [
-4.159627437591553,
-3.618788480758667,
-4.849560260772705,
-4.62613582611084
],
"line_xys": [
[
-8.934646606445312,
-3.376649856567383
],
[
4.6714982986450195,
0.46984797716140747
],
[
9.336337089538574,
-2.825427770614624
],
[
-0.39905214309692383,
11.573123931884766
]
],
"whole_impl_energy": -1.275396704673767,
"whole_impl_xy": [
3.310936212539673,
-1.9560261964797974
]
},
{
"label": "FAIL with the `// FAILS` marker stripped",
"kind": "fail_marker_stripped",
"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
"impl": " fn test() {\n let Pair { b: z, .. } = Pair { a: true, b: false };\n assert(z);\n }\n ",
"per_line_energies": [
-4.159628868103027,
-3.6187844276428223,
-3.3367698192596436,
-4.453369140625
],
"line_xys": [
[
-8.934635162353516,
-3.3766422271728516
],
[
4.671501159667969,
0.4698495864868164
],
[
5.637628555297852,
2.412722110748291
],
[
-0.5781629085540771,
11.25796127319336
]
],
"whole_impl_energy": -11.960877418518066,
"whole_impl_xy": [
3.7518413066864014,
-3.2721614837646484
]
},
{
"label": "Sibling PASS (the corrected version)",
"kind": "pass_sibling",
"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
"impl": " fn test() {\n let Pair { b: z, .. } = Pair { a: true, b: false };\n assert(!z);\n }\n ",
"per_line_energies": [
-4.159627437591553,
-3.618788480758667,
-2.8719632625579834,
-4.402265548706055
],
"line_xys": [
[
-8.934646606445312,
-3.376649856567383
],
[
4.6714982986450195,
0.46984797716140747
],
[
5.454472064971924,
2.3245902061462402
],
[
-0.5004818439483643,
11.212791442871094
]
],
"whole_impl_energy": -12.191555976867676,
"whole_impl_xy": [
4.147657871246338,
-3.402697801589966
]
}
]
},
{
"name": "return_falseEnsures",
"spec_id": "verus-real-return-637895b687d2",
"label": "Early return with `ensures false`",
"blurb": "The spec claims `ensures false` (which is impossible for any non-diverging function), so the early `return;` fails. The PASS variant uses `ensures true`.",
"spec": "\n fn test_ret(b: bool)\n requires b\n ensures false\n {\n if b {\n return; // FAILS\n }\n }\n ",
"variants": [
{
"label": "Original FAIL (with `// FAILS` marker)",
"kind": "fail_original",
"note": "The failing implementation, as it appears in the corpus.",
"impl": "\n fn test_ret(b: bool)\n requires b\n ensures false\n {\n if b {\n return; // FAILS\n }\n }\n ",
"per_line_energies": [
-4.930928707122803,
-5.1179633140563965,
-3.8959875106811523,
-4.980451583862305,
-4.769778251647949,
-5.5469231605529785,
-5.152368068695068,
-4.921245098114014
],
"line_xys": [
[
-0.826542317867279,
-3.427609920501709
],
[
0.021525084972381592,
-2.374438762664795
],
[
1.2383750677108765,
-2.2680063247680664
],
[
-8.136902809143066,
3.2441844940185547
],
[
-7.339657783508301,
2.867629051208496
],
[
8.162191390991211,
-3.208183765411377
],
[
0.3773815631866455,
14.018171310424805
],
[
-0.005739450454711914,
13.467951774597168
]
],
"whole_impl_energy": -9.10583782196045,
"whole_impl_xy": [
-4.327732086181641,
0.10555781424045563
]
},
{
"label": "FAIL with the `// FAILS` marker stripped",
"kind": "fail_marker_stripped",
"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
"impl": "\n fn test_ret(b: bool)\n requires b\n ensures false\n {\n if b {\n return;\n }\n }\n ",
"per_line_energies": [
-4.930928707122803,
-5.1179633140563965,
-3.8959875106811523,
-4.980451583862305,
-4.769778251647949,
-4.264760971069336,
-4.945723056793213,
-4.943498134613037
],
"line_xys": [
[
-0.826542317867279,
-3.427609920501709
],
[
0.021525084972381592,
-2.374438762664795
],
[
1.2383750677108765,
-2.2680063247680664
],
[
-8.136902809143066,
3.2441844940185547
],
[
-7.339657783508301,
2.867629051208496
],
[
4.890426158905029,
0.03740590810775757
],
[
0.05954790115356445,
13.606391906738281
],
[
-0.013641595840454102,
13.338898658752441
]
],
"whole_impl_energy": -9.381916046142578,
"whole_impl_xy": [
-4.198087215423584,
0.17790813744068146
]
},
{
"label": "Sibling PASS (the corrected version)",
"kind": "pass_sibling",
"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
"impl": "\n fn test_ret(b: bool)\n ensures true\n {\n if b {\n return;\n }\n }\n ",
"per_line_energies": [
-4.930928707122803,
-4.149007797241211,
-4.774252414703369,
-4.768009662628174,
-4.341168403625488,
-4.930472373962402,
-4.916696548461914
],
"line_xys": [
[
-0.826542317867279,
-3.427609920501709
],
[
1.1416114568710327,
-2.5038437843322754
],
[
-8.005287170410156,
3.195586681365967
],
[
-7.412189960479736,
2.835704803466797
],
[
4.903536319732666,
0.044904112815856934
],
[
-0.2261216640472412,
13.16274642944336
],
[
-0.18888378143310547,
13.121706008911133
]
],
"whole_impl_energy": -9.539336204528809,
"whole_impl_xy": [
-4.301799774169922,
-0.18400156497955322
]
}
]
},
{
"name": "quantifiers_missingTrigger",
"spec_id": "verus-real-quantifiers-5417c12c4ee6",
"label": "Existential without instantiation hint",
"blurb": "The FAIL impl asks Verus to prove an existential without giving it a witness; the PASS variant provides one via an explicit `assert(tr::<nat>(300))`. Classic SMT-trigger problem.",
"spec": "\n spec fn tr<A>(a: A) -> bool {\n true\n }\n\n proof fn test1() {\n assert(exists|i: nat| i >= 0 && tr(i)); // FAILS\n }\n ",
"variants": [
{
"label": "Original FAIL (with `// FAILS` marker)",
"kind": "fail_original",
"note": "The failing implementation, as it appears in the corpus.",
"impl": "\n spec fn tr<A>(a: A) -> bool {\n true\n }\n\n proof fn test1() {\n assert(exists|i: nat| i >= 0 && tr(i)); // FAILS\n }\n ",
"per_line_energies": [
-4.120296955108643,
-5.266938209533691,
-4.786546230316162,
-4.127711296081543,
-4.348798751831055,
-4.325232982635498
],
"line_xys": [
[
-6.959667205810547,
-2.017061710357666
],
[
1.3870033025741577,
-1.9296854734420776
],
[
-1.6163464784622192,
12.248452186584473
],
[
-5.666328430175781,
-0.9144415259361267
],
[
8.959779739379883,
-2.973499059677124
],
[
-0.19122314453125,
12.748680114746094
]
],
"whole_impl_energy": -1.3118290901184082,
"whole_impl_xy": [
1.0291283130645752,
1.2617019414901733
]
},
{
"label": "FAIL with the `// FAILS` marker stripped",
"kind": "fail_marker_stripped",
"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
"impl": "\n spec fn tr<A>(a: A) -> bool {\n true\n }\n\n proof fn test1() {\n assert(exists|i: nat| i >= 0 && tr(i));\n }\n ",
"per_line_energies": [
-4.120296955108643,
-5.266938209533691,
-4.786546230316162,
-4.127711296081543,
-0.9432859420776367,
-3.8172919750213623
],
"line_xys": [
[
-6.959667205810547,
-2.017061710357666
],
[
1.3870033025741577,
-1.9296854734420776
],
[
-1.6163464784622192,
12.248452186584473
],
[
-5.666328430175781,
-0.9144415259361267
],
[
5.502582550048828,
2.765582323074341
],
[
-0.47302865982055664,
11.527365684509277
]
],
"whole_impl_energy": -2.648362159729004,
"whole_impl_xy": [
2.6825802326202393,
-0.8163268566131592
]
},
{
"label": "Sibling PASS (the corrected version)",
"kind": "pass_sibling",
"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
"impl": "\n spec fn tr<A>(a: A) -> bool {\n true\n }\n\n proof fn test1() {\n assert(tr::<nat>(300));\n assert(exists|i: nat| i >= 0 && tr(i));\n }\n ",
"per_line_energies": [
-4.120296955108643,
-5.266938209533691,
-4.786546230316162,
-4.127711296081543,
-1.568681240081787,
-0.6954700946807861,
-3.762420892715454
],
"line_xys": [
[
-6.959667205810547,
-2.017061710357666
],
[
1.3870033025741577,
-1.9296854734420776
],
[
-1.6163464784622192,
12.248452186584473
],
[
-5.666328430175781,
-0.9144415259361267
],
[
5.488433837890625,
2.6391892433166504
],
[
5.622238636016846,
2.9574265480041504
],
[
0.2243821620941162,
10.356048583984375
]
],
"whole_impl_energy": -5.63201379776001,
"whole_impl_xy": [
3.900352716445923,
-2.063196897506714
]
}
]
},
{
"name": "ext_equal_missingAttr",
"spec_id": "verus-real-ext_equal-f53fcd66fb8a",
"label": "Extensional equality \u2014 missing attribute",
"blurb": "Verus needs `#[verifier::auto_ext_equal()]` to compare two `spec_fn`s for extensional equality. The FAIL drops the attribute (and the matching assume).",
"spec": "\n proof fn test(x: spec_fn(int, u8) -> int, y: spec_fn(int, u8) -> int) {\n assert(x =~= y); // FAILS\n }\n ",
"variants": [
{
"label": "Original FAIL (with `// FAILS` marker)",
"kind": "fail_original",
"note": "The failing implementation, as it appears in the corpus.",
"impl": "\n proof fn test(x: spec_fn(int, u8) -> int, y: spec_fn(int, u8) -> int) {\n assert(x =~= y); // FAILS\n }\n ",
"per_line_energies": [
-3.775700807571411,
-5.459646701812744,
-4.552056789398193
],
"line_xys": [
[
-6.556730270385742,
-2.1790268421173096
],
[
9.042189598083496,
-3.1906418800354004
],
[
-1.3388727903366089,
11.240428924560547
]
],
"whole_impl_energy": -0.6807039976119995,
"whole_impl_xy": [
1.3387033939361572,
0.644698977470398
]
},
{
"label": "FAIL with the `// FAILS` marker stripped",
"kind": "fail_marker_stripped",
"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
"impl": "\n proof fn test(x: spec_fn(int, u8) -> int, y: spec_fn(int, u8) -> int) {\n assert(x =~= y);\n }\n ",
"per_line_energies": [
-3.775700807571411,
-3.684814691543579,
-4.394778728485107
],
"line_xys": [
[
-6.556730270385742,
-2.1790268421173096
],
[
4.1410136222839355,
1.9373893737792969
],
[
-1.2933682203292847,
11.707147598266602
]
],
"whole_impl_energy": -1.084522008895874,
"whole_impl_xy": [
0.7920708656311035,
-0.5986494421958923
]
},
{
"label": "Sibling PASS (the corrected version)",
"kind": "pass_sibling",
"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
"impl": "\n #[verifier::auto_ext_equal()]\n proof fn test(x: spec_fn(int, u8) -> int, y: spec_fn(int, u8) -> int) {\n assume(forall|i: int, j: u8| #[trigger] x(i, j) == y(i, j));\n assert(x =~= y);\n }\n ",
"per_line_energies": [
-3.517425537109375,
-3.9848458766937256,
-1.116733431816101,
-3.500765323638916,
-4.4335246086120605
],
"line_xys": [
[
-1.1604024171829224,
-5.00888729095459
],
[
-6.0758490562438965,
-1.4889153242111206
],
[
5.6109619140625,
2.5506060123443604
],
[
4.978909492492676,
2.521366834640503
],
[
-0.7007665634155273,
11.749643325805664
]
],
"whole_impl_energy": -4.954568862915039,
"whole_impl_xy": [
3.5154690742492676,
-1.849034309387207
]
}
]
},
{
"name": "traits_typeBoundedEq",
"spec_id": "verus-real-traits-47a2060aa419",
"label": "Trait dispatch: type-bounded equality",
"blurb": "`S::f(1u8)` and `S::f(1u16)` are dispatched to two different trait impls (true vs false), so `==` on them fails. PASS variant decomposes the comparison into individual assertions.",
"spec": "\n trait T<A> { spec fn f(a: A) -> bool; }\n struct S {}\n impl T<u8> for S { spec fn f(a: u8) -> bool { true } }\n impl T<u16> for S { spec fn f(a: u16) -> bool { false } }\n proof fn test() {\n assert(S::f(1u8) == S::f(1u16)); // FAILS\n }\n ",
"variants": [
{
"label": "Original FAIL (with `// FAILS` marker)",
"kind": "fail_original",
"note": "The failing implementation, as it appears in the corpus.",
"impl": "\n trait T<A> { spec fn f(a: A) -> bool; }\n struct S {}\n impl T<u8> for S { spec fn f(a: u8) -> bool { true } }\n impl T<u16> for S { spec fn f(a: u16) -> bool { false } }\n proof fn test() {\n assert(S::f(1u8) == S::f(1u16)); // FAILS\n }\n ",
"per_line_energies": [
-4.155222415924072,
-4.9867777824401855,
-4.5410332679748535,
-4.580207824707031,
-4.291271686553955,
-4.639759540557861,
-4.371699810028076
],
"line_xys": [
[
-3.190279722213745,
-3.3201262950897217
],
[
-0.3200542628765106,
-3.314215660095215
],
[
-2.583231210708618,
-2.029024839401245
],
[
-2.574728012084961,
-2.0363216400146484
],
[
-5.187599182128906,
0.30855077505111694
],
[
8.843928337097168,
-2.9548017978668213
],
[
0.4304656982421875,
13.087059020996094
]
],
"whole_impl_energy": -1.8506885766983032,
"whole_impl_xy": [
-2.0996992588043213,
2.2170209884643555
]
},
{
"label": "FAIL with the `// FAILS` marker stripped",
"kind": "fail_marker_stripped",
"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
"impl": "\n trait T<A> { spec fn f(a: A) -> bool; }\n struct S {}\n impl T<u8> for S { spec fn f(a: u8) -> bool { true } }\n impl T<u16> for S { spec fn f(a: u16) -> bool { false } }\n proof fn test() {\n assert(S::f(1u8) == S::f(1u16));\n }\n ",
"per_line_energies": [
-4.155222415924072,
-4.9867777824401855,
-4.5410332679748535,
-4.580207824707031,
-4.291271686553955,
-3.3606410026550293,
-4.317942142486572
],
"line_xys": [
[
-3.190279722213745,
-3.3201262950897217
],
[
-0.3200542628765106,
-3.314215660095215
],
[
-2.583231210708618,
-2.029024839401245
],
[
-2.574728012084961,
-2.0363216400146484
],
[
-5.187599182128906,
0.30855077505111694
],
[
4.150506973266602,
2.433469533920288
],
[
0.5894498825073242,
13.19697380065918
]
],
"whole_impl_energy": -1.9686957597732544,
"whole_impl_xy": [
-1.8611252307891846,
2.256967306137085
]
},
{
"label": "Sibling PASS (the corrected version)",
"kind": "pass_sibling",
"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
"impl": "\n trait T<A> { spec fn f(a: A) -> bool; }\n struct S {}\n impl T<u8> for S { spec fn f(a: u8) -> bool { true } }\n impl T<u16> for S { spec fn f(a: u16) -> bool { false } }\n proof fn test() {\n assert(S::f(1u8));\n assert(!S::f(1u16));\n }\n ",
"per_line_energies": [
-4.155222415924072,
-4.9867777824401855,
-4.5410332679748535,
-4.580207824707031,
-4.291271686553955,
-3.4319276809692383,
-2.9175608158111572,
-4.3966288566589355
],
"line_xys": [
[
-3.190279722213745,
-3.3201262950897217
],
[
-0.3200542628765106,
-3.314215660095215
],
[
-2.583231210708618,
-2.029024839401245
],
[
-2.574728012084961,
-2.0363216400146484
],
[
-5.187599182128906,
0.30855077505111694
],
[
4.739560604095459,
2.7308783531188965
],
[
4.8291473388671875,
3.0484092235565186
],
[
0.8358564376831055,
12.786066055297852
]
],
"whole_impl_energy": -0.8767989873886108,
"whole_impl_xy": [
-1.0229911804199219,
1.9529011249542236
]
}
]
}
]