Spaces:

OzLabs
/

VericodingEBM-demo

Running

App Files Files Community

VericodingEBM-demo / data /corruption_examples.json

guychuk

deploy demo static site

92c6c3e verified about 1 month ago

Raw

History Blame Contribute Delete

27.3 kB

	[
	{
	"name": "adts_generics_offByOne",
	"spec_id": "verus-real-adts_generics-39d41db00801",
	"label": "ADT field: off-by-one assertion",
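	"blurb": "The FAIL impl asserts `id(p2).a >= 1` for a struct whose field value is 2. Because `id` is marked opaque, the verifier cannot see that `id(p2).a == 2` and only knows the `u8` type bound, so the assertion should be `>= 0`. The fix is a single-digit change in the assertion. Model should flag the failing assertion line.",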
	"spec": "\n struct P<A> {\n a: A,\n }\n\n #[verifier(opaque)] /* vattr */\n spec fn id<A>(a: A) -> A {\n a\n }\n",
	"variants": [
	{
	"label": "Original FAIL (with `// FAILS` marker)",
	"kind": "fail_original",
	"note": "The failing implementation, as it appears in the corpus.",
	"impl": " fn fp(p: P<u64>) {\n assert(p.a >= 0);\n let p2: P<u8> = P { a: 2 };\n assert(id(p).a >= 0);\n assert(id(p2).a >= 1); // FAILS\n }\n ",
	"per_line_energies": [
	-4.312364101409912,
	-2.0651328563690186,
	-3.724514961242676,
	-1.357027530670166,
	-3.741136312484741,
	-4.280332088470459
	],
	"line_xys": [
	[
	-8.406805992126465,
	-2.913304328918457
	],
	[
	5.622659206390381,
	2.4934420585632324
	],
	[
	4.510895729064941,
	0.38299620151519775
	],
	[
	5.428738594055176,
	2.885453462600708
	],
	[
	9.393533706665039,
	-2.2792484760284424
	],
	[
	-0.6494276523590088,
	10.90005111694336
	]
	],
	"whole_impl_energy": 3.051603078842163,
	"whole_impl_xy": [
	5.2392191886901855,
	-2.309102773666382
	]
	},
	{
	"label": "FAIL with the `// FAILS` marker stripped",
	"kind": "fail_marker_stripped",
	"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
	"impl": " fn fp(p: P<u64>) {\n assert(p.a >= 0);\n let p2: P<u8> = P { a: 2 };\n assert(id(p).a >= 0);\n assert(id(p2).a >= 1);\n }\n ",
	"per_line_energies": [
	-4.312364101409912,
	-2.0651328563690186,
	-3.724514961242676,
	-1.357027530670166,
	-0.8481870889663696,
	-3.9516422748565674
	],
	"line_xys": [
	[
	-8.406805992126465,
	-2.913304328918457
	],
	[
	5.622659206390381,
	2.4934420585632324
	],
	[
	4.510895729064941,
	0.38299620151519775
	],
	[
	5.428738594055176,
	2.885453462600708
	],
	[
	5.5369768142700195,
	2.838397741317749
	],
	[
	-0.19628357887268066,
	10.417791366577148
	]
	],
	"whole_impl_energy": -9.885170936584473,
	"whole_impl_xy": [
	5.45591926574707,
	-2.4405746459960938
	]
	},
	{
	"label": "Sibling PASS (the corrected version)",
	"kind": "pass_sibling",
	"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
	"impl": " fn fp(p: P<u64>) {\n assert(p.a >= 0);\n let p2: P<u8> = P { a: 2 };\n assert(id(p).a >= 0);\n assert(id(p2).a >= 0);\n }\n ",
	"per_line_energies": [
	-4.312364101409912,
	-2.0651328563690186,
	-3.724514961242676,
	-1.357027530670166,
	-1.154883623123169,
	-4.013641357421875
	],
	"line_xys": [
	[
	-8.406805992126465,
	-2.913304328918457
	],
	[
	5.622659206390381,
	2.4934420585632324
	],
	[
	4.510895729064941,
	0.38299620151519775
	],
	[
	5.428738594055176,
	2.885453462600708
	],
	[
	5.454499244689941,
	2.89972186088562
	],
	[
	-0.25493931770324707,
	10.51761245727539
	]
	],
	"whole_impl_energy": -9.683773040771484,
	"whole_impl_xy": [
	5.505842208862305,
	-2.4271240234375
	]
	}
	]
	},
	{
	"name": "match_assertOnBool",
	"spec_id": "verus-real-match-38d4dd210af4",
	"label": "Destructured bool \u2014 wrong assertion",
	"blurb": "Pattern-binds `b: false` to `z`, then asserts `z` (which is false). The fix is to assert `!z`. Tiny code, obvious bug \u2014 perfect to corrupt either way.",
	"spec": "\n struct Pair<A, B> { a: A, b: B }\n",
	"variants": [
	{
	"label": "Original FAIL (with `// FAILS` marker)",
	"kind": "fail_original",
	"note": "The failing implementation, as it appears in the corpus.",
	"impl": " fn test() {\n let Pair { b: z, .. } = Pair { a: true, b: false };\n assert(z); // FAILS\n }\n ",
	"per_line_energies": [
	-4.159627437591553,
	-3.618788480758667,
	-4.849560260772705,
	-4.62613582611084
	],
	"line_xys": [
	[
	-8.934646606445312,
	-3.376649856567383
	],
	[
	4.6714982986450195,
	0.46984797716140747
	],
	[
	9.336337089538574,
	-2.825427770614624
	],
	[
	-0.39905214309692383,
	11.573123931884766
	]
	],
	"whole_impl_energy": -1.275396704673767,
	"whole_impl_xy": [
	3.310936212539673,
	-1.9560261964797974
	]
	},
	{
	"label": "FAIL with the `// FAILS` marker stripped",
	"kind": "fail_marker_stripped",
	"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
	"impl": " fn test() {\n let Pair { b: z, .. } = Pair { a: true, b: false };\n assert(z);\n }\n ",
	"per_line_energies": [
	-4.159628868103027,
	-3.6187844276428223,
	-3.3367698192596436,
	-4.453369140625
	],
	"line_xys": [
	[
	-8.934635162353516,
	-3.3766422271728516
	],
	[
	4.671501159667969,
	0.4698495864868164
	],
	[
	5.637628555297852,
	2.412722110748291
	],
	[
	-0.5781629085540771,
	11.25796127319336
	]
	],
	"whole_impl_energy": -11.960877418518066,
	"whole_impl_xy": [
	3.7518413066864014,
	-3.2721614837646484
	]
	},
	{
	"label": "Sibling PASS (the corrected version)",
	"kind": "pass_sibling",
	"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
	"impl": " fn test() {\n let Pair { b: z, .. } = Pair { a: true, b: false };\n assert(!z);\n }\n ",
	"per_line_energies": [
	-4.159627437591553,
	-3.618788480758667,
	-2.8719632625579834,
	-4.402265548706055
	],
	"line_xys": [
	[
	-8.934646606445312,
	-3.376649856567383
	],
	[
	4.6714982986450195,
	0.46984797716140747
	],
	[
	5.454472064971924,
	2.3245902061462402
	],
	[
	-0.5004818439483643,
	11.212791442871094
	]
	],
	"whole_impl_energy": -12.191555976867676,
	"whole_impl_xy": [
	4.147657871246338,
	-3.402697801589966
	]
	}
	]
	},
	{
	"name": "return_falseEnsures",
	"spec_id": "verus-real-return-637895b687d2",
	"label": "Early return with `ensures false`",
	"blurb": "The spec claims `ensures false` (which is impossible for any non-diverging function), so the early `return;` fails. The PASS variant uses `ensures true`.",
	"spec": "\n fn test_ret(b: bool)\n requires b\n ensures false\n {\n if b {\n return; // FAILS\n }\n }\n ",
	"variants": [
	{
	"label": "Original FAIL (with `// FAILS` marker)",
	"kind": "fail_original",
	"note": "The failing implementation, as it appears in the corpus.",
	"impl": "\n fn test_ret(b: bool)\n requires b\n ensures false\n {\n if b {\n return; // FAILS\n }\n }\n ",
	"per_line_energies": [
	-4.930928707122803,
	-5.1179633140563965,
	-3.8959875106811523,
	-4.980451583862305,
	-4.769778251647949,
	-5.5469231605529785,
	-5.152368068695068,
	-4.921245098114014
	],
	"line_xys": [
	[
	-0.826542317867279,
	-3.427609920501709
	],
	[
	0.021525084972381592,
	-2.374438762664795
	],
	[
	1.2383750677108765,
	-2.2680063247680664
	],
	[
	-8.136902809143066,
	3.2441844940185547
	],
	[
	-7.339657783508301,
	2.867629051208496
	],
	[
	8.162191390991211,
	-3.208183765411377
	],
	[
	0.3773815631866455,
	14.018171310424805
	],
	[
	-0.005739450454711914,
	13.467951774597168
	]
	],
	"whole_impl_energy": -9.10583782196045,
	"whole_impl_xy": [
	-4.327732086181641,
	0.10555781424045563
	]
	},
	{
	"label": "FAIL with the `// FAILS` marker stripped",
	"kind": "fail_marker_stripped",
	"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
	"impl": "\n fn test_ret(b: bool)\n requires b\n ensures false\n {\n if b {\n return;\n }\n }\n ",
	"per_line_energies": [
	-4.930928707122803,
	-5.1179633140563965,
	-3.8959875106811523,
	-4.980451583862305,
	-4.769778251647949,
	-4.264760971069336,
	-4.945723056793213,
	-4.943498134613037
	],
	"line_xys": [
	[
	-0.826542317867279,
	-3.427609920501709
	],
	[
	0.021525084972381592,
	-2.374438762664795
	],
	[
	1.2383750677108765,
	-2.2680063247680664
	],
	[
	-8.136902809143066,
	3.2441844940185547
	],
	[
	-7.339657783508301,
	2.867629051208496
	],
	[
	4.890426158905029,
	0.03740590810775757
	],
	[
	0.05954790115356445,
	13.606391906738281
	],
	[
	-0.013641595840454102,
	13.338898658752441
	]
	],
	"whole_impl_energy": -9.381916046142578,
	"whole_impl_xy": [
	-4.198087215423584,
	0.17790813744068146
	]
	},
	{
	"label": "Sibling PASS (the corrected version)",
	"kind": "pass_sibling",
	"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
	"impl": "\n fn test_ret(b: bool)\n ensures true\n {\n if b {\n return;\n }\n }\n ",
	"per_line_energies": [
	-4.930928707122803,
	-4.149007797241211,
	-4.774252414703369,
	-4.768009662628174,
	-4.341168403625488,
	-4.930472373962402,
	-4.916696548461914
	],
	"line_xys": [
	[
	-0.826542317867279,
	-3.427609920501709
	],
	[
	1.1416114568710327,
	-2.5038437843322754
	],
	[
	-8.005287170410156,
	3.195586681365967
	],
	[
	-7.412189960479736,
	2.835704803466797
	],
	[
	4.903536319732666,
	0.044904112815856934
	],
	[
	-0.2261216640472412,
	13.16274642944336
	],
	[
	-0.18888378143310547,
	13.121706008911133
	]
	],
	"whole_impl_energy": -9.539336204528809,
	"whole_impl_xy": [
	-4.301799774169922,
	-0.18400156497955322
	]
	}
	]
	},
	{
	"name": "quantifiers_missingTrigger",
	"spec_id": "verus-real-quantifiers-5417c12c4ee6",
	"label": "Existential without instantiation hint",
	"blurb": "The FAIL impl asks Verus to prove an existential without giving it a witness; the PASS variant provides one via an explicit `assert(tr::<nat>(300))`. Classic SMT-trigger problem.",
	"spec": "\n spec fn tr<A>(a: A) -> bool {\n true\n }\n\n proof fn test1() {\n assert(exists|i: nat| i >= 0 && tr(i)); // FAILS\n }\n ",
	"variants": [
	{
	"label": "Original FAIL (with `// FAILS` marker)",
	"kind": "fail_original",
	"note": "The failing implementation, as it appears in the corpus.",
	"impl": "\n spec fn tr<A>(a: A) -> bool {\n true\n }\n\n proof fn test1() {\n assert(exists|i: nat| i >= 0 && tr(i)); // FAILS\n }\n ",
	"per_line_energies": [
	-4.120296955108643,
	-5.266938209533691,
	-4.786546230316162,
	-4.127711296081543,
	-4.348798751831055,
	-4.325232982635498
	],
	"line_xys": [
	[
	-6.959667205810547,
	-2.017061710357666
	],
	[
	1.3870033025741577,
	-1.9296854734420776
	],
	[
	-1.6163464784622192,
	12.248452186584473
	],
	[
	-5.666328430175781,
	-0.9144415259361267
	],
	[
	8.959779739379883,
	-2.973499059677124
	],
	[
	-0.19122314453125,
	12.748680114746094
	]
	],
	"whole_impl_energy": -1.3118290901184082,
	"whole_impl_xy": [
	1.0291283130645752,
	1.2617019414901733
	]
	},
	{
	"label": "FAIL with the `// FAILS` marker stripped",
	"kind": "fail_marker_stripped",
	"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
	"impl": "\n spec fn tr<A>(a: A) -> bool {\n true\n }\n\n proof fn test1() {\n assert(exists|i: nat| i >= 0 && tr(i));\n }\n ",
	"per_line_energies": [
	-4.120296955108643,
	-5.266938209533691,
	-4.786546230316162,
	-4.127711296081543,
	-0.9432859420776367,
	-3.8172919750213623
	],
	"line_xys": [
	[
	-6.959667205810547,
	-2.017061710357666
	],
	[
	1.3870033025741577,
	-1.9296854734420776
	],
	[
	-1.6163464784622192,
	12.248452186584473
	],
	[
	-5.666328430175781,
	-0.9144415259361267
	],
	[
	5.502582550048828,
	2.765582323074341
	],
	[
	-0.47302865982055664,
	11.527365684509277
	]
	],
	"whole_impl_energy": -2.648362159729004,
	"whole_impl_xy": [
	2.6825802326202393,
	-0.8163268566131592
	]
	},
	{
	"label": "Sibling PASS (the corrected version)",
	"kind": "pass_sibling",
	"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
	"impl": "\n spec fn tr<A>(a: A) -> bool {\n true\n }\n\n proof fn test1() {\n assert(tr::<nat>(300));\n assert(exists|i: nat| i >= 0 && tr(i));\n }\n ",
	"per_line_energies": [
	-4.120296955108643,
	-5.266938209533691,
	-4.786546230316162,
	-4.127711296081543,
	-1.568681240081787,
	-0.6954700946807861,
	-3.762420892715454
	],
	"line_xys": [
	[
	-6.959667205810547,
	-2.017061710357666
	],
	[
	1.3870033025741577,
	-1.9296854734420776
	],
	[
	-1.6163464784622192,
	12.248452186584473
	],
	[
	-5.666328430175781,
	-0.9144415259361267
	],
	[
	5.488433837890625,
	2.6391892433166504
	],
	[
	5.622238636016846,
	2.9574265480041504
	],
	[
	0.2243821620941162,
	10.356048583984375
	]
	],
	"whole_impl_energy": -5.63201379776001,
	"whole_impl_xy": [
	3.900352716445923,
	-2.063196897506714
	]
	}
	]
	},
	{
	"name": "ext_equal_missingAttr",
	"spec_id": "verus-real-ext_equal-f53fcd66fb8a",
	"label": "Extensional equality \u2014 missing attribute",
	"blurb": "Verus needs `#[verifier::auto_ext_equal()]` to compare two `spec_fn`s for extensional equality. The FAIL drops the attribute (and the matching assume).",
	"spec": "\n proof fn test(x: spec_fn(int, u8) -> int, y: spec_fn(int, u8) -> int) {\n assert(x =~= y); // FAILS\n }\n ",
	"variants": [
	{
	"label": "Original FAIL (with `// FAILS` marker)",
	"kind": "fail_original",
	"note": "The failing implementation, as it appears in the corpus.",
	"impl": "\n proof fn test(x: spec_fn(int, u8) -> int, y: spec_fn(int, u8) -> int) {\n assert(x =~= y); // FAILS\n }\n ",
	"per_line_energies": [
	-3.775700807571411,
	-5.459646701812744,
	-4.552056789398193
	],
	"line_xys": [
	[
	-6.556730270385742,
	-2.1790268421173096
	],
	[
	9.042189598083496,
	-3.1906418800354004
	],
	[
	-1.3388727903366089,
	11.240428924560547
	]
	],
	"whole_impl_energy": -0.6807039976119995,
	"whole_impl_xy": [
	1.3387033939361572,
	0.644698977470398
	]
	},
	{
	"label": "FAIL with the `// FAILS` marker stripped",
	"kind": "fail_marker_stripped",
	"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
	"impl": "\n proof fn test(x: spec_fn(int, u8) -> int, y: spec_fn(int, u8) -> int) {\n assert(x =~= y);\n }\n ",
	"per_line_energies": [
	-3.775700807571411,
	-3.684814691543579,
	-4.394778728485107
	],
	"line_xys": [
	[
	-6.556730270385742,
	-2.1790268421173096
	],
	[
	4.1410136222839355,
	1.9373893737792969
	],
	[
	-1.2933682203292847,
	11.707147598266602
	]
	],
	"whole_impl_energy": -1.084522008895874,
	"whole_impl_xy": [
	0.7920708656311035,
	-0.5986494421958923
	]
	},
	{
	"label": "Sibling PASS (the corrected version)",
	"kind": "pass_sibling",
	"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
	"impl": "\n #[verifier::auto_ext_equal()]\n proof fn test(x: spec_fn(int, u8) -> int, y: spec_fn(int, u8) -> int) {\n assume(forall|i: int, j: u8| #[trigger] x(i, j) == y(i, j));\n assert(x =~= y);\n }\n ",
	"per_line_energies": [
	-3.517425537109375,
	-3.9848458766937256,
	-1.116733431816101,
	-3.500765323638916,
	-4.4335246086120605
	],
	"line_xys": [
	[
	-1.1604024171829224,
	-5.00888729095459
	],
	[
	-6.0758490562438965,
	-1.4889153242111206
	],
	[
	5.6109619140625,
	2.5506060123443604
	],
	[
	4.978909492492676,
	2.521366834640503
	],
	[
	-0.7007665634155273,
	11.749643325805664
	]
	],
	"whole_impl_energy": -4.954568862915039,
	"whole_impl_xy": [
	3.5154690742492676,
	-1.849034309387207
	]
	}
	]
	},
	{
	"name": "traits_typeBoundedEq",
	"spec_id": "verus-real-traits-47a2060aa419",
	"label": "Trait dispatch: type-bounded equality",
	"blurb": "`S::f(1u8)` and `S::f(1u16)` are dispatched to two different trait impls (true vs false), so `==` on them fails. PASS variant decomposes the comparison into individual assertions.",
	"spec": "\n trait T<A> { spec fn f(a: A) -> bool; }\n struct S {}\n impl T<u8> for S { spec fn f(a: u8) -> bool { true } }\n impl T<u16> for S { spec fn f(a: u16) -> bool { false } }\n proof fn test() {\n assert(S::f(1u8) == S::f(1u16)); // FAILS\n }\n ",
	"variants": [
	{
	"label": "Original FAIL (with `// FAILS` marker)",
	"kind": "fail_original",
	"note": "The failing implementation, as it appears in the corpus.",
	"impl": "\n trait T<A> { spec fn f(a: A) -> bool; }\n struct S {}\n impl T<u8> for S { spec fn f(a: u8) -> bool { true } }\n impl T<u16> for S { spec fn f(a: u16) -> bool { false } }\n proof fn test() {\n assert(S::f(1u8) == S::f(1u16)); // FAILS\n }\n ",
	"per_line_energies": [
	-4.155222415924072,
	-4.9867777824401855,
	-4.5410332679748535,
	-4.580207824707031,
	-4.291271686553955,
	-4.639759540557861,
	-4.371699810028076
	],
	"line_xys": [
	[
	-3.190279722213745,
	-3.3201262950897217
	],
	[
	-0.3200542628765106,
	-3.314215660095215
	],
	[
	-2.583231210708618,
	-2.029024839401245
	],
	[
	-2.574728012084961,
	-2.0363216400146484
	],
	[
	-5.187599182128906,
	0.30855077505111694
	],
	[
	8.843928337097168,
	-2.9548017978668213
	],
	[
	0.4304656982421875,
	13.087059020996094
	]
	],
	"whole_impl_energy": -1.8506885766983032,
	"whole_impl_xy": [
	-2.0996992588043213,
	2.2170209884643555
	]
	},
	{
	"label": "FAIL with the `// FAILS` marker stripped",
	"kind": "fail_marker_stripped",
	"note": "Same buggy code, but the marker token Qwen pretrained on is gone. Hybrid-Averse should still flag the bug.",
	"impl": "\n trait T<A> { spec fn f(a: A) -> bool; }\n struct S {}\n impl T<u8> for S { spec fn f(a: u8) -> bool { true } }\n impl T<u16> for S { spec fn f(a: u16) -> bool { false } }\n proof fn test() {\n assert(S::f(1u8) == S::f(1u16));\n }\n ",
	"per_line_energies": [
	-4.155222415924072,
	-4.9867777824401855,
	-4.5410332679748535,
	-4.580207824707031,
	-4.291271686553955,
	-3.3606410026550293,
	-4.317942142486572
	],
	"line_xys": [
	[
	-3.190279722213745,
	-3.3201262950897217
	],
	[
	-0.3200542628765106,
	-3.314215660095215
	],
	[
	-2.583231210708618,
	-2.029024839401245
	],
	[
	-2.574728012084961,
	-2.0363216400146484
	],
	[
	-5.187599182128906,
	0.30855077505111694
	],
	[
	4.150506973266602,
	2.433469533920288
	],
	[
	0.5894498825073242,
	13.19697380065918
	]
	],
	"whole_impl_energy": -1.9686957597732544,
	"whole_impl_xy": [
	-1.8611252307891846,
	2.256967306137085
	]
	},
	{
	"label": "Sibling PASS (the corrected version)",
	"kind": "pass_sibling",
	"note": "A different implementation that actually verifies. Model energies should drop visibly across the board.",
	"impl": "\n trait T<A> { spec fn f(a: A) -> bool; }\n struct S {}\n impl T<u8> for S { spec fn f(a: u8) -> bool { true } }\n impl T<u16> for S { spec fn f(a: u16) -> bool { false } }\n proof fn test() {\n assert(S::f(1u8));\n assert(!S::f(1u16));\n }\n ",
	"per_line_energies": [
	-4.155222415924072,
	-4.9867777824401855,
	-4.5410332679748535,
	-4.580207824707031,
	-4.291271686553955,
	-3.4319276809692383,
	-2.9175608158111572,
	-4.3966288566589355
	],
	"line_xys": [
	[
	-3.190279722213745,
	-3.3201262950897217
	],
	[
	-0.3200542628765106,
	-3.314215660095215
	],
	[
	-2.583231210708618,
	-2.029024839401245
	],
	[
	-2.574728012084961,
	-2.0363216400146484
	],
	[
	-5.187599182128906,
	0.30855077505111694
	],
	[
	4.739560604095459,
	2.7308783531188965
	],
	[
	4.8291473388671875,
	3.0484092235565186
	],
	[
	0.8358564376831055,
	12.786066055297852
	]
	],
	"whole_impl_energy": -0.8767989873886108,
	"whole_impl_xy": [
	-1.0229911804199219,
	1.9529011249542236
	]
	}
	]
	}
	]