VericodingEBM-demo / data /examples.json
guychuk's picture
deploy demo static site
92c6c3e verified
Raw
History Blame Contribute Delete
7.62 kB
[
{
"impl_id": "verus-real-match-329b5a3c6511-fail-test1_fails-0",
"category": "model_win",
"label": "Model top-1 = gold bug",
"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
"stats": {
"n_lines": 23,
"n_buggy": 1,
"top1_energy": -1.6794092655181885,
"top1_line_idx": 20
},
"x": 6.765385627746582,
"y": 0.7981084585189819,
"whole_impl_energy": 0.5201228857040405
},
{
"impl_id": "verus-real-loops-b620c1b9261f-fail-test_variables_havoc_nested-4",
"category": "model_win",
"label": "Model top-1 = gold bug",
"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
"stats": {
"n_lines": 17,
"n_buggy": 1,
"top1_energy": -4.065792560577393,
"top1_line_idx": 14
},
"x": -4.582046985626221,
"y": 1.6073641777038574,
"whole_impl_energy": -5.544034481048584
},
{
"impl_id": "verus-real-integer_ring-6b47daab383f-fail-div_by_zero_fail-4",
"category": "model_win",
"label": "Model top-1 = gold bug",
"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
"stats": {
"n_lines": 8,
"n_buggy": 1,
"top1_energy": -2.3478376865386963,
"top1_line_idx": 6
},
"x": 0.23251426219940186,
"y": 1.1754875183105469,
"whole_impl_energy": -6.044948101043701
},
{
"impl_id": "verus-real-scope-da39a3ee5e6b-fail-test1_fails-0",
"category": "model_win",
"label": "Model top-1 = gold bug",
"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
"stats": {
"n_lines": 10,
"n_buggy": 1,
"top1_energy": -3.7255990505218506,
"top1_line_idx": 8
},
"x": 4.069056034088135,
"y": 0.17309078574180603,
"whole_impl_energy": 0.7895070314407349
},
{
"impl_id": "verus-real-assert_by_compute-90698af23ac9-fail-default_impl_1_issue1406-10",
"category": "model_win",
"label": "Model top-1 = gold bug",
"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
"stats": {
"n_lines": 14,
"n_buggy": 1,
"top1_energy": -4.291623592376709,
"top1_line_idx": 12
},
"x": -2.085197925567627,
"y": 1.599167823791504,
"whole_impl_energy": -4.477553367614746
},
{
"impl_id": "verus-real-traits-14220565a25f-fail-test_verify_6-48",
"category": "model_win",
"label": "Model top-1 = gold bug",
"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
"stats": {
"n_lines": 24,
"n_buggy": 2,
"top1_energy": -3.4990246295928955,
"top1_line_idx": 22
},
"x": 5.509369850158691,
"y": 1.9690308570861816,
"whole_impl_energy": 0.9066431522369385
},
{
"impl_id": "verus-real-modes-80702303081f-fail-match_in_pure_expr-64",
"category": "model_win",
"label": "Model top-1 = gold bug",
"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
"stats": {
"n_lines": 9,
"n_buggy": 1,
"top1_energy": -4.125854015350342,
"top1_line_idx": 1
},
"x": -3.986729621887207,
"y": 0.5290116667747498,
"whole_impl_energy": 1.928382396697998
},
{
"impl_id": "verus-real-opaque_types-582cbcfe88e6-fail-test_return_opaque_type_reveal_real_type-1",
"category": "model_win",
"label": "Model top-1 = gold bug",
"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
"stats": {
"n_lines": 11,
"n_buggy": 1,
"top1_energy": -3.682386636734009,
"top1_line_idx": 9
},
"x": 5.55057954788208,
"y": 1.4102814197540283,
"whole_impl_energy": 1.1414135694503784
},
{
"impl_id": "verus-real-adts-1940c966bbb4-fail-test_update2_fails-0",
"category": "model_miss",
"label": "Model top-1 \u2260 gold (honest miss)",
"blurb": "Model's top-1 line (6) is not the gold bug ([5]). Shown to be transparent about failure modes.",
"stats": {
"n_lines": 8,
"n_buggy": 1,
"top1_energy": -3.5674126148223877,
"top1_line_idx": 6
},
"x": -0.5256160497665405,
"y": -4.552791118621826,
"whole_impl_energy": 1.9436787366867065
},
{
"impl_id": "verus-real-external_fn_specification-7c0867550a70-fail-test_basics-0",
"category": "model_miss",
"label": "Model top-1 \u2260 gold (honest miss)",
"blurb": "Model's top-1 line (12) is not the gold bug ([15, 19]). Shown to be transparent about failure modes.",
"stats": {
"n_lines": 21,
"n_buggy": 2,
"top1_energy": -1.7063446044921875,
"top1_line_idx": 12
},
"x": 6.764942646026611,
"y": 0.9527754783630371,
"whole_impl_energy": -5.709773063659668
},
{
"impl_id": "verus-real-external_fn_specification-f779ffb1cb56-fail-test_when_used_as_spec_modules-49",
"category": "model_miss",
"label": "Model top-1 \u2260 gold (honest miss)",
"blurb": "Model's top-1 line (15) is not the gold bug ([19]). Shown to be transparent about failure modes.",
"stats": {
"n_lines": 22,
"n_buggy": 1,
"top1_energy": -3.3586323261260986,
"top1_line_idx": 15
},
"x": -3.4095091819763184,
"y": 4.055099010467529,
"whole_impl_energy": -6.971179962158203
},
{
"impl_id": "verus-real-match-b5a763a28f68-fail-test_if_let2_fails1-11",
"category": "model_miss",
"label": "Model top-1 \u2260 gold (honest miss)",
"blurb": "Model's top-1 line (3) is not the gold bug ([5]). Shown to be transparent about failure modes.",
"stats": {
"n_lines": 8,
"n_buggy": 1,
"top1_energy": -3.1037395000457764,
"top1_line_idx": 3
},
"x": 1.0342881679534912,
"y": 1.0872387886047363,
"whole_impl_energy": -9.956635475158691
},
{
"impl_id": "verus-real-assert_bitvector_by-2253af8ddec6-pass-test2-1",
"category": "pass_low_energy",
"label": "PASS impl, low energy",
"blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.",
"stats": {
"n_lines": 9,
"whole_impl_energy": -24.71211051940918
},
"x": -3.3124241828918457,
"y": 0.9528474807739258,
"whole_impl_energy": -24.71211051940918
},
{
"impl_id": "verus-real-loops-da39a3ee5e6b-pass-example_loop_continue-9",
"category": "pass_low_energy",
"label": "PASS impl, low energy",
"blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.",
"stats": {
"n_lines": 19,
"whole_impl_energy": -19.89792823791504
},
"x": 6.46060848236084,
"y": -1.755544900894165,
"whole_impl_energy": -19.89792823791504
},
{
"impl_id": "verus-real-quantifiers-be32d67719d5-pass-test1-0",
"category": "pass_low_energy",
"label": "PASS impl, low energy",
"blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.",
"stats": {
"n_lines": 11,
"whole_impl_energy": -22.63016700744629
},
"x": 0.26710984110832214,
"y": -0.7138252258300781,
"whole_impl_energy": -22.63016700744629
},
{
"impl_id": "verus-real-adts-1940c966bbb4-pass-test_update2-0",
"category": "pass_low_energy",
"label": "PASS impl, low energy",
"blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.",
"stats": {
"n_lines": 9,
"whole_impl_energy": -8.462407112121582
},
"x": 3.881700277328491,
"y": -3.797964334487915,
"whole_impl_energy": -8.462407112121582
}
]