Spaces:
Running
Running
| [ | |
| { | |
| "impl_id": "verus-real-match-329b5a3c6511-fail-test1_fails-0", | |
| "category": "model_win", | |
| "label": "Model top-1 = gold bug", | |
| "blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.", | |
| "stats": { | |
| "n_lines": 23, | |
| "n_buggy": 1, | |
| "top1_energy": -1.6794092655181885, | |
| "top1_line_idx": 20 | |
| }, | |
| "x": 6.765385627746582, | |
| "y": 0.7981084585189819, | |
| "whole_impl_energy": 0.5201228857040405 | |
| }, | |
| { | |
| "impl_id": "verus-real-loops-b620c1b9261f-fail-test_variables_havoc_nested-4", | |
| "category": "model_win", | |
| "label": "Model top-1 = gold bug", | |
| "blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.", | |
| "stats": { | |
| "n_lines": 17, | |
| "n_buggy": 1, | |
| "top1_energy": -4.065792560577393, | |
| "top1_line_idx": 14 | |
| }, | |
| "x": -4.582046985626221, | |
| "y": 1.6073641777038574, | |
| "whole_impl_energy": -5.544034481048584 | |
| }, | |
| { | |
| "impl_id": "verus-real-integer_ring-6b47daab383f-fail-div_by_zero_fail-4", | |
| "category": "model_win", | |
| "label": "Model top-1 = gold bug", | |
| "blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.", | |
| "stats": { | |
| "n_lines": 8, | |
| "n_buggy": 1, | |
| "top1_energy": -2.3478376865386963, | |
| "top1_line_idx": 6 | |
| }, | |
| "x": 0.23251426219940186, | |
| "y": 1.1754875183105469, | |
| "whole_impl_energy": -6.044948101043701 | |
| }, | |
| { | |
| "impl_id": "verus-real-scope-da39a3ee5e6b-fail-test1_fails-0", | |
| "category": "model_win", | |
| "label": "Model top-1 = gold bug", | |
| "blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.", | |
| "stats": { | |
| "n_lines": 10, | |
| "n_buggy": 1, | |
| "top1_energy": -3.7255990505218506, | |
| "top1_line_idx": 8 | |
| }, | |
| "x": 4.069056034088135, | |
| "y": 0.17309078574180603, | |
| "whole_impl_energy": 0.7895070314407349 | |
| }, | |
| { | |
| "impl_id": "verus-real-assert_by_compute-90698af23ac9-fail-default_impl_1_issue1406-10", | |
| "category": "model_win", | |
| "label": "Model top-1 = gold bug", | |
| "blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.", | |
| "stats": { | |
| "n_lines": 14, | |
| "n_buggy": 1, | |
| "top1_energy": -4.291623592376709, | |
| "top1_line_idx": 12 | |
| }, | |
| "x": -2.085197925567627, | |
| "y": 1.599167823791504, | |
| "whole_impl_energy": -4.477553367614746 | |
| }, | |
| { | |
| "impl_id": "verus-real-traits-14220565a25f-fail-test_verify_6-48", | |
| "category": "model_win", | |
| "label": "Model top-1 = gold bug", | |
| "blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.", | |
| "stats": { | |
| "n_lines": 24, | |
| "n_buggy": 2, | |
| "top1_energy": -3.4990246295928955, | |
| "top1_line_idx": 22 | |
| }, | |
| "x": 5.509369850158691, | |
| "y": 1.9690308570861816, | |
| "whole_impl_energy": 0.9066431522369385 | |
| }, | |
| { | |
| "impl_id": "verus-real-modes-80702303081f-fail-match_in_pure_expr-64", | |
| "category": "model_win", | |
| "label": "Model top-1 = gold bug", | |
| "blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.", | |
| "stats": { | |
| "n_lines": 9, | |
| "n_buggy": 1, | |
| "top1_energy": -4.125854015350342, | |
| "top1_line_idx": 1 | |
| }, | |
| "x": -3.986729621887207, | |
| "y": 0.5290116667747498, | |
| "whole_impl_energy": 1.928382396697998 | |
| }, | |
| { | |
| "impl_id": "verus-real-opaque_types-582cbcfe88e6-fail-test_return_opaque_type_reveal_real_type-1", | |
| "category": "model_win", | |
| "label": "Model top-1 = gold bug", | |
| "blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.", | |
| "stats": { | |
| "n_lines": 11, | |
| "n_buggy": 1, | |
| "top1_energy": -3.682386636734009, | |
| "top1_line_idx": 9 | |
| }, | |
| "x": 5.55057954788208, | |
| "y": 1.4102814197540283, | |
| "whole_impl_energy": 1.1414135694503784 | |
| }, | |
| { | |
| "impl_id": "verus-real-adts-1940c966bbb4-fail-test_update2_fails-0", | |
| "category": "model_miss", | |
| "label": "Model top-1 \u2260 gold (honest miss)", | |
| "blurb": "Model's top-1 line (6) is not the gold bug ([5]). Shown to be transparent about failure modes.", | |
| "stats": { | |
| "n_lines": 8, | |
| "n_buggy": 1, | |
| "top1_energy": -3.5674126148223877, | |
| "top1_line_idx": 6 | |
| }, | |
| "x": -0.5256160497665405, | |
| "y": -4.552791118621826, | |
| "whole_impl_energy": 1.9436787366867065 | |
| }, | |
| { | |
| "impl_id": "verus-real-external_fn_specification-7c0867550a70-fail-test_basics-0", | |
| "category": "model_miss", | |
| "label": "Model top-1 \u2260 gold (honest miss)", | |
| "blurb": "Model's top-1 line (12) is not the gold bug ([15, 19]). Shown to be transparent about failure modes.", | |
| "stats": { | |
| "n_lines": 21, | |
| "n_buggy": 2, | |
| "top1_energy": -1.7063446044921875, | |
| "top1_line_idx": 12 | |
| }, | |
| "x": 6.764942646026611, | |
| "y": 0.9527754783630371, | |
| "whole_impl_energy": -5.709773063659668 | |
| }, | |
| { | |
| "impl_id": "verus-real-external_fn_specification-f779ffb1cb56-fail-test_when_used_as_spec_modules-49", | |
| "category": "model_miss", | |
| "label": "Model top-1 \u2260 gold (honest miss)", | |
| "blurb": "Model's top-1 line (15) is not the gold bug ([19]). Shown to be transparent about failure modes.", | |
| "stats": { | |
| "n_lines": 22, | |
| "n_buggy": 1, | |
| "top1_energy": -3.3586323261260986, | |
| "top1_line_idx": 15 | |
| }, | |
| "x": -3.4095091819763184, | |
| "y": 4.055099010467529, | |
| "whole_impl_energy": -6.971179962158203 | |
| }, | |
| { | |
| "impl_id": "verus-real-match-b5a763a28f68-fail-test_if_let2_fails1-11", | |
| "category": "model_miss", | |
| "label": "Model top-1 \u2260 gold (honest miss)", | |
| "blurb": "Model's top-1 line (3) is not the gold bug ([5]). Shown to be transparent about failure modes.", | |
| "stats": { | |
| "n_lines": 8, | |
| "n_buggy": 1, | |
| "top1_energy": -3.1037395000457764, | |
| "top1_line_idx": 3 | |
| }, | |
| "x": 1.0342881679534912, | |
| "y": 1.0872387886047363, | |
| "whole_impl_energy": -9.956635475158691 | |
| }, | |
| { | |
| "impl_id": "verus-real-assert_bitvector_by-2253af8ddec6-pass-test2-1", | |
| "category": "pass_low_energy", | |
| "label": "PASS impl, low energy", | |
| "blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.", | |
| "stats": { | |
| "n_lines": 9, | |
| "whole_impl_energy": -24.71211051940918 | |
| }, | |
| "x": -3.3124241828918457, | |
| "y": 0.9528474807739258, | |
| "whole_impl_energy": -24.71211051940918 | |
| }, | |
| { | |
| "impl_id": "verus-real-loops-da39a3ee5e6b-pass-example_loop_continue-9", | |
| "category": "pass_low_energy", | |
| "label": "PASS impl, low energy", | |
| "blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.", | |
| "stats": { | |
| "n_lines": 19, | |
| "whole_impl_energy": -19.89792823791504 | |
| }, | |
| "x": 6.46060848236084, | |
| "y": -1.755544900894165, | |
| "whole_impl_energy": -19.89792823791504 | |
| }, | |
| { | |
| "impl_id": "verus-real-quantifiers-be32d67719d5-pass-test1-0", | |
| "category": "pass_low_energy", | |
| "label": "PASS impl, low energy", | |
| "blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.", | |
| "stats": { | |
| "n_lines": 11, | |
| "whole_impl_energy": -22.63016700744629 | |
| }, | |
| "x": 0.26710984110832214, | |
| "y": -0.7138252258300781, | |
| "whole_impl_energy": -22.63016700744629 | |
| }, | |
| { | |
| "impl_id": "verus-real-adts-1940c966bbb4-pass-test_update2-0", | |
| "category": "pass_low_energy", | |
| "label": "PASS impl, low energy", | |
| "blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.", | |
| "stats": { | |
| "n_lines": 9, | |
| "whole_impl_energy": -8.462407112121582 | |
| }, | |
| "x": 3.881700277328491, | |
| "y": -3.797964334487915, | |
| "whole_impl_energy": -8.462407112121582 | |
| } | |
| ] |