Spaces:

OzLabs
/

VericodingEBM-demo

Running

App Files Files Community

VericodingEBM-demo / data /examples.json

guychuk

deploy demo static site

92c6c3e verified about 1 month ago

Raw

History Blame Contribute Delete

7.62 kB

	[
	{
	"impl_id": "verus-real-match-329b5a3c6511-fail-test1_fails-0",
	"category": "model_win",
	"label": "Model top-1 = gold bug",
	"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
	"stats": {
	"n_lines": 23,
	"n_buggy": 1,
	"top1_energy": -1.6794092655181885,
	"top1_line_idx": 20
	},
	"x": 6.765385627746582,
	"y": 0.7981084585189819,
	"whole_impl_energy": 0.5201228857040405
	},
	{
	"impl_id": "verus-real-loops-b620c1b9261f-fail-test_variables_havoc_nested-4",
	"category": "model_win",
	"label": "Model top-1 = gold bug",
	"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
	"stats": {
	"n_lines": 17,
	"n_buggy": 1,
	"top1_energy": -4.065792560577393,
	"top1_line_idx": 14
	},
	"x": -4.582046985626221,
	"y": 1.6073641777038574,
	"whole_impl_energy": -5.544034481048584
	},
	{
	"impl_id": "verus-real-integer_ring-6b47daab383f-fail-div_by_zero_fail-4",
	"category": "model_win",
	"label": "Model top-1 = gold bug",
	"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
	"stats": {
	"n_lines": 8,
	"n_buggy": 1,
	"top1_energy": -2.3478376865386963,
	"top1_line_idx": 6
	},
	"x": 0.23251426219940186,
	"y": 1.1754875183105469,
	"whole_impl_energy": -6.044948101043701
	},
	{
	"impl_id": "verus-real-scope-da39a3ee5e6b-fail-test1_fails-0",
	"category": "model_win",
	"label": "Model top-1 = gold bug",
	"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
	"stats": {
	"n_lines": 10,
	"n_buggy": 1,
	"top1_energy": -3.7255990505218506,
	"top1_line_idx": 8
	},
	"x": 4.069056034088135,
	"y": 0.17309078574180603,
	"whole_impl_energy": 0.7895070314407349
	},
	{
	"impl_id": "verus-real-assert_by_compute-90698af23ac9-fail-default_impl_1_issue1406-10",
	"category": "model_win",
	"label": "Model top-1 = gold bug",
	"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
	"stats": {
	"n_lines": 14,
	"n_buggy": 1,
	"top1_energy": -4.291623592376709,
	"top1_line_idx": 12
	},
	"x": -2.085197925567627,
	"y": 1.599167823791504,
	"whole_impl_energy": -4.477553367614746
	},
	{
	"impl_id": "verus-real-traits-14220565a25f-fail-test_verify_6-48",
	"category": "model_win",
	"label": "Model top-1 = gold bug",
	"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
	"stats": {
	"n_lines": 24,
	"n_buggy": 2,
	"top1_energy": -3.4990246295928955,
	"top1_line_idx": 22
	},
	"x": 5.509369850158691,
	"y": 1.9690308570861816,
	"whole_impl_energy": 0.9066431522369385
	},
	{
	"impl_id": "verus-real-modes-80702303081f-fail-match_in_pure_expr-64",
	"category": "model_win",
	"label": "Model top-1 = gold bug",
	"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
	"stats": {
	"n_lines": 9,
	"n_buggy": 1,
	"top1_energy": -4.125854015350342,
	"top1_line_idx": 1
	},
	"x": -3.986729621887207,
	"y": 0.5290116667747498,
	"whole_impl_energy": 1.928382396697998
	},
	{
	"impl_id": "verus-real-opaque_types-582cbcfe88e6-fail-test_return_opaque_type_reveal_real_type-1",
	"category": "model_win",
	"label": "Model top-1 = gold bug",
	"blurb": "Model's highest-energy line is one of the gold-labeled buggy lines.",
	"stats": {
	"n_lines": 11,
	"n_buggy": 1,
	"top1_energy": -3.682386636734009,
	"top1_line_idx": 9
	},
	"x": 5.55057954788208,
	"y": 1.4102814197540283,
	"whole_impl_energy": 1.1414135694503784
	},
	{
	"impl_id": "verus-real-adts-1940c966bbb4-fail-test_update2_fails-0",
	"category": "model_miss",
	"label": "Model top-1 \u2260 gold (honest miss)",
	"blurb": "Model's top-1 line (6) is not the gold bug ([5]). Shown to be transparent about failure modes.",
	"stats": {
	"n_lines": 8,
	"n_buggy": 1,
	"top1_energy": -3.5674126148223877,
	"top1_line_idx": 6
	},
	"x": -0.5256160497665405,
	"y": -4.552791118621826,
	"whole_impl_energy": 1.9436787366867065
	},
	{
	"impl_id": "verus-real-external_fn_specification-7c0867550a70-fail-test_basics-0",
	"category": "model_miss",
	"label": "Model top-1 \u2260 gold (honest miss)",
	"blurb": "Model's top-1 line (12) is not the gold bug ([15, 19]). Shown to be transparent about failure modes.",
	"stats": {
	"n_lines": 21,
	"n_buggy": 2,
	"top1_energy": -1.7063446044921875,
	"top1_line_idx": 12
	},
	"x": 6.764942646026611,
	"y": 0.9527754783630371,
	"whole_impl_energy": -5.709773063659668
	},
	{
	"impl_id": "verus-real-external_fn_specification-f779ffb1cb56-fail-test_when_used_as_spec_modules-49",
	"category": "model_miss",
	"label": "Model top-1 \u2260 gold (honest miss)",
	"blurb": "Model's top-1 line (15) is not the gold bug ([19]). Shown to be transparent about failure modes.",
	"stats": {
	"n_lines": 22,
	"n_buggy": 1,
	"top1_energy": -3.3586323261260986,
	"top1_line_idx": 15
	},
	"x": -3.4095091819763184,
	"y": 4.055099010467529,
	"whole_impl_energy": -6.971179962158203
	},
	{
	"impl_id": "verus-real-match-b5a763a28f68-fail-test_if_let2_fails1-11",
	"category": "model_miss",
	"label": "Model top-1 \u2260 gold (honest miss)",
	"blurb": "Model's top-1 line (3) is not the gold bug ([5]). Shown to be transparent about failure modes.",
	"stats": {
	"n_lines": 8,
	"n_buggy": 1,
	"top1_energy": -3.1037395000457764,
	"top1_line_idx": 3
	},
	"x": 1.0342881679534912,
	"y": 1.0872387886047363,
	"whole_impl_energy": -9.956635475158691
	},
	{
	"impl_id": "verus-real-assert_bitvector_by-2253af8ddec6-pass-test2-1",
	"category": "pass_low_energy",
	"label": "PASS impl, low energy",
	"blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.",
	"stats": {
	"n_lines": 9,
	"whole_impl_energy": -24.71211051940918
	},
	"x": -3.3124241828918457,
	"y": 0.9528474807739258,
	"whole_impl_energy": -24.71211051940918
	},
	{
	"impl_id": "verus-real-loops-da39a3ee5e6b-pass-example_loop_continue-9",
	"category": "pass_low_energy",
	"label": "PASS impl, low energy",
	"blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.",
	"stats": {
	"n_lines": 19,
	"whole_impl_energy": -19.89792823791504
	},
	"x": 6.46060848236084,
	"y": -1.755544900894165,
	"whole_impl_energy": -19.89792823791504
	},
	{
	"impl_id": "verus-real-quantifiers-be32d67719d5-pass-test1-0",
	"category": "pass_low_energy",
	"label": "PASS impl, low energy",
	"blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.",
	"stats": {
	"n_lines": 11,
	"whole_impl_energy": -22.63016700744629
	},
	"x": 0.26710984110832214,
	"y": -0.7138252258300781,
	"whole_impl_energy": -22.63016700744629
	},
	{
	"impl_id": "verus-real-adts-1940c966bbb4-pass-test_update2-0",
	"category": "pass_low_energy",
	"label": "PASS impl, low energy",
	"blurb": "Model assigns low energy across the board \u2014 model agrees this is clean.",
	"stats": {
	"n_lines": 9,
	"whole_impl_energy": -8.462407112121582
	},
	"x": 3.881700277328491,
	"y": -3.797964334487915,
	"whole_impl_energy": -8.462407112121582
	}
	]