| { | |
| "mean_improvement": -0.29151748216617807, | |
| "n_kernels": 64, | |
| "per_kernel": { | |
| "gemm_tile(2,2,2)": { | |
| "improvement": -0.0191, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 419, | |
| "final_cycles": 411 | |
| }, | |
| "gemm_tile(2,2,4)": { | |
| "improvement": -0.0363, | |
| "actions": [ | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 441, | |
| "final_cycles": 425 | |
| }, | |
| "gemm_tile(2,2,6)": { | |
| "improvement": -0.1475, | |
| "actions": [ | |
| "vec_ld", | |
| "vec_st", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 522, | |
| "final_cycles": 445 | |
| }, | |
| "gemm_tile(2,2,8)": { | |
| "improvement": -0.1373, | |
| "actions": [ | |
| "cache_cs", | |
| "vec_ld", | |
| "vec_st", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 539, | |
| "final_cycles": 465 | |
| }, | |
| "gemm_tile(2,4,2)": { | |
| "improvement": -0.1237, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 477, | |
| "final_cycles": 418 | |
| }, | |
| "gemm_tile(2,4,4)": { | |
| "improvement": -0.1654, | |
| "actions": [ | |
| "vec_ld", | |
| "vec_st", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 544, | |
| "final_cycles": 454 | |
| }, | |
| "gemm_tile(2,4,6)": { | |
| "improvement": -0.1932, | |
| "actions": [ | |
| "cache_cs", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 647, | |
| "final_cycles": 522 | |
| }, | |
| "gemm_tile(2,4,8)": { | |
| "improvement": -0.361, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 856, | |
| "final_cycles": 547 | |
| }, | |
| "gemm_tile(2,6,2)": { | |
| "improvement": -0.1899, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 537, | |
| "final_cycles": 435 | |
| }, | |
| "gemm_tile(2,6,4)": { | |
| "improvement": -0.1651, | |
| "actions": [ | |
| "cache_cs", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 630, | |
| "final_cycles": 526 | |
| }, | |
| "gemm_tile(2,6,6)": { | |
| "improvement": -0.3322, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 891, | |
| "final_cycles": 595 | |
| }, | |
| "gemm_tile(2,6,8)": { | |
| "improvement": -0.493, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1278, | |
| "final_cycles": 648 | |
| }, | |
| "gemm_tile(2,8,2)": { | |
| "improvement": -0.1929, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 565, | |
| "final_cycles": 456 | |
| }, | |
| "gemm_tile(2,8,4)": { | |
| "improvement": -0.4123, | |
| "actions": [ | |
| "prefetch_L1", | |
| "st_cache_cs", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 912, | |
| "final_cycles": 536 | |
| }, | |
| "gemm_tile(2,8,6)": { | |
| "improvement": -0.2716, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 902, | |
| "final_cycles": 657 | |
| }, | |
| "gemm_tile(2,8,8)": { | |
| "improvement": -0.2637, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1005, | |
| "final_cycles": 740 | |
| }, | |
| "gemm_tile(4,2,2)": { | |
| "improvement": -0.0695, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 475, | |
| "final_cycles": 442 | |
| }, | |
| "gemm_tile(4,2,4)": { | |
| "improvement": -0.1343, | |
| "actions": [ | |
| "vec_ld", | |
| "vec_st", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 536, | |
| "final_cycles": 464 | |
| }, | |
| "gemm_tile(4,2,6)": { | |
| "improvement": -0.1936, | |
| "actions": [ | |
| "cache_cs", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 625, | |
| "final_cycles": 504 | |
| }, | |
| "gemm_tile(4,2,8)": { | |
| "improvement": -0.2722, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 753, | |
| "final_cycles": 548 | |
| }, | |
| "gemm_tile(4,4,2)": { | |
| "improvement": -0.1865, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 547, | |
| "final_cycles": 445 | |
| }, | |
| "gemm_tile(4,4,4)": { | |
| "improvement": -0.1693, | |
| "actions": [ | |
| "prefetch_L1", | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 691, | |
| "final_cycles": 574 | |
| }, | |
| "gemm_tile(4,4,6)": { | |
| "improvement": -0.3213, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 940, | |
| "final_cycles": 638 | |
| }, | |
| "gemm_tile(4,4,8)": { | |
| "improvement": -0.4308, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1258, | |
| "final_cycles": 716 | |
| }, | |
| "gemm_tile(4,6,2)": { | |
| "improvement": -0.2639, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 648, | |
| "final_cycles": 477 | |
| }, | |
| "gemm_tile(4,6,4)": { | |
| "improvement": -0.238, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 832, | |
| "final_cycles": 634 | |
| }, | |
| "gemm_tile(4,6,6)": { | |
| "improvement": -0.502, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1502, | |
| "final_cycles": 748 | |
| }, | |
| "gemm_tile(4,6,8)": { | |
| "improvement": -0.5383, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1839, | |
| "final_cycles": 849 | |
| }, | |
| "gemm_tile(4,8,2)": { | |
| "improvement": -0.2437, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 714, | |
| "final_cycles": 540 | |
| }, | |
| "gemm_tile(4,8,4)": { | |
| "improvement": -0.2336, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 959, | |
| "final_cycles": 735 | |
| }, | |
| "gemm_tile(4,8,6)": { | |
| "improvement": -0.2232, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1102, | |
| "final_cycles": 856 | |
| }, | |
| "gemm_tile(4,8,8)": { | |
| "improvement": -0.3196, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1430, | |
| "final_cycles": 973 | |
| }, | |
| "gemm_tile(6,2,2)": { | |
| "improvement": -0.1468, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 545, | |
| "final_cycles": 465 | |
| }, | |
| "gemm_tile(6,2,4)": { | |
| "improvement": -0.1749, | |
| "actions": [ | |
| "cache_cs", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 606, | |
| "final_cycles": 500 | |
| }, | |
| "gemm_tile(6,2,6)": { | |
| "improvement": -0.336, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 866, | |
| "final_cycles": 575 | |
| }, | |
| "gemm_tile(6,2,8)": { | |
| "improvement": -0.36, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 989, | |
| "final_cycles": 633 | |
| }, | |
| "gemm_tile(6,4,2)": { | |
| "improvement": -0.2897, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 673, | |
| "final_cycles": 478 | |
| }, | |
| "gemm_tile(6,4,4)": { | |
| "improvement": -0.2296, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 832, | |
| "final_cycles": 641 | |
| }, | |
| "gemm_tile(6,4,6)": { | |
| "improvement": -0.4646, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1386, | |
| "final_cycles": 742 | |
| }, | |
| "gemm_tile(6,4,8)": { | |
| "improvement": -0.4832, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1546, | |
| "final_cycles": 799 | |
| }, | |
| "gemm_tile(6,6,2)": { | |
| "improvement": -0.2968, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 775, | |
| "final_cycles": 545 | |
| }, | |
| "gemm_tile(6,6,4)": { | |
| "improvement": -0.3052, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 983, | |
| "final_cycles": 683 | |
| }, | |
| "gemm_tile(6,6,6)": { | |
| "improvement": -0.4544, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1635, | |
| "final_cycles": 892 | |
| }, | |
| "gemm_tile(6,6,8)": { | |
| "improvement": -0.4978, | |
| "actions": [ | |
| "maxnreg_255", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 2059, | |
| "final_cycles": 1034 | |
| }, | |
| "gemm_tile(6,8,2)": { | |
| "improvement": -0.2758, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 881, | |
| "final_cycles": 638 | |
| }, | |
| "gemm_tile(6,8,4)": { | |
| "improvement": -0.264, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1197, | |
| "final_cycles": 881 | |
| }, | |
| "gemm_tile(6,8,6)": { | |
| "improvement": -0.3264, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1578, | |
| "final_cycles": 1063 | |
| }, | |
| "gemm_tile(6,8,8)": { | |
| "improvement": -0.3011, | |
| "actions": [ | |
| "maxnreg_255", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1737, | |
| "final_cycles": 1214 | |
| }, | |
| "gemm_tile(8,2,2)": { | |
| "improvement": -0.1706, | |
| "actions": [ | |
| "vec_st", | |
| "vec_ld", | |
| "maxnreg_128", | |
| "stop" | |
| ], | |
| "baseline_cycles": 592, | |
| "final_cycles": 491 | |
| }, | |
| "gemm_tile(8,2,4)": { | |
| "improvement": -0.2521, | |
| "actions": [ | |
| "prefetch_L1", | |
| "st_cache_cs", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 722, | |
| "final_cycles": 540 | |
| }, | |
| "gemm_tile(8,2,6)": { | |
| "improvement": -0.4012, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1042, | |
| "final_cycles": 624 | |
| }, | |
| "gemm_tile(8,2,8)": { | |
| "improvement": -0.3817, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1158, | |
| "final_cycles": 716 | |
| }, | |
| "gemm_tile(8,4,2)": { | |
| "improvement": -0.297, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 744, | |
| "final_cycles": 523 | |
| }, | |
| "gemm_tile(8,4,4)": { | |
| "improvement": -0.3152, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1050, | |
| "final_cycles": 719 | |
| }, | |
| "gemm_tile(8,4,6)": { | |
| "improvement": -0.4074, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1436, | |
| "final_cycles": 851 | |
| }, | |
| "gemm_tile(8,4,8)": { | |
| "improvement": -0.4238, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1647, | |
| "final_cycles": 949 | |
| }, | |
| "gemm_tile(8,6,2)": { | |
| "improvement": -0.2839, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 863, | |
| "final_cycles": 618 | |
| }, | |
| "gemm_tile(8,6,4)": { | |
| "improvement": -0.4151, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1349, | |
| "final_cycles": 789 | |
| }, | |
| "gemm_tile(8,6,6)": { | |
| "improvement": -0.4326, | |
| "actions": [ | |
| "maxnreg_128", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1789, | |
| "final_cycles": 1015 | |
| }, | |
| "gemm_tile(8,6,8)": { | |
| "improvement": -0.4535, | |
| "actions": [ | |
| "maxnreg_255", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 2192, | |
| "final_cycles": 1198 | |
| }, | |
| "gemm_tile(8,8,2)": { | |
| "improvement": -0.2404, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 965, | |
| "final_cycles": 733 | |
| }, | |
| "gemm_tile(8,8,4)": { | |
| "improvement": -0.3331, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_128", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 1540, | |
| "final_cycles": 1027 | |
| }, | |
| "gemm_tile(8,8,6)": { | |
| "improvement": -0.3909, | |
| "actions": [ | |
| "vec_st", | |
| "maxnreg_255", | |
| "vec_ld", | |
| "stop" | |
| ], | |
| "baseline_cycles": 2062, | |
| "final_cycles": 1256 | |
| }, | |
| "gemm_tile(8,8,8)": { | |
| "improvement": -0.4082, | |
| "actions": [ | |
| "maxnreg_255", | |
| "vec_ld", | |
| "vec_st", | |
| "stop" | |
| ], | |
| "baseline_cycles": 2452, | |
| "final_cycles": 1451 | |
| } | |
| }, | |
| "action_distribution": { | |
| "vec_st": 0.246, | |
| "vec_ld": 0.246, | |
| "stop": 0.246, | |
| "maxnreg_128": 0.204, | |
| "cache_cs": 0.019, | |
| "maxnreg_255": 0.019, | |
| "prefetch_L1": 0.012, | |
| "st_cache_cs": 0.008 | |
| }, | |
| "unique_sequences": 12 | |
| } |