chronos-poc / training_result.json
JayLuci4's picture
Chronos PoC: PTX transform selection via RLVR (DA-GRPO)
adbcafd verified
{
"mean_improvement": -0.29151748216617807,
"n_kernels": 64,
"per_kernel": {
"gemm_tile(2,2,2)": {
"improvement": -0.0191,
"actions": [
"vec_st",
"vec_ld",
"stop"
],
"baseline_cycles": 419,
"final_cycles": 411
},
"gemm_tile(2,2,4)": {
"improvement": -0.0363,
"actions": [
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 441,
"final_cycles": 425
},
"gemm_tile(2,2,6)": {
"improvement": -0.1475,
"actions": [
"vec_ld",
"vec_st",
"maxnreg_128",
"stop"
],
"baseline_cycles": 522,
"final_cycles": 445
},
"gemm_tile(2,2,8)": {
"improvement": -0.1373,
"actions": [
"cache_cs",
"vec_ld",
"vec_st",
"maxnreg_128",
"stop"
],
"baseline_cycles": 539,
"final_cycles": 465
},
"gemm_tile(2,4,2)": {
"improvement": -0.1237,
"actions": [
"vec_st",
"vec_ld",
"stop"
],
"baseline_cycles": 477,
"final_cycles": 418
},
"gemm_tile(2,4,4)": {
"improvement": -0.1654,
"actions": [
"vec_ld",
"vec_st",
"maxnreg_128",
"stop"
],
"baseline_cycles": 544,
"final_cycles": 454
},
"gemm_tile(2,4,6)": {
"improvement": -0.1932,
"actions": [
"cache_cs",
"vec_ld",
"maxnreg_128",
"vec_st",
"stop"
],
"baseline_cycles": 647,
"final_cycles": 522
},
"gemm_tile(2,4,8)": {
"improvement": -0.361,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 856,
"final_cycles": 547
},
"gemm_tile(2,6,2)": {
"improvement": -0.1899,
"actions": [
"vec_st",
"vec_ld",
"stop"
],
"baseline_cycles": 537,
"final_cycles": 435
},
"gemm_tile(2,6,4)": {
"improvement": -0.1651,
"actions": [
"cache_cs",
"vec_ld",
"maxnreg_128",
"vec_st",
"stop"
],
"baseline_cycles": 630,
"final_cycles": 526
},
"gemm_tile(2,6,6)": {
"improvement": -0.3322,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 891,
"final_cycles": 595
},
"gemm_tile(2,6,8)": {
"improvement": -0.493,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1278,
"final_cycles": 648
},
"gemm_tile(2,8,2)": {
"improvement": -0.1929,
"actions": [
"vec_st",
"vec_ld",
"maxnreg_128",
"stop"
],
"baseline_cycles": 565,
"final_cycles": 456
},
"gemm_tile(2,8,4)": {
"improvement": -0.4123,
"actions": [
"prefetch_L1",
"st_cache_cs",
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 912,
"final_cycles": 536
},
"gemm_tile(2,8,6)": {
"improvement": -0.2716,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 902,
"final_cycles": 657
},
"gemm_tile(2,8,8)": {
"improvement": -0.2637,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1005,
"final_cycles": 740
},
"gemm_tile(4,2,2)": {
"improvement": -0.0695,
"actions": [
"vec_st",
"vec_ld",
"stop"
],
"baseline_cycles": 475,
"final_cycles": 442
},
"gemm_tile(4,2,4)": {
"improvement": -0.1343,
"actions": [
"vec_ld",
"vec_st",
"maxnreg_128",
"stop"
],
"baseline_cycles": 536,
"final_cycles": 464
},
"gemm_tile(4,2,6)": {
"improvement": -0.1936,
"actions": [
"cache_cs",
"vec_ld",
"maxnreg_128",
"vec_st",
"stop"
],
"baseline_cycles": 625,
"final_cycles": 504
},
"gemm_tile(4,2,8)": {
"improvement": -0.2722,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 753,
"final_cycles": 548
},
"gemm_tile(4,4,2)": {
"improvement": -0.1865,
"actions": [
"vec_st",
"vec_ld",
"maxnreg_128",
"stop"
],
"baseline_cycles": 547,
"final_cycles": 445
},
"gemm_tile(4,4,4)": {
"improvement": -0.1693,
"actions": [
"prefetch_L1",
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 691,
"final_cycles": 574
},
"gemm_tile(4,4,6)": {
"improvement": -0.3213,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 940,
"final_cycles": 638
},
"gemm_tile(4,4,8)": {
"improvement": -0.4308,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1258,
"final_cycles": 716
},
"gemm_tile(4,6,2)": {
"improvement": -0.2639,
"actions": [
"vec_st",
"vec_ld",
"maxnreg_128",
"stop"
],
"baseline_cycles": 648,
"final_cycles": 477
},
"gemm_tile(4,6,4)": {
"improvement": -0.238,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 832,
"final_cycles": 634
},
"gemm_tile(4,6,6)": {
"improvement": -0.502,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1502,
"final_cycles": 748
},
"gemm_tile(4,6,8)": {
"improvement": -0.5383,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1839,
"final_cycles": 849
},
"gemm_tile(4,8,2)": {
"improvement": -0.2437,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 714,
"final_cycles": 540
},
"gemm_tile(4,8,4)": {
"improvement": -0.2336,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 959,
"final_cycles": 735
},
"gemm_tile(4,8,6)": {
"improvement": -0.2232,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1102,
"final_cycles": 856
},
"gemm_tile(4,8,8)": {
"improvement": -0.3196,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1430,
"final_cycles": 973
},
"gemm_tile(6,2,2)": {
"improvement": -0.1468,
"actions": [
"vec_st",
"vec_ld",
"stop"
],
"baseline_cycles": 545,
"final_cycles": 465
},
"gemm_tile(6,2,4)": {
"improvement": -0.1749,
"actions": [
"cache_cs",
"vec_ld",
"maxnreg_128",
"vec_st",
"stop"
],
"baseline_cycles": 606,
"final_cycles": 500
},
"gemm_tile(6,2,6)": {
"improvement": -0.336,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 866,
"final_cycles": 575
},
"gemm_tile(6,2,8)": {
"improvement": -0.36,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 989,
"final_cycles": 633
},
"gemm_tile(6,4,2)": {
"improvement": -0.2897,
"actions": [
"vec_st",
"vec_ld",
"maxnreg_128",
"stop"
],
"baseline_cycles": 673,
"final_cycles": 478
},
"gemm_tile(6,4,4)": {
"improvement": -0.2296,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 832,
"final_cycles": 641
},
"gemm_tile(6,4,6)": {
"improvement": -0.4646,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1386,
"final_cycles": 742
},
"gemm_tile(6,4,8)": {
"improvement": -0.4832,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1546,
"final_cycles": 799
},
"gemm_tile(6,6,2)": {
"improvement": -0.2968,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 775,
"final_cycles": 545
},
"gemm_tile(6,6,4)": {
"improvement": -0.3052,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 983,
"final_cycles": 683
},
"gemm_tile(6,6,6)": {
"improvement": -0.4544,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1635,
"final_cycles": 892
},
"gemm_tile(6,6,8)": {
"improvement": -0.4978,
"actions": [
"maxnreg_255",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 2059,
"final_cycles": 1034
},
"gemm_tile(6,8,2)": {
"improvement": -0.2758,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 881,
"final_cycles": 638
},
"gemm_tile(6,8,4)": {
"improvement": -0.264,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 1197,
"final_cycles": 881
},
"gemm_tile(6,8,6)": {
"improvement": -0.3264,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1578,
"final_cycles": 1063
},
"gemm_tile(6,8,8)": {
"improvement": -0.3011,
"actions": [
"maxnreg_255",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1737,
"final_cycles": 1214
},
"gemm_tile(8,2,2)": {
"improvement": -0.1706,
"actions": [
"vec_st",
"vec_ld",
"maxnreg_128",
"stop"
],
"baseline_cycles": 592,
"final_cycles": 491
},
"gemm_tile(8,2,4)": {
"improvement": -0.2521,
"actions": [
"prefetch_L1",
"st_cache_cs",
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 722,
"final_cycles": 540
},
"gemm_tile(8,2,6)": {
"improvement": -0.4012,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1042,
"final_cycles": 624
},
"gemm_tile(8,2,8)": {
"improvement": -0.3817,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1158,
"final_cycles": 716
},
"gemm_tile(8,4,2)": {
"improvement": -0.297,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 744,
"final_cycles": 523
},
"gemm_tile(8,4,4)": {
"improvement": -0.3152,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1050,
"final_cycles": 719
},
"gemm_tile(8,4,6)": {
"improvement": -0.4074,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1436,
"final_cycles": 851
},
"gemm_tile(8,4,8)": {
"improvement": -0.4238,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1647,
"final_cycles": 949
},
"gemm_tile(8,6,2)": {
"improvement": -0.2839,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 863,
"final_cycles": 618
},
"gemm_tile(8,6,4)": {
"improvement": -0.4151,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 1349,
"final_cycles": 789
},
"gemm_tile(8,6,6)": {
"improvement": -0.4326,
"actions": [
"maxnreg_128",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 1789,
"final_cycles": 1015
},
"gemm_tile(8,6,8)": {
"improvement": -0.4535,
"actions": [
"maxnreg_255",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 2192,
"final_cycles": 1198
},
"gemm_tile(8,8,2)": {
"improvement": -0.2404,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 965,
"final_cycles": 733
},
"gemm_tile(8,8,4)": {
"improvement": -0.3331,
"actions": [
"vec_st",
"maxnreg_128",
"vec_ld",
"stop"
],
"baseline_cycles": 1540,
"final_cycles": 1027
},
"gemm_tile(8,8,6)": {
"improvement": -0.3909,
"actions": [
"vec_st",
"maxnreg_255",
"vec_ld",
"stop"
],
"baseline_cycles": 2062,
"final_cycles": 1256
},
"gemm_tile(8,8,8)": {
"improvement": -0.4082,
"actions": [
"maxnreg_255",
"vec_ld",
"vec_st",
"stop"
],
"baseline_cycles": 2452,
"final_cycles": 1451
}
},
"action_distribution": {
"vec_st": 0.246,
"vec_ld": 0.246,
"stop": 0.246,
"maxnreg_128": 0.204,
"cache_cs": 0.019,
"maxnreg_255": 0.019,
"prefetch_L1": 0.012,
"st_cache_cs": 0.008
},
"unique_sequences": 12
}