| """ |
| Reference implementation for Grayscale Triton kernel. |
| Y = 0.2989 R + 0.5870 G + 0.1140 B |
| |
| Input: (H, W, 3) float32 RGB tensor |
| Output: (H, W) float32 grayscale tensor |
| """ |
|
|
| import torch |
|
|
| |
| |
| |
|
|
| SCORE_SCALE = 3000.0 |
|
|
| |
| BENCH_USE_CUDA_EVENTS = True |
| BENCH_REL_ERROR = 0.001 |
| BENCH_WALL_TIMEOUT_NS = 120e9 |
| BENCH_NO_GRAD = False |
| BENCH_MAX_REPEATS = 100 |
| BENCH_MAX_TIME_NS = 10e9 |
| BENCH_WARMUP_STYLE = 'tiny_benchmark' |
|
|
| |
| |
| |
|
|
| TEST_CASES = [ |
| {"size": 256, "seed": 42}, |
| {"size": 512, "seed": 123}, |
| {"size": 1024, "seed": 456}, |
| {"size": 2048, "seed": 789}, |
| ] |
|
|
| BENCHMARK_CASES = [ |
| {"size": 1024, "seed": 1001}, |
| {"size": 2048, "seed": 1002}, |
| {"size": 4096, "seed": 1003}, |
| {"size": 8192, "seed": 1004}, |
| ] |
|
|
| |
| |
| |
|
|
|
|
| def ref_kernel(data): |
| """Reference: Y = 0.2989 R + 0.5870 G + 0.1140 B""" |
| rgb, output = data |
| weights = torch.tensor([0.2989, 0.5870, 0.1140], device=rgb.device, dtype=rgb.dtype) |
| output[...] = torch.sum(rgb * weights, dim=-1) |
| return output |
|
|
|
|
| def generate_input(size, seed): |
| gen = torch.Generator(device="cuda") |
| gen.manual_seed(seed) |
| x = torch.rand(size, size, 3, device="cuda", dtype=torch.float32, generator=gen).contiguous() |
| y = torch.empty(size, size, device="cuda", dtype=torch.float32).contiguous() |
| return x, y |
|
|
|
|
| def check_implementation(data, submission_output, rtol=1e-4, atol=1e-4): |
| ref_output = ref_kernel(data) |
| if submission_output.shape != ref_output.shape: |
| return False, f"Shape mismatch: expected {ref_output.shape}, got {submission_output.shape}" |
| if torch.allclose(submission_output, ref_output, rtol=rtol, atol=atol): |
| return True, "Match" |
| diff = torch.abs(submission_output.float() - ref_output.float()) |
| return False, f"Output mismatch: max_diff={diff.max().item():.6f}" |
|
|
|
|
| |
| |
| |
|
|
| MODAL_REFERENCE_CODE = r''' |
| import torch |
| |
| def ref_kernel(data): |
| rgb, output = data |
| weights = torch.tensor([0.2989, 0.5870, 0.1140], device=rgb.device, dtype=rgb.dtype) |
| output[...] = torch.sum(rgb * weights, dim=-1) |
| return output |
| |
| def generate_input(size, seed): |
| gen = torch.Generator(device="cuda") |
| gen.manual_seed(seed) |
| x = torch.rand(size, size, 3, device="cuda", dtype=torch.float32, generator=gen).contiguous() |
| y = torch.empty(size, size, device="cuda", dtype=torch.float32).contiguous() |
| return x, y |
| |
| def check_implementation(data, submission_output, rtol=1e-4, atol=1e-4): |
| ref_output = ref_kernel(data) |
| if submission_output.shape != ref_output.shape: |
| return False, f"Shape mismatch: expected {ref_output.shape}, got {submission_output.shape}" |
| if torch.allclose(submission_output, ref_output, rtol=rtol, atol=atol): |
| return True, "Match" |
| diff = torch.abs(submission_output.float() - ref_output.float()) |
| return False, f"Output mismatch: max_diff={diff.max().item():.6f}" |
| ''' |
|
|