File size: 3,695 Bytes
16dd578
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""
Reference implementation for Grayscale Triton kernel.
Y = 0.2989 R + 0.5870 G + 0.1140 B

Input:  (H, W, 3) float32 RGB tensor
Output: (H, W) float32 grayscale tensor
"""

import torch

# ---------------------------------------------------------------------------
# Scoring and benchmark configuration (read by shared_eval.py)
# ---------------------------------------------------------------------------

SCORE_SCALE = 3000.0

# grayscale uses CUDA events timing, 0.1% rel error, 120s wall clock timeout
BENCH_USE_CUDA_EVENTS = True
BENCH_REL_ERROR = 0.001
BENCH_WALL_TIMEOUT_NS = 120e9
BENCH_NO_GRAD = False
BENCH_MAX_REPEATS = 100
BENCH_MAX_TIME_NS = 10e9
BENCH_WARMUP_STYLE = 'tiny_benchmark'

# ---------------------------------------------------------------------------
# Test / benchmark cases
# ---------------------------------------------------------------------------

TEST_CASES = [
    {"size": 256, "seed": 42},
    {"size": 512, "seed": 123},
    {"size": 1024, "seed": 456},
    {"size": 2048, "seed": 789},
]

BENCHMARK_CASES = [
    {"size": 1024, "seed": 1001},
    {"size": 2048, "seed": 1002},
    {"size": 4096, "seed": 1003},
    {"size": 8192, "seed": 1004},
]

# ---------------------------------------------------------------------------
# Reference kernel
# ---------------------------------------------------------------------------


def ref_kernel(data):
    """Reference: Y = 0.2989 R + 0.5870 G + 0.1140 B"""
    rgb, output = data
    weights = torch.tensor([0.2989, 0.5870, 0.1140], device=rgb.device, dtype=rgb.dtype)
    output[...] = torch.sum(rgb * weights, dim=-1)
    return output


def generate_input(size, seed):
    gen = torch.Generator(device="cuda")
    gen.manual_seed(seed)
    x = torch.rand(size, size, 3, device="cuda", dtype=torch.float32, generator=gen).contiguous()
    y = torch.empty(size, size, device="cuda", dtype=torch.float32).contiguous()
    return x, y


def check_implementation(data, submission_output, rtol=1e-4, atol=1e-4):
    ref_output = ref_kernel(data)
    if submission_output.shape != ref_output.shape:
        return False, f"Shape mismatch: expected {ref_output.shape}, got {submission_output.shape}"
    if torch.allclose(submission_output, ref_output, rtol=rtol, atol=atol):
        return True, "Match"
    diff = torch.abs(submission_output.float() - ref_output.float())
    return False, f"Output mismatch: max_diff={diff.max().item():.6f}"


# ---------------------------------------------------------------------------
# Self-contained reference code for Modal remote execution
# ---------------------------------------------------------------------------

MODAL_REFERENCE_CODE = r'''
import torch

def ref_kernel(data):
    rgb, output = data
    weights = torch.tensor([0.2989, 0.5870, 0.1140], device=rgb.device, dtype=rgb.dtype)
    output[...] = torch.sum(rgb * weights, dim=-1)
    return output

def generate_input(size, seed):
    gen = torch.Generator(device="cuda")
    gen.manual_seed(seed)
    x = torch.rand(size, size, 3, device="cuda", dtype=torch.float32, generator=gen).contiguous()
    y = torch.empty(size, size, device="cuda", dtype=torch.float32).contiguous()
    return x, y

def check_implementation(data, submission_output, rtol=1e-4, atol=1e-4):
    ref_output = ref_kernel(data)
    if submission_output.shape != ref_output.shape:
        return False, f"Shape mismatch: expected {ref_output.shape}, got {submission_output.shape}"
    if torch.allclose(submission_output, ref_output, rtol=rtol, atol=atol):
        return True, "Match"
    diff = torch.abs(submission_output.float() - ref_output.float())
    return False, f"Output mismatch: max_diff={diff.max().item():.6f}"
'''