danieldk HF Staff committed on
Commit
8a26a95
·
verified ·
1 Parent(s): 2e77779

Benchmarks uploaded using `kernels`.

Browse files
Files changed (1) hide show
  1. benchmarks/benchmark.py +47 -0
benchmarks/benchmark.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from kernels.benchmark import Benchmark
4
+
5
+
6
def rmsnorm_reference(x: torch.Tensor, eps: float) -> torch.Tensor:
    """Return ``x`` divided by its root-mean-square over the last dimension.

    Args:
        x: Input tensor; the mean of squares is taken over ``dim=-1``.
        eps: Value added to the mean of squares before the square root.

    Returns:
        A tensor with the same shape as ``x``.
    """
    # keepdim=True keeps the reduced axis so the division broadcasts over it.
    mean_square = x.pow(2).mean(dim=-1, keepdim=True)
    denom = (mean_square + eps).sqrt()
    return x / denom
9
+
10
+
11
class TinygradRmsBenchmark(Benchmark):
    """Benchmark ``tinygrad_rms_norm_simple`` against a PyTorch reference.

    Two workloads are defined, ``base`` and ``large``. Each has a setup
    method that creates the input, a ``benchmark_*`` method that calls the
    kernel and a ``verify_*`` method that returns the reference result.

    ``self.device`` and ``self.kernel`` are not assigned in this class;
    presumably they are provided by ``Benchmark`` -- confirm.
    """

    # Presumably read by the Benchmark base class to seed the RNG -- confirm.
    seed: int = 42

    def setup(self):
        """Create the float32 input of shape (32, 512, 1024) for ``base``."""
        shape = (32, 512, 1024)  # (batch_size, seq_len, hidden_size)
        self.eps = 1e-6

        self.x = torch.randn(shape, device=self.device, dtype=torch.float32)
        self.out = torch.empty_like(self.x)

    def benchmark_base(self):
        """Run the kernel on the base input and store the result in ``self.out``."""
        rms_norm = self.kernel.tinygrad_rms_norm_simple
        self.out = rms_norm(self.x, self.eps)

    def verify_base(self) -> torch.Tensor:
        """Return the reference RMS norm of the base input."""
        return rmsnorm_reference(x=self.x, eps=self.eps)

    def setup_large(self):
        """Create the float32 input of shape (64, 1024, 1024) for ``large``."""
        # Note: hidden_size must be 1024 (kernel constraint)
        shape = (64, 1024, 1024)  # (batch_size, seq_len, hidden_size)
        self.eps = 1e-6

        self.x = torch.randn(shape, device=self.device, dtype=torch.float32)
        self.out = torch.empty_like(self.x)

    def benchmark_large(self):
        """Run the kernel on the large input and store the result in ``self.out``."""
        rms_norm = self.kernel.tinygrad_rms_norm_simple
        self.out = rms_norm(self.x, self.eps)

    def verify_large(self) -> torch.Tensor:
        """Return the reference RMS norm of the large input."""
        return rmsnorm_reference(x=self.x, eps=self.eps)