MekkCyber commited on
Commit
8b60ef7
·
1 Parent(s): 56bca05

add kernel structure

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitignore +2 -1
  2. build.toml +94 -0
  3. build/2.5.1+cu124/dropout_layer_norm.cpython-310-x86_64-linux-gnu.so +0 -3
  4. flake.nix +13 -0
  5. ln.h → layer-norm/ln.h +0 -0
  6. ln_api.cpp → layer-norm/ln_api.cpp +0 -0
  7. ln_bwd_1024.cu → layer-norm/ln_bwd_1024.cu +0 -0
  8. ln_bwd_1280.cu → layer-norm/ln_bwd_1280.cu +0 -0
  9. ln_bwd_1536.cu → layer-norm/ln_bwd_1536.cu +0 -0
  10. ln_bwd_2048.cu → layer-norm/ln_bwd_2048.cu +0 -0
  11. ln_bwd_256.cu → layer-norm/ln_bwd_256.cu +0 -0
  12. ln_bwd_2560.cu → layer-norm/ln_bwd_2560.cu +0 -0
  13. ln_bwd_3072.cu → layer-norm/ln_bwd_3072.cu +0 -0
  14. ln_bwd_4096.cu → layer-norm/ln_bwd_4096.cu +0 -0
  15. ln_bwd_512.cu → layer-norm/ln_bwd_512.cu +0 -0
  16. ln_bwd_5120.cu → layer-norm/ln_bwd_5120.cu +0 -0
  17. ln_bwd_6144.cu → layer-norm/ln_bwd_6144.cu +0 -0
  18. ln_bwd_7168.cu → layer-norm/ln_bwd_7168.cu +0 -0
  19. ln_bwd_768.cu → layer-norm/ln_bwd_768.cu +0 -0
  20. ln_bwd_8192.cu → layer-norm/ln_bwd_8192.cu +0 -0
  21. ln_bwd_kernels.cuh → layer-norm/ln_bwd_kernels.cuh +0 -0
  22. ln_fwd_1024.cu → layer-norm/ln_fwd_1024.cu +0 -0
  23. ln_fwd_1280.cu → layer-norm/ln_fwd_1280.cu +0 -0
  24. ln_fwd_1536.cu → layer-norm/ln_fwd_1536.cu +0 -0
  25. ln_fwd_2048.cu → layer-norm/ln_fwd_2048.cu +0 -0
  26. ln_fwd_256.cu → layer-norm/ln_fwd_256.cu +0 -0
  27. ln_fwd_2560.cu → layer-norm/ln_fwd_2560.cu +0 -0
  28. ln_fwd_3072.cu → layer-norm/ln_fwd_3072.cu +0 -0
  29. ln_fwd_4096.cu → layer-norm/ln_fwd_4096.cu +0 -0
  30. ln_fwd_512.cu → layer-norm/ln_fwd_512.cu +0 -0
  31. ln_fwd_5120.cu → layer-norm/ln_fwd_5120.cu +0 -0
  32. ln_fwd_6144.cu → layer-norm/ln_fwd_6144.cu +0 -0
  33. ln_fwd_7168.cu → layer-norm/ln_fwd_7168.cu +0 -0
  34. ln_fwd_768.cu → layer-norm/ln_fwd_768.cu +0 -0
  35. ln_fwd_8192.cu → layer-norm/ln_fwd_8192.cu +0 -0
  36. ln_fwd_kernels.cuh → layer-norm/ln_fwd_kernels.cuh +0 -0
  37. ln_kernel_traits.h → layer-norm/ln_kernel_traits.h +0 -0
  38. ln_parallel_bwd_1024.cu → layer-norm/ln_parallel_bwd_1024.cu +0 -0
  39. ln_parallel_bwd_1280.cu → layer-norm/ln_parallel_bwd_1280.cu +0 -0
  40. ln_parallel_bwd_1536.cu → layer-norm/ln_parallel_bwd_1536.cu +0 -0
  41. ln_parallel_bwd_2048.cu → layer-norm/ln_parallel_bwd_2048.cu +0 -0
  42. ln_parallel_bwd_256.cu → layer-norm/ln_parallel_bwd_256.cu +0 -0
  43. ln_parallel_bwd_2560.cu → layer-norm/ln_parallel_bwd_2560.cu +0 -0
  44. ln_parallel_bwd_3072.cu → layer-norm/ln_parallel_bwd_3072.cu +0 -0
  45. ln_parallel_bwd_4096.cu → layer-norm/ln_parallel_bwd_4096.cu +0 -0
  46. ln_parallel_bwd_512.cu → layer-norm/ln_parallel_bwd_512.cu +0 -0
  47. ln_parallel_bwd_5120.cu → layer-norm/ln_parallel_bwd_5120.cu +0 -0
  48. ln_parallel_bwd_6144.cu → layer-norm/ln_parallel_bwd_6144.cu +0 -0
  49. ln_parallel_bwd_7168.cu → layer-norm/ln_parallel_bwd_7168.cu +0 -0
  50. ln_parallel_bwd_768.cu → layer-norm/ln_parallel_bwd_768.cu +0 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
- /build/temp*
 
 
1
+ __pycache__/
2
+ *.pyc
build.toml ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [general]
2
+ name = "vllm_flash_attn3"
3
+ universal = false
4
+
5
+ [torch]
6
+ src = [
7
+ "torch-ext/torch_binding.cpp",
8
+ "torch-ext/torch_binding.h",
9
+ ]
10
+
11
+ [kernel.layer-norm]
12
+ depends = ["torch"]
13
+ backend = "cuda"
14
+ include = ["."]
15
+ src = [
16
+ "layer-norm/ln.h",
17
+ "layer-norm/ln_api.cpp",
18
+ "layer-norm/ln_bwd_1024.cu",
19
+ "layer-norm/ln_bwd_1280.cu",
20
+ "layer-norm/ln_bwd_1536.cu",
21
+ "layer-norm/ln_bwd_2048.cu",
22
+ "layer-norm/ln_bwd_256.cu",
23
+ "layer-norm/ln_bwd_2560.cu",
24
+ "layer-norm/ln_bwd_3072.cu",
25
+ "layer-norm/ln_bwd_4096.cu",
26
+ "layer-norm/ln_bwd_512.cu",
27
+ "layer-norm/ln_bwd_5120.cu",
28
+ "layer-norm/ln_bwd_6144.cu",
29
+ "layer-norm/ln_bwd_7168.cu",
30
+ "layer-norm/ln_bwd_768.cu",
31
+ "layer-norm/ln_bwd_8192.cu",
32
+ "layer-norm/ln_bwd_kernels.cuh",
33
+ "layer-norm/ln_fwd_1024.cu",
34
+ "layer-norm/ln_fwd_1280.cu",
35
+ "layer-norm/ln_fwd_1536.cu",
36
+ "layer-norm/ln_fwd_2048.cu",
37
+ "layer-norm/ln_fwd_256.cu",
38
+ "layer-norm/ln_fwd_2560.cu",
39
+ "layer-norm/ln_fwd_3072.cu",
40
+ "layer-norm/ln_fwd_4096.cu",
41
+ "layer-norm/ln_fwd_512.cu",
42
+ "layer-norm/ln_fwd_5120.cu",
43
+ "layer-norm/ln_fwd_6144.cu",
44
+ "layer-norm/ln_fwd_7168.cu",
45
+ "layer-norm/ln_fwd_768.cu",
46
+ "layer-norm/ln_fwd_8192.cu",
47
+ "layer-norm/ln_fwd_kernels.cuh",
48
+ "layer-norm/ln_kernel_traits.h",
49
+ "layer-norm/ln_parallel_bwd_1024.cu",
50
+ "layer-norm/ln_parallel_bwd_1280.cu",
51
+ "layer-norm/ln_parallel_bwd_1536.cu",
52
+ "layer-norm/ln_parallel_bwd_2048.cu",
53
+ "layer-norm/ln_parallel_bwd_256.cu",
54
+ "layer-norm/ln_parallel_bwd_2560.cu",
55
+ "layer-norm/ln_parallel_bwd_3072.cu",
56
+ "layer-norm/ln_parallel_bwd_4096.cu",
57
+ "layer-norm/ln_parallel_bwd_512.cu",
58
+ "layer-norm/ln_parallel_bwd_5120.cu",
59
+ "layer-norm/ln_parallel_bwd_6144.cu",
60
+ "layer-norm/ln_parallel_bwd_7168.cu",
61
+ "layer-norm/ln_parallel_bwd_768.cu",
62
+ "layer-norm/ln_parallel_bwd_8192.cu",
63
+ "layer-norm/ln_parallel_fwd_1024.cu",
64
+ "layer-norm/ln_parallel_fwd_1280.cu",
65
+ "layer-norm/ln_parallel_fwd_1536.cu",
66
+ "layer-norm/ln_parallel_fwd_2048.cu",
67
+ "layer-norm/ln_parallel_fwd_256.cu",
68
+ "layer-norm/ln_parallel_fwd_2560.cu",
69
+ "layer-norm/ln_parallel_fwd_3072.cu",
70
+ "layer-norm/ln_parallel_fwd_4096.cu",
71
+ "layer-norm/ln_parallel_fwd_512.cu",
72
+ "layer-norm/ln_parallel_fwd_5120.cu",
73
+ "layer-norm/ln_parallel_fwd_6144.cu",
74
+ "layer-norm/ln_parallel_fwd_7168.cu",
75
+ "layer-norm/ln_parallel_fwd_768.cu",
76
+ "layer-norm/ln_parallel_fwd_8192.cu",
77
+ "layer-norm/ln_parallel_residual_bwd_kernels.cuh",
78
+ "layer-norm/ln_parallel_residual_fwd_kernels.cuh",
79
+ "layer-norm/ln_utils.cuh",
80
+ "layer-norm/static_switch.h"
81
+ ]
82
+ cuda-flags = [
83
+ "-O3",
84
+ "-U__CUDA_NO_HALF_OPERATORS__",
85
+ "-U__CUDA_NO_HALF_CONVERSIONS__",
86
+ "-U__CUDA_NO_BFLOAT16_OPERATORS__",
87
+ "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
88
+ "-U__CUDA_NO_BFLOAT162_OPERATORS__",
89
+ "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
90
+ "--expt-relaxed-constexpr",
91
+ "--expt-extended-lambda",
92
+ "--use_fast_math",
93
+ ]
94
+
build/2.5.1+cu124/dropout_layer_norm.cpython-310-x86_64-linux-gnu.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a2a2612ebb3261909ec7ca8b85213e19f5ea128aeee93eca5640222d299969c
3
- size 734115376
 
 
 
 
flake.nix ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ description = "Flake for Torch kernel extension";
3
+
4
+ inputs = {
5
+ kernel-builder.url = "github:huggingface/kernel-builder";
6
+ };
7
+
8
+ outputs = { self, kernel-builder, }:
9
+ kernel-builder.lib.genFlakeOutputs {
10
+ path = ./.;
11
+ rev = self.shortRev or self.dirtyShortRev or self.lastModifiedDate;
12
+ };
13
+ }
ln.h → layer-norm/ln.h RENAMED
File without changes
ln_api.cpp → layer-norm/ln_api.cpp RENAMED
File without changes
ln_bwd_1024.cu → layer-norm/ln_bwd_1024.cu RENAMED
File without changes
ln_bwd_1280.cu → layer-norm/ln_bwd_1280.cu RENAMED
File without changes
ln_bwd_1536.cu → layer-norm/ln_bwd_1536.cu RENAMED
File without changes
ln_bwd_2048.cu → layer-norm/ln_bwd_2048.cu RENAMED
File without changes
ln_bwd_256.cu → layer-norm/ln_bwd_256.cu RENAMED
File without changes
ln_bwd_2560.cu → layer-norm/ln_bwd_2560.cu RENAMED
File without changes
ln_bwd_3072.cu → layer-norm/ln_bwd_3072.cu RENAMED
File without changes
ln_bwd_4096.cu → layer-norm/ln_bwd_4096.cu RENAMED
File without changes
ln_bwd_512.cu → layer-norm/ln_bwd_512.cu RENAMED
File without changes
ln_bwd_5120.cu → layer-norm/ln_bwd_5120.cu RENAMED
File without changes
ln_bwd_6144.cu → layer-norm/ln_bwd_6144.cu RENAMED
File without changes
ln_bwd_7168.cu → layer-norm/ln_bwd_7168.cu RENAMED
File without changes
ln_bwd_768.cu → layer-norm/ln_bwd_768.cu RENAMED
File without changes
ln_bwd_8192.cu → layer-norm/ln_bwd_8192.cu RENAMED
File without changes
ln_bwd_kernels.cuh → layer-norm/ln_bwd_kernels.cuh RENAMED
File without changes
ln_fwd_1024.cu → layer-norm/ln_fwd_1024.cu RENAMED
File without changes
ln_fwd_1280.cu → layer-norm/ln_fwd_1280.cu RENAMED
File without changes
ln_fwd_1536.cu → layer-norm/ln_fwd_1536.cu RENAMED
File without changes
ln_fwd_2048.cu → layer-norm/ln_fwd_2048.cu RENAMED
File without changes
ln_fwd_256.cu → layer-norm/ln_fwd_256.cu RENAMED
File without changes
ln_fwd_2560.cu → layer-norm/ln_fwd_2560.cu RENAMED
File without changes
ln_fwd_3072.cu → layer-norm/ln_fwd_3072.cu RENAMED
File without changes
ln_fwd_4096.cu → layer-norm/ln_fwd_4096.cu RENAMED
File without changes
ln_fwd_512.cu → layer-norm/ln_fwd_512.cu RENAMED
File without changes
ln_fwd_5120.cu → layer-norm/ln_fwd_5120.cu RENAMED
File without changes
ln_fwd_6144.cu → layer-norm/ln_fwd_6144.cu RENAMED
File without changes
ln_fwd_7168.cu → layer-norm/ln_fwd_7168.cu RENAMED
File without changes
ln_fwd_768.cu → layer-norm/ln_fwd_768.cu RENAMED
File without changes
ln_fwd_8192.cu → layer-norm/ln_fwd_8192.cu RENAMED
File without changes
ln_fwd_kernels.cuh → layer-norm/ln_fwd_kernels.cuh RENAMED
File without changes
ln_kernel_traits.h → layer-norm/ln_kernel_traits.h RENAMED
File without changes
ln_parallel_bwd_1024.cu → layer-norm/ln_parallel_bwd_1024.cu RENAMED
File without changes
ln_parallel_bwd_1280.cu → layer-norm/ln_parallel_bwd_1280.cu RENAMED
File without changes
ln_parallel_bwd_1536.cu → layer-norm/ln_parallel_bwd_1536.cu RENAMED
File without changes
ln_parallel_bwd_2048.cu → layer-norm/ln_parallel_bwd_2048.cu RENAMED
File without changes
ln_parallel_bwd_256.cu → layer-norm/ln_parallel_bwd_256.cu RENAMED
File without changes
ln_parallel_bwd_2560.cu → layer-norm/ln_parallel_bwd_2560.cu RENAMED
File without changes
ln_parallel_bwd_3072.cu → layer-norm/ln_parallel_bwd_3072.cu RENAMED
File without changes
ln_parallel_bwd_4096.cu → layer-norm/ln_parallel_bwd_4096.cu RENAMED
File without changes
ln_parallel_bwd_512.cu → layer-norm/ln_parallel_bwd_512.cu RENAMED
File without changes
ln_parallel_bwd_5120.cu → layer-norm/ln_parallel_bwd_5120.cu RENAMED
File without changes
ln_parallel_bwd_6144.cu → layer-norm/ln_parallel_bwd_6144.cu RENAMED
File without changes
ln_parallel_bwd_7168.cu → layer-norm/ln_parallel_bwd_7168.cu RENAMED
File without changes
ln_parallel_bwd_768.cu → layer-norm/ln_parallel_bwd_768.cu RENAMED
File without changes