Use shared kernels-test-utils and set metal-std-version to metal3.1

Replace the inline device detection in tests/test_rotary_embedding.py with
kernels_test_utils.get_available_devices() from the shared kernel-builder test
utilities package. Set metal-std-version to "metal3.1" in build.toml for
macOS 14+ compatibility (it previously defaulted to metal4.0/macOS 26).

Co-developed-by: Claude Code v2.1.58 (claude-opus-4-6)

Files changed (3)

build.toml +1 -0
flake.lock +3 -3
tests/test_rotary_embedding.py +3 -9

build.toml CHANGED Viewed

@@ -10,6 +10,7 @@ src = [
 [kernel.rotary_embedding_metal]
 backend = "metal"
 src = [
   "rotary-embedding-metal/rotary_embedding.metal",
   "rotary-embedding-metal/rotary_embedding.mm",

 [kernel.rotary_embedding_metal]
 backend = "metal"
+metal-std-version = "metal3.1"
 src = [
   "rotary-embedding-metal/rotary_embedding.metal",
   "rotary-embedding-metal/rotary_embedding.mm",

flake.lock CHANGED Viewed

@@ -41,11 +41,11 @@
         "rust-overlay": "rust-overlay"
       },
       "locked": {
-        "lastModified": 1772650055,
-        "narHash": "sha256-6R8dJEPH+uHJyvr3nZPZ/xFwULzR4UCsLQGSjLRsxQE=",
         "owner": "ChipFlow",
         "repo": "kernels",
-        "rev": "f85b1d195c115acdb3f92c061a0dafcc0f9bfe79",
         "type": "github"
       },
       "original": {

         "rust-overlay": "rust-overlay"
       },
       "locked": {
+        "lastModified": 1773072978,
+        "narHash": "sha256-wTtMgTt1IMM5BFMh/lu+Y1jTw1P69aZcTr4fCNGvaw4=",
         "owner": "ChipFlow",
         "repo": "kernels",
+        "rev": "c220611160b60919af0c7c85438d82f3e3577aa2",
         "type": "github"
       },
       "original": {

tests/test_rotary_embedding.py CHANGED Viewed

@@ -7,17 +7,11 @@ for both NeoX (Llama/Mistral) and GPT-J rotation styles.
 import pytest
 import torch
-import rotary_embedding as ops
-def _is_mps_available() -> bool:
-    return hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
-if _is_mps_available():
-    DEVICES = ["mps"]
-else:
-    DEVICES = [f"cuda:{i}" for i in range(max(1, torch.cuda.device_count()))]
 DTYPES = [torch.float32, torch.float16, torch.bfloat16]
 HEAD_SIZES = [64, 128, 256]

 import pytest
 import torch
+from kernels_test_utils import get_available_devices
+import rotary_embedding as ops
+DEVICES = get_available_devices()
 DTYPES = [torch.float32, torch.float16, torch.bfloat16]
 HEAD_SIZES = [64, 128, 256]