Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

Library/Logs/ScaleFT/sft/sft-2026-02-11T10_12_39-05_00.log +0 -5
__KERNEL_NAME_NORMALIZED___metal/__KERNEL_NAME_NORMALIZED__.metal +3 -9
__KERNEL_NAME_NORMALIZED___metal/__KERNEL_NAME_NORMALIZED__.mm +14 -13

Library/Logs/ScaleFT/sft/sft-2026-02-11T10_12_39-05_00.log DELETED Viewed

@@ -1,5 +0,0 @@
-2026-02-11T10:12:39.276-0500	INFO	sft command	{"version": "1.98.1", "pid": 7488, "args": ["/usr/local/bin/sft", "resolve", "-q", "hf.co"], "log_directory": "Library/Logs/ScaleFT/sft"}
-2026-02-11T10:12:39.282-0500	DEBUG	macOS User Defaults check returned error	{"error": "exit status 1"}
-2026-02-11T10:12:39.283-0500	DEBUG	Did not load a config from file, using default values	{"path": "Library/Application Support/ScaleFT/sft.conf", "error": "stat Library/Application Support/ScaleFT/sft.conf: no such file or directory"}
-2026-02-11T10:12:39.283-0500	ERROR	CLI Action failed	{"error": "", "action": "resolve"}
-2026-02-11T10:12:39.283-0500	INFO	RecordSpan	{"t": "trace", "operation": "cli.resolve", "start": "2026-02-11T10:12:39.283-0500", "duration": "126.584µs", "traceID": 9173706215567653899, "spanID": 4785670708162327726, "tags": {"error":true}}

__KERNEL_NAME_NORMALIZED___metal/__KERNEL_NAME_NORMALIZED__.metal CHANGED Viewed

@@ -1,14 +1,8 @@
 #include <metal_stdlib>
 using namespace metal;
-kernel void __KERNEL_NAME_NORMALIZED___forward_kernel_float(device const float *input [[buffer(0)]],
-                                device float *output [[buffer(1)]],
-                                uint index [[thread_position_in_grid]]) {
     output[index] = input[index] + 1.0f;
 }
-kernel void __KERNEL_NAME_NORMALIZED___forward_kernel_half(device const half *input [[buffer(0)]],
-                                device half *output [[buffer(1)]],
-                                uint index [[thread_position_in_grid]]) {
-    output[index] = input[index] + static_cast<half>(1.0);
-}

 #include <metal_stdlib>
 using namespace metal;
+kernel void __KERNEL_NAME_NORMALIZED___kernel(device const float *input [[buffer(0)]],
+                                              device float *output [[buffer(1)]],
+                                              uint index [[thread_position_in_grid]]) {
     output[index] = input[index] + 1.0f;
 }

__KERNEL_NAME_NORMALIZED___metal/__KERNEL_NAME_NORMALIZED__.mm CHANGED Viewed

@@ -16,27 +16,27 @@ static inline id<MTLBuffer> getMTLBufferStorage(const torch::Tensor &tensor) {
 void __KERNEL_NAME_NORMALIZED__(torch::Tensor &out, torch::Tensor const &input) {
   TORCH_CHECK(input.device().is_mps(), "input must be a MPS tensor");
   TORCH_CHECK(input.is_contiguous(), "input must be contiguous");
-  TORCH_CHECK(input.scalar_type() == torch::kFloat ||
-                  input.scalar_type() == torch::kHalf,
-              "only float32 and float16 supported");
   TORCH_CHECK(input.sizes() == out.sizes(), "Tensors must have same shape");
-  TORCH_CHECK(input.scalar_type() == out.scalar_type(), "Tensors must have same dtype");
-  TORCH_CHECK(input.device() == out.device(), "Tensors must be on same device");
   @autoreleasepool {
     id<MTLDevice> device = MTLCreateSystemDefaultDevice();
     int numThreads = input.numel();
     NSError *error = nil;
-    id<MTLLibrary> library = EMBEDDED_METALLIB_NAMESPACE::createLibrary(device, &error);
     TORCH_CHECK(library, "Failed to create Metal library: ",
                 error.localizedDescription.UTF8String);
-    std::string kernel_name = std::string("__KERNEL_NAME_NORMALIZED___forward_kernel_") +
-        (input.scalar_type() == torch::kFloat ? "float" : "half");
-    id<MTLFunction> func = [library newFunctionWithName:
-        [NSString stringWithUTF8String:kernel_name.c_str()]];
-    TORCH_CHECK(func, "Failed to create function: ", kernel_name.c_str());
     id<MTLComputePipelineState> pso =
         [device newComputePipelineStateWithFunction:func error:&error];
@@ -53,9 +53,10 @@ void __KERNEL_NAME_NORMALIZED__(torch::Tensor &out, torch::Tensor const &input)
                   offset:out.storage_offset() * out.element_size()
                  atIndex:1];
-      NSUInteger tgSize = MIN(pso.maxTotalThreadsPerThreadgroup, (NSUInteger)numThreads);
       [encoder dispatchThreads:MTLSizeMake(numThreads, 1, 1)
-         threadsPerThreadgroup:MTLSizeMake(tgSize, 1, 1)];
       [encoder endEncoding];
       torch::mps::commit();
     });

 void __KERNEL_NAME_NORMALIZED__(torch::Tensor &out, torch::Tensor const &input) {
   TORCH_CHECK(input.device().is_mps(), "input must be a MPS tensor");
   TORCH_CHECK(input.is_contiguous(), "input must be contiguous");
+  TORCH_CHECK(input.scalar_type() == at::ScalarType::Float,
+              "__KERNEL_NAME_NORMALIZED__ only supports float32");
   TORCH_CHECK(input.sizes() == out.sizes(), "Tensors must have same shape");
+  TORCH_CHECK(input.scalar_type() == out.scalar_type(),
+              "Tensors must have same dtype");
+  TORCH_CHECK(input.device() == out.device(),
+              "Tensors must be on same device");
   @autoreleasepool {
     id<MTLDevice> device = MTLCreateSystemDefaultDevice();
     int numThreads = input.numel();
     NSError *error = nil;
+    id<MTLLibrary> library =
+        EMBEDDED_METALLIB_NAMESPACE::createLibrary(device, &error);
     TORCH_CHECK(library, "Failed to create Metal library: ",
                 error.localizedDescription.UTF8String);
+    id<MTLFunction> func =
+        [library newFunctionWithName:@"__KERNEL_NAME_NORMALIZED___kernel"];
+    TORCH_CHECK(func, "Failed to create function");
     id<MTLComputePipelineState> pso =
         [device newComputePipelineStateWithFunction:func error:&error];
                   offset:out.storage_offset() * out.element_size()
                  atIndex:1];
+      NSUInteger tgSize =
+          MIN(pso.maxTotalThreadsPerThreadgroup, (NSUInteger)numThreads);
       [encoder dispatchThreads:MTLSizeMake(numThreads, 1, 1)
+          threadsPerThreadgroup:MTLSizeMake(tgSize, 1, 1)];
       [encoder endEncoding];
       torch::mps::commit();
     });