Add ANE C bridge sources (from maderix/ANE, MIT)
Browse files- src/bridge/Makefile +17 -0
- src/bridge/ane_bridge.h +87 -0
- src/bridge/ane_bridge.m +344 -0
- src/bridge/libane_bridge.dylib +0 -0
src/bridge/Makefile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Builds the ANE bridge as a dynamic library (libane_bridge.dylib) and,
# optionally, a small test executable that links against it.

CC = xcrun clang
CFLAGS = -O2 -Wall -Wno-deprecated-declarations -fobjc-arc -fPIC
FRAMEWORKS = -framework Foundation -framework IOSurface -ldl
TARGET = libane_bridge.dylib

.PHONY: all clean test

# Default goal: build the dynamic library.
all: $(TARGET)

# $< expands to the first prerequisite, ane_bridge.m.
$(TARGET): ane_bridge.m ane_bridge.h
	$(CC) $(CFLAGS) -dynamiclib -o $@ $< $(FRAMEWORKS)

# Builds (does not run) test_bridge, linked against the library in this directory.
test: test_bridge.m ane_bridge.h $(TARGET)
	$(CC) $(CFLAGS) -o test_bridge $< -L. -lane_bridge $(FRAMEWORKS)

clean:
	rm -f $(TARGET) test_bridge
|
src/bridge/ane_bridge.h
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ane_bridge.h — C-callable bridge to ANE private APIs for Python ctypes
|
| 2 |
+
// Wraps _ANEInMemoryModel via private AppleNeuralEngine.framework
|
| 3 |
+
|
| 4 |
+
#ifndef ANE_BRIDGE_H
#define ANE_BRIDGE_H

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle to one compiled and loaded ANE kernel.
// Obtained from ane_bridge_compile*(), released with ane_bridge_free().
typedef struct ANEKernelHandle ANEKernelHandle;

// Load the private AppleNeuralEngine framework and resolve the classes the
// bridge uses. Must succeed before any compile call. Calling it again after a
// successful init is a no-op that returns 0.
//   Returns 0 on success, -1 on failure (a message is printed to stderr).
int ane_bridge_init(void);

// Compile a MIL program with at most one weight blob and load it on the ANE.
//   mil_text:     UTF-8 MIL program text
//   mil_len:      length of mil_text in bytes
//   weight_data:  raw weight blob, or NULL for a program without weights
//   weight_len:   length of weight_data in bytes; 0 also means "no weights"
//   n_inputs:     number of input tensors
//   input_sizes:  n_inputs byte sizes, one per input
//   n_outputs:    number of output tensors
//   output_sizes: n_outputs byte sizes, one per output
// When a blob is supplied it is registered under the name
// "@model_path/weights/weight.bin".
//   Returns a kernel handle, or NULL on failure.
ANEKernelHandle *ane_bridge_compile(const char *mil_text, size_t mil_len,
                                    const uint8_t *weight_data, size_t weight_len,
                                    int n_inputs, const size_t *input_sizes,
                                    int n_outputs, const size_t *output_sizes);

// Compile a MIL program with several named weight files (e.g. transformer
// kernels) and load it on the ANE.
//   weight_names: n_weights weight paths as referenced by the MIL text
//                 (e.g. "@model_path/weights/wq.bin")
//   weight_datas: n_weights pointers to the corresponding weight bytes
//   weight_lens:  n_weights lengths, in bytes
//   n_weights:    number of weight files
// The remaining parameters have the same meaning as for ane_bridge_compile().
//   Returns a kernel handle, or NULL on failure.
ANEKernelHandle *ane_bridge_compile_multi_weights(
    const char *mil_text, size_t mil_len,
    const char **weight_names, const uint8_t **weight_datas,
    const size_t *weight_lens, int n_weights,
    int n_inputs, const size_t *input_sizes,
    int n_outputs, const size_t *output_sizes);

// Run a compiled kernel on the ANE using its current input buffers.
//   Returns true on success; on failure a message is printed to stderr.
bool ane_bridge_eval(ANEKernelHandle *kernel);

// Copy `bytes` bytes from `data` into input tensor `idx` of `kernel`.
// The call does nothing when `kernel` is NULL or `idx` is out of range.
// `bytes` must not exceed the size given for that input at compile time.
void ane_bridge_write_input(ANEKernelHandle *kernel, int idx,
                            const void *data, size_t bytes);

// Copy `bytes` bytes from output tensor `idx` of `kernel` into `data`.
// The call does nothing when `kernel` is NULL or `idx` is out of range.
// `bytes` must not exceed the size given for that output at compile time.
void ane_bridge_read_output(ANEKernelHandle *kernel, int idx,
                            void *data, size_t bytes);

// Unload the kernel's model, release its buffers, delete its temporary
// directory and free the handle. Passing NULL is allowed.
void ane_bridge_free(ANEKernelHandle *kernel);

// Number of kernels successfully compiled and loaded since ane_bridge_init()
// or the last ane_bridge_reset_compile_count() (for exec() restart budgeting).
int ane_bridge_get_compile_count(void);

// Set the compile counter back to zero.
void ane_bridge_reset_compile_count(void);

// Build a weight blob in ANE format: a 128-byte header followed by
// rows * cols fp16 values converted from `src`.
//   src:     float32 weights, rows * cols elements, row-major
//   out_len: receives the total blob length in bytes
//   Returns a newly allocated buffer; release it with ane_bridge_free_blob().
uint8_t *ane_bridge_build_weight_blob(const float *src, int rows, int cols,
                                      size_t *out_len);

// Same as ane_bridge_build_weight_blob(), but the payload is the transpose of
// `src`: source element [i][j] is stored at fp16 index j * rows + i.
uint8_t *ane_bridge_build_weight_blob_transposed(const float *src, int rows, int cols,
                                                 size_t *out_len);

// Free a blob allocated by ane_bridge_build_weight_blob*().
void ane_bridge_free_blob(void *ptr);

#ifdef __cplusplus
}
#endif

#endif // ANE_BRIDGE_H
|
src/bridge/ane_bridge.m
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ane_bridge.m — Objective-C implementation of ANE bridge for Python ctypes
|
| 2 |
+
// Wraps _ANEInMemoryModel private APIs into C-callable functions
|
| 3 |
+
|
| 4 |
+
#import <Foundation/Foundation.h>
|
| 5 |
+
#import <objc/runtime.h>
|
| 6 |
+
#import <objc/message.h>
|
| 7 |
+
#import <dlfcn.h>
|
| 8 |
+
#import <IOSurface/IOSurface.h>
|
| 9 |
+
#include "ane_bridge.h"
|
| 10 |
+
|
| 11 |
+
// --- Private class references ---
|
| 12 |
+
// Private classes looked up by name in ane_bridge_init(); Nil until then.
static Class g_ANEDesc  = Nil;  // _ANEInMemoryModelDescriptor
static Class g_ANEInMem = Nil;  // _ANEInMemoryModel
static Class g_ANEReq   = Nil;  // _ANERequest
static Class g_ANEIO    = Nil;  // _ANEIOSurfaceObject

// True once ane_bridge_init() has resolved all four classes.
static bool g_initialized = false;

// Incremented after each successful compile + load; see
// ane_bridge_get_compile_count() / ane_bridge_reset_compile_count().
static int g_compile_count = 0;
|
| 18 |
+
|
| 19 |
+
// --- Kernel handle struct ---
|
| 20 |
+
struct ANEKernelHandle {
    id model;                 // _ANEInMemoryModel that was compiled and loaded
    IOSurfaceRef *ioInputs;   // nInputs surfaces, one per input tensor
    IOSurfaceRef *ioOutputs;  // nOutputs surfaces, one per output tensor
    id request;               // _ANERequest built over the surfaces above
    NSString *tmpDir;         // staging directory; deleted by ane_bridge_free()
    int nInputs;              // element count of ioInputs / inputBytes
    int nOutputs;             // element count of ioOutputs / outputBytes
    size_t *inputBytes;       // byte size requested for each input
    size_t *outputBytes;      // byte size requested for each output
};
|
| 30 |
+
|
| 31 |
+
// --- Public API ---
|
| 32 |
+
|
| 33 |
+
// Load the private AppleNeuralEngine framework and look up the four classes
// the bridge sends messages to. Once a call has succeeded, later calls return
// 0 immediately. The dlopen() handle is never closed by this file.
// Returns 0 on success, -1 when the framework or any class is unavailable.
int ane_bridge_init(void) {
    static const char *const kFrameworkPath =
        "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine";

    if (g_initialized) {
        return 0;
    }

    void *framework = dlopen(kFrameworkPath, RTLD_NOW);
    if (framework == NULL) {
        fprintf(stderr, "ane_bridge: Failed to load AppleNeuralEngine.framework\n");
        return -1;
    }

    g_ANEDesc  = NSClassFromString(@"_ANEInMemoryModelDescriptor");
    g_ANEInMem = NSClassFromString(@"_ANEInMemoryModel");
    g_ANEReq   = NSClassFromString(@"_ANERequest");
    g_ANEIO    = NSClassFromString(@"_ANEIOSurfaceObject");

    const bool all_resolved = g_ANEDesc && g_ANEInMem && g_ANEReq && g_ANEIO;
    if (!all_resolved) {
        fprintf(stderr, "ane_bridge: Failed to resolve ANE private classes\n");
        return -1;
    }

    // A fresh runtime starts with a zero compile budget used.
    g_compile_count = 0;
    g_initialized = true;
    return 0;
}
|
| 58 |
+
|
| 59 |
+
// Create a single-row IOSurface whose width, bytes-per-row and allocation
// size are all `bytes` (one byte per element, pixel format 0).
// Returns whatever IOSurfaceCreate() returns; the caller owns the reference.
static IOSurfaceRef create_surface(size_t bytes) {
    NSNumber *byteCount = @(bytes);
    NSDictionary *properties = @{
        (id)kIOSurfaceWidth:           byteCount,
        (id)kIOSurfaceHeight:          @1,
        (id)kIOSurfaceBytesPerElement: @1,
        (id)kIOSurfaceBytesPerRow:     byteCount,
        (id)kIOSurfaceAllocSize:       byteCount,
        (id)kIOSurfacePixelFormat:     @0
    };
    return IOSurfaceCreate((__bridge CFDictionaryRef)properties);
}
|
| 69 |
+
|
| 70 |
+
// Unload a model that has been loaded but is not (yet) owned by a kernel
// handle, and delete its staging directory. Used on early failure paths.
static void ane_discard_loaded_model(id mdl, NSString *td) {
    NSError *e = nil;
    ((BOOL(*)(id,SEL,unsigned int,NSError**))objc_msgSend)(
        mdl, @selector(unloadWithQoS:error:), 21, &e);
    [[NSFileManager defaultManager] removeItemAtPath:td error:nil];
}

// Compile a MIL program with named weight files, load it on the ANE and
// build the IOSurface-backed request used by ane_bridge_eval().
//
//   mil_text / mil_len:   UTF-8 MIL program text and its length in bytes
//   weight_names:         n_weights names (e.g. "@model_path/weights/wq.bin")
//   weight_datas/_lens:   n_weights data pointers and byte lengths
//   n_inputs/input_sizes: number of inputs and the byte size of each
//   n_outputs/output_sizes: number of outputs and the byte size of each
//
// Returns a handle to release with ane_bridge_free(), or NULL on any failure
// (invalid arguments, compile/load error, allocation or IOSurface failure).
// Every failure path releases what was acquired so far. The compile counter
// is incremented as soon as compile + load succeed, even if building the
// handle fails afterwards, because the ANE compile has already happened.
ANEKernelHandle *ane_bridge_compile_multi_weights(
    const char *mil_text, size_t mil_len,
    const char **weight_names, const uint8_t **weight_datas,
    const size_t *weight_lens, int n_weights,
    int n_inputs, const size_t *input_sizes,
    int n_outputs, const size_t *output_sizes)
{
    @autoreleasepool {
        if (!g_initialized) {
            fprintf(stderr, "ane_bridge: Not initialized\n");
            return NULL;
        }

        // Reject arguments that would otherwise be dereferenced blindly or
        // converted to huge unsigned sizes.
        if (!mil_text || n_weights < 0 || n_inputs < 0 || n_outputs < 0 ||
            (n_weights > 0 && (!weight_names || !weight_datas || !weight_lens)) ||
            (n_inputs > 0 && !input_sizes) ||
            (n_outputs > 0 && !output_sizes)) {
            fprintf(stderr, "ane_bridge: invalid arguments to compile\n");
            return NULL;
        }

        NSData *milData = [NSData dataWithBytes:mil_text length:mil_len];
        NSError *e = nil;

        // Build the weight dictionary; keep the converted names and data so
        // the staging loop below does not have to convert/copy them again.
        NSMutableDictionary *wdict = [NSMutableDictionary dictionary];
        NSMutableArray *names = [NSMutableArray arrayWithCapacity:(NSUInteger)n_weights];
        NSMutableArray *blobs = [NSMutableArray arrayWithCapacity:(NSUInteger)n_weights];
        for (int i = 0; i < n_weights; i++) {
            // stringWithUTF8String: raises on NULL and returns nil on invalid UTF-8.
            NSString *name = weight_names[i]
                ? [NSString stringWithUTF8String:weight_names[i]] : nil;
            if (!name || (!weight_datas[i] && weight_lens[i] > 0)) {
                fprintf(stderr, "ane_bridge: invalid weight entry %d\n", i);
                return NULL;
            }
            NSData *data = [NSData dataWithBytes:weight_datas[i] length:weight_lens[i]];
            wdict[name] = @{@"offset": @0, @"data": data};
            [names addObject:name];
            [blobs addObject:data];
        }

        id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(
            g_ANEDesc, @selector(modelWithMILText:weights:optionsPlist:),
            milData, wdict, nil);
        if (!desc) {
            fprintf(stderr, "ane_bridge: modelWithMILText failed\n");
            return NULL;
        }

        id mdl = ((id(*)(Class,SEL,id))objc_msgSend)(
            g_ANEInMem, @selector(inMemoryModelWithDescriptor:), desc);
        if (!mdl) {
            fprintf(stderr, "ane_bridge: inMemoryModelWithDescriptor failed\n");
            return NULL;
        }

        // Pre-populate temp dir
        id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier));
        if (![hx isKindOfClass:[NSString class]]) {
            fprintf(stderr, "ane_bridge: model has no hexStringIdentifier\n");
            return NULL;
        }
        NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx];
        NSFileManager *fm = [NSFileManager defaultManager];
        [fm createDirectoryAtPath:[td stringByAppendingPathComponent:@"weights"]
            withIntermediateDirectories:YES attributes:nil error:nil];
        if (![milData writeToFile:[td stringByAppendingPathComponent:@"model.mil"]
                       atomically:YES]) {
            // Staging is best-effort; the compile step reports the real error.
            fprintf(stderr, "ane_bridge: warning: could not stage model.mil\n");
        }

        for (int i = 0; i < n_weights; i++) {
            NSString *name = names[(NSUInteger)i];
            // "@model_path/weights/wq.bin" -> "weights/wq.bin" under the temp dir
            NSString *relPath = name;
            if ([name hasPrefix:@"@model_path/"]) {
                relPath = [name substringFromIndex:12];
            }
            NSString *fullPath = [td stringByAppendingPathComponent:relPath];
            NSString *dir = [fullPath stringByDeletingLastPathComponent];
            [fm createDirectoryAtPath:dir withIntermediateDirectories:YES
                           attributes:nil error:nil];
            if (![(NSData *)blobs[(NSUInteger)i] writeToFile:fullPath atomically:YES]) {
                fprintf(stderr, "ane_bridge: warning: could not stage weight entry %d\n", i);
            }
        }

        // Compile
        if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
                mdl, @selector(compileWithQoS:options:error:), 21, @{}, &e)) {
            fprintf(stderr, "ane_bridge: ANE compile failed: %s\n",
                    e ? [[e description] UTF8String] : "unknown");
            [fm removeItemAtPath:td error:nil];
            return NULL;
        }

        // Load (with one retry after a brief pause for ANE slot reclamation)
        BOOL loaded = ((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
            mdl, @selector(loadWithQoS:options:error:), 21, @{}, &e);
        if (!loaded) {
            fprintf(stderr, "ane_bridge: ANE load failed (retrying in 100ms): %s\n",
                    e ? [[e description] UTF8String] : "unknown");
            usleep(100000); // 100ms
            e = nil;
            loaded = ((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
                mdl, @selector(loadWithQoS:options:error:), 21, @{}, &e);
        }
        if (!loaded) {
            fprintf(stderr, "ane_bridge: ANE load failed after retry: %s\n",
                    e ? [[e description] UTF8String] : "unknown");
            [fm removeItemAtPath:td error:nil];
            return NULL;
        }

        g_compile_count++;

        // Create kernel handle. Until the handle exists, the loaded model
        // and staging directory have to be torn down by hand.
        ANEKernelHandle *k = (ANEKernelHandle *)calloc(1, sizeof(ANEKernelHandle));
        if (!k) {
            fprintf(stderr, "ane_bridge: out of memory allocating kernel handle\n");
            ane_discard_loaded_model(mdl, td);
            return NULL;
        }
        k->model = mdl;
        k->tmpDir = td;

        // From here on ane_bridge_free(k) performs all cleanup. The arrays
        // are zero-filled and nInputs/nOutputs stay 0 until the arrays exist,
        // so a partially built handle is always safe to free. At least one
        // element is requested so that NULL unambiguously means failure.
        const size_t nin = (size_t)n_inputs;
        const size_t nout = (size_t)n_outputs;
        k->inputBytes  = (size_t *)calloc(nin ? nin : 1, sizeof(size_t));
        k->outputBytes = (size_t *)calloc(nout ? nout : 1, sizeof(size_t));
        k->ioInputs    = (IOSurfaceRef *)calloc(nin ? nin : 1, sizeof(IOSurfaceRef));
        k->ioOutputs   = (IOSurfaceRef *)calloc(nout ? nout : 1, sizeof(IOSurfaceRef));
        if (!k->inputBytes || !k->outputBytes || !k->ioInputs || !k->ioOutputs) {
            fprintf(stderr, "ane_bridge: out of memory allocating kernel buffers\n");
            ane_bridge_free(k);
            return NULL;
        }
        k->nInputs = n_inputs;
        k->nOutputs = n_outputs;
        if (nin)  memcpy(k->inputBytes, input_sizes, nin * sizeof(size_t));
        if (nout) memcpy(k->outputBytes, output_sizes, nout * sizeof(size_t));

        // Create IOSurfaces
        for (int i = 0; i < n_inputs; i++) {
            k->ioInputs[i] = create_surface(input_sizes[i]);
            if (!k->ioInputs[i]) {
                fprintf(stderr, "ane_bridge: IOSurface creation failed for input %d\n", i);
                ane_bridge_free(k);
                return NULL;
            }
        }
        for (int i = 0; i < n_outputs; i++) {
            k->ioOutputs[i] = create_surface(output_sizes[i]);
            if (!k->ioOutputs[i]) {
                fprintf(stderr, "ane_bridge: IOSurface creation failed for output %d\n", i);
                ane_bridge_free(k);
                return NULL;
            }
        }

        // Build request. A nil wrapper must be caught here: adding nil to an
        // NSMutableArray raises an exception.
        NSMutableArray *wIns = [NSMutableArray arrayWithCapacity:nin];
        NSMutableArray *iIdx = [NSMutableArray arrayWithCapacity:nin];
        for (int i = 0; i < n_inputs; i++) {
            id wrapped = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
                g_ANEIO, @selector(objectWithIOSurface:), k->ioInputs[i]);
            if (!wrapped) {
                fprintf(stderr, "ane_bridge: objectWithIOSurface failed for input %d\n", i);
                ane_bridge_free(k);
                return NULL;
            }
            [wIns addObject:wrapped];
            [iIdx addObject:@(i)];
        }
        NSMutableArray *wOuts = [NSMutableArray arrayWithCapacity:nout];
        NSMutableArray *oIdx = [NSMutableArray arrayWithCapacity:nout];
        for (int i = 0; i < n_outputs; i++) {
            id wrapped = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
                g_ANEIO, @selector(objectWithIOSurface:), k->ioOutputs[i]);
            if (!wrapped) {
                fprintf(stderr, "ane_bridge: objectWithIOSurface failed for output %d\n", i);
                ane_bridge_free(k);
                return NULL;
            }
            [wOuts addObject:wrapped];
            [oIdx addObject:@(i)];
        }
        k->request = ((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(
            g_ANEReq,
            @selector(requestWithInputs:inputIndices:outputs:outputIndices:weightsBuffer:perfStats:procedureIndex:),
            wIns, iIdx, wOuts, oIdx, nil, nil, @0);
        if (!k->request) {
            fprintf(stderr, "ane_bridge: request creation failed\n");
            ane_bridge_free(k);
            return NULL;
        }

        return k;
    }
}
|
| 202 |
+
|
| 203 |
+
// Single-blob convenience wrapper around ane_bridge_compile_multi_weights().
// A non-NULL, non-empty weight blob is passed on as one weight named
// "@model_path/weights/weight.bin"; otherwise the program is compiled with
// no weights at all.
ANEKernelHandle *ane_bridge_compile(const char *mil_text, size_t mil_len,
                                    const uint8_t *weight_data, size_t weight_len,
                                    int n_inputs, const size_t *input_sizes,
                                    int n_outputs, const size_t *output_sizes) {
    const char *blob_name = "@model_path/weights/weight.bin";
    const bool has_weights = (weight_data != NULL) && (weight_len > 0);

    // The address-of expressions turn the three scalars into 1-element arrays.
    return ane_bridge_compile_multi_weights(
        mil_text, mil_len,
        has_weights ? &blob_name : NULL,
        has_weights ? &weight_data : NULL,
        has_weights ? &weight_len : NULL,
        has_weights ? 1 : 0,
        n_inputs, input_sizes,
        n_outputs, output_sizes);
}
|
| 222 |
+
|
| 223 |
+
// Run the kernel's prepared request on its loaded model (QoS 21, empty
// options). Returns true on success. A NULL kernel, a missing model or a
// missing request is reported on stderr and yields false, as does a failed
// evaluation.
bool ane_bridge_eval(ANEKernelHandle *kernel) {
    typedef BOOL (*EvaluateFn)(id, SEL, unsigned int, id, id, NSError **);

    @autoreleasepool {
        if (kernel == NULL || kernel->model == nil) {
            fprintf(stderr, "ane_bridge: eval called with null kernel/model\n");
            return false;
        }
        if (kernel->request == nil) {
            fprintf(stderr, "ane_bridge: eval called with null request\n");
            return false;
        }

        NSError *evalError = nil;
        EvaluateFn evaluate = (EvaluateFn)objc_msgSend;
        const BOOL succeeded = evaluate(
            kernel->model, @selector(evaluateWithQoS:options:request:error:),
            21, @{}, kernel->request, &evalError);
        if (succeeded) {
            return true;
        }

        const char *reason = evalError ? [[evalError description] UTF8String]
                                       : "unknown error (no NSError)";
        fprintf(stderr, "ane_bridge: eval failed: %s\n", reason);
        return false;
    }
}
|
| 244 |
+
|
| 245 |
+
// Copy `bytes` bytes from `data` into the IOSurface backing input `idx`.
//
// The call is ignored when `kernel` is NULL or `idx` is out of range. It is
// rejected with a message on stderr when `data` is NULL, when `bytes` exceeds
// the size recorded for that input at compile time (which would write past
// the end of the surface), or when the surface cannot be locked.
void ane_bridge_write_input(ANEKernelHandle *kernel, int idx,
                            const void *data, size_t bytes) {
    if (!kernel || idx < 0 || idx >= kernel->nInputs) return;
    if (bytes == 0) return;  // nothing to copy

    if (!data) {
        fprintf(stderr, "ane_bridge: write_input: NULL data for input %d\n", idx);
        return;
    }
    if (!kernel->inputBytes || bytes > kernel->inputBytes[idx]) {
        fprintf(stderr, "ane_bridge: write_input: %zu bytes exceeds size of input %d\n",
                bytes, idx);
        return;
    }

    IOSurfaceRef surface = kernel->ioInputs ? kernel->ioInputs[idx] : NULL;
    if (!surface) {
        fprintf(stderr, "ane_bridge: write_input: input %d has no surface\n", idx);
        return;
    }
    if (IOSurfaceLock(surface, 0, NULL) != KERN_SUCCESS) {
        fprintf(stderr, "ane_bridge: write_input: failed to lock input %d\n", idx);
        return;
    }
    memcpy(IOSurfaceGetBaseAddress(surface), data, bytes);
    IOSurfaceUnlock(surface, 0, NULL);
}
|
| 252 |
+
|
| 253 |
+
// Copy `bytes` bytes from the IOSurface backing output `idx` into `data`.
//
// The call is ignored when `kernel` is NULL or `idx` is out of range. It is
// rejected with a message on stderr when `data` is NULL, when `bytes` exceeds
// the size recorded for that output at compile time (which would read past
// the end of the surface), or when the surface cannot be locked.
void ane_bridge_read_output(ANEKernelHandle *kernel, int idx,
                            void *data, size_t bytes) {
    if (!kernel || idx < 0 || idx >= kernel->nOutputs) return;
    if (bytes == 0) return;  // nothing to copy

    if (!data) {
        fprintf(stderr, "ane_bridge: read_output: NULL destination for output %d\n", idx);
        return;
    }
    if (!kernel->outputBytes || bytes > kernel->outputBytes[idx]) {
        fprintf(stderr, "ane_bridge: read_output: %zu bytes exceeds size of output %d\n",
                bytes, idx);
        return;
    }

    IOSurfaceRef surface = kernel->ioOutputs ? kernel->ioOutputs[idx] : NULL;
    if (!surface) {
        fprintf(stderr, "ane_bridge: read_output: output %d has no surface\n", idx);
        return;
    }
    // Read-only lock: the surface contents are only copied out.
    if (IOSurfaceLock(surface, kIOSurfaceLockReadOnly, NULL) != KERN_SUCCESS) {
        fprintf(stderr, "ane_bridge: read_output: failed to lock output %d\n", idx);
        return;
    }
    memcpy(data, IOSurfaceGetBaseAddress(surface), bytes);
    IOSurfaceUnlock(surface, kIOSurfaceLockReadOnly, NULL);
}
|
| 260 |
+
|
| 261 |
+
// Tear down a kernel: unload its model (QoS 21; the result is ignored),
// release every non-NULL IOSurface, delete the staging directory, free the
// size/surface arrays and finally the handle itself. NULL is accepted.
void ane_bridge_free(ANEKernelHandle *kernel) {
    typedef BOOL (*UnloadFn)(id, SEL, unsigned int, NSError **);

    @autoreleasepool {
        if (kernel == NULL) {
            return;
        }

        if (kernel->model != nil) {
            NSError *unloadError = nil;
            ((UnloadFn)objc_msgSend)(
                kernel->model, @selector(unloadWithQoS:error:), 21, &unloadError);
        }

        for (int slot = 0; slot < kernel->nInputs; slot++) {
            IOSurfaceRef surface = kernel->ioInputs[slot];
            if (surface != NULL) {
                CFRelease(surface);
            }
        }
        for (int slot = 0; slot < kernel->nOutputs; slot++) {
            IOSurfaceRef surface = kernel->ioOutputs[slot];
            if (surface != NULL) {
                CFRelease(surface);
            }
        }

        NSString *stagingDir = kernel->tmpDir;
        if (stagingDir != nil) {
            [[NSFileManager defaultManager] removeItemAtPath:stagingDir error:nil];
        }

        free(kernel->ioInputs);
        free(kernel->ioOutputs);
        free(kernel->inputBytes);
        free(kernel->outputBytes);

        // The struct lives in malloc'd memory, so its object fields are set to
        // nil explicitly to drop their references before the memory goes away.
        kernel->model = nil;
        kernel->request = nil;
        kernel->tmpDir = nil;

        free(kernel);
    }
}
|
| 289 |
+
|
| 290 |
+
// Report how many kernels have been compiled and loaded since the counter
// was last zeroed (by ane_bridge_init or ane_bridge_reset_compile_count).
int ane_bridge_get_compile_count(void) {
    const int compiles_so_far = g_compile_count;
    return compiles_so_far;
}
|
| 293 |
+
|
| 294 |
+
// Zero the compile counter; already-compiled kernels are not affected.
void ane_bridge_reset_compile_count(void)
{
    g_compile_count = 0;
    return;
}
|
| 297 |
+
|
| 298 |
+
// Size of the fixed header that precedes the fp16 payload in a weight blob.
enum { ANE_BLOB_HEADER_BYTES = 128 };

// Validate the dimensions, allocate a zeroed blob for rows * cols fp16 values
// and fill in the header. Shared by both public builders.
// Returns NULL (with *out_len set to 0) when a dimension is negative, `src`
// is NULL for a non-empty matrix, the payload does not fit the header's
// 32-bit size field, or allocation fails.
static uint8_t *ane_alloc_weight_blob(const float *src, int rows, int cols,
                                      size_t *out_len) {
    if (out_len) *out_len = 0;
    if (rows < 0 || cols < 0) return NULL;

    // 64-bit arithmetic: two non-negative ints times 2 cannot overflow it.
    const uint64_t payload = (uint64_t)rows * (uint64_t)cols * sizeof(_Float16);
    if (payload > UINT32_MAX || payload > SIZE_MAX - ANE_BLOB_HEADER_BYTES) return NULL;
    if (payload > 0 && !src) return NULL;

    const size_t total = ANE_BLOB_HEADER_BYTES + (size_t)payload;
    uint8_t *buf = calloc(total, 1);
    if (!buf) return NULL;

    // ANE blob header
    buf[0] = 0x01; buf[4] = 0x02;
    buf[64] = 0xEF; buf[65] = 0xBE; buf[66] = 0xAD; buf[67] = 0xDE;
    buf[68] = 0x01;
    // Payload size and payload offset, stored as native-endian 32-bit values.
    const uint32_t payload_bytes = (uint32_t)payload;
    const uint32_t payload_offset = ANE_BLOB_HEADER_BYTES;
    memcpy(buf + 72, &payload_bytes, sizeof payload_bytes);
    memcpy(buf + 80, &payload_offset, sizeof payload_offset);

    if (out_len) *out_len = total;
    return buf;
}

// Build a weight blob: 128-byte header followed by the rows * cols values of
// `src` converted from float32 to fp16 in the same (row-major) order.
//   out_len: if non-NULL, receives the blob length in bytes (0 on failure)
// Returns a buffer to release with ane_bridge_free_blob(), or NULL on invalid
// arguments or allocation failure.
uint8_t *ane_bridge_build_weight_blob(const float *src, int rows, int cols,
                                      size_t *out_len) {
    uint8_t *buf = ane_alloc_weight_blob(src, rows, cols, out_len);
    if (!buf) return NULL;

    // Convert float32 -> float16
    _Float16 *fp16 = (_Float16 *)(buf + ANE_BLOB_HEADER_BYTES);
    const size_t count = (size_t)rows * (size_t)cols;
    for (size_t i = 0; i < count; i++) {
        fp16[i] = (_Float16)src[i];
    }
    return buf;
}

// Build a weight blob whose payload is the transpose of `src`: source element
// [r][c] of the rows x cols matrix is stored at fp16 index c * rows + r.
// Arguments, return value and failure behaviour match
// ane_bridge_build_weight_blob().
uint8_t *ane_bridge_build_weight_blob_transposed(const float *src, int rows, int cols,
                                                 size_t *out_len) {
    uint8_t *buf = ane_alloc_weight_blob(src, rows, cols, out_len);
    if (!buf) return NULL;

    _Float16 *fp16 = (_Float16 *)(buf + ANE_BLOB_HEADER_BYTES);
    const size_t nrows = (size_t)rows;
    const size_t ncols = (size_t)cols;
    for (size_t r = 0; r < nrows; r++) {
        for (size_t c = 0; c < ncols; c++) {
            fp16[c * nrows + r] = (_Float16)src[r * ncols + c];
        }
    }
    return buf;
}
|
| 341 |
+
|
| 342 |
+
// Release a buffer returned by ane_bridge_build_weight_blob*(). Exists so
// that callers free the blob with the same allocator that created it.
// Passing NULL is harmless, as with free().
void ane_bridge_free_blob(void *ptr)
{
    void *blob = ptr;
    free(blob);
}
|
src/bridge/libane_bridge.dylib
ADDED
|
Binary file (54.5 kB). View file
|
|
|