Ex0bit committed on
Commit
848e392
·
1 Parent(s): 7397b43

Add ANE C bridge sources (from maderix/ANE, MIT)

Browse files
src/bridge/Makefile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CC = xcrun clang
2
+ CFLAGS = -O2 -Wall -Wno-deprecated-declarations -fobjc-arc -fPIC
3
+ FRAMEWORKS = -framework Foundation -framework IOSurface -ldl
4
+ TARGET = libane_bridge.dylib
5
+
6
+ all: $(TARGET)
7
+
8
+ $(TARGET): ane_bridge.m ane_bridge.h
9
+ $(CC) $(CFLAGS) -dynamiclib -o $@ ane_bridge.m $(FRAMEWORKS)
10
+
11
+ test: test_bridge.m ane_bridge.h $(TARGET)
12
+ $(CC) $(CFLAGS) -o test_bridge test_bridge.m -L. -lane_bridge $(FRAMEWORKS)
13
+
14
+ clean:
15
+ rm -f $(TARGET) test_bridge
16
+
17
+ .PHONY: all clean test
src/bridge/ane_bridge.h ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ane_bridge.h — C-callable bridge to ANE private APIs for Python ctypes
2
+ // Wraps _ANEInMemoryModel via private AppleNeuralEngine.framework
3
+
4
+ #ifndef ANE_BRIDGE_H
5
+ #define ANE_BRIDGE_H
6
+
7
+ #include <stddef.h>
8
+ #include <stdint.h>
9
+ #include <stdbool.h>
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ // Opaque kernel handle
16
+ typedef struct ANEKernelHandle ANEKernelHandle;
17
+
18
+ // Initialize ANE runtime (load private framework, resolve classes)
19
+ // Returns 0 on success, -1 on failure
20
+ int ane_bridge_init(void);
21
+
22
+ // Compile a MIL program with weight blobs into an ANE kernel
23
+ // mil_text: UTF-8 MIL program text
24
+ // mil_len: length of MIL text
25
+ // weight_data: raw weight blob (can be NULL)
26
+ // weight_len: length of weight blob
27
+ // n_inputs: number of input tensors
28
+ // input_sizes: array of byte sizes for each input
29
+ // n_outputs: number of output tensors
30
+ // output_sizes: array of byte sizes for each output
31
+ // Returns kernel handle or NULL on failure
32
+ ANEKernelHandle *ane_bridge_compile(const char *mil_text, size_t mil_len,
33
+ const uint8_t *weight_data, size_t weight_len,
34
+ int n_inputs, const size_t *input_sizes,
35
+ int n_outputs, const size_t *output_sizes);
36
+
37
+ // Compile with multiple named weight files (for transformer kernels)
38
+ // weight_names: array of weight file paths (e.g. "@model_path/weights/wq.bin")
39
+ // weight_datas: array of weight data pointers
40
+ // weight_lens: array of weight data lengths
41
+ // n_weights: number of weight files
42
+ ANEKernelHandle *ane_bridge_compile_multi_weights(
43
+ const char *mil_text, size_t mil_len,
44
+ const char **weight_names, const uint8_t **weight_datas,
45
+ const size_t *weight_lens, int n_weights,
46
+ int n_inputs, const size_t *input_sizes,
47
+ int n_outputs, const size_t *output_sizes);
48
+
49
+ // Evaluate (run) a compiled kernel on ANE
50
+ // Returns true on success
51
+ bool ane_bridge_eval(ANEKernelHandle *kernel);
52
+
53
+ // Write data to kernel input tensor
54
+ void ane_bridge_write_input(ANEKernelHandle *kernel, int idx,
55
+ const void *data, size_t bytes);
56
+
57
+ // Read data from kernel output tensor
58
+ void ane_bridge_read_output(ANEKernelHandle *kernel, int idx,
59
+ void *data, size_t bytes);
60
+
61
+ // Free a compiled kernel and all associated resources
62
+ void ane_bridge_free(ANEKernelHandle *kernel);
63
+
64
+ // Get compile count (for exec() restart budgeting)
65
+ int ane_bridge_get_compile_count(void);
66
+
67
+ // Reset compile count
68
+ void ane_bridge_reset_compile_count(void);
69
+
70
+ // Build a weight blob in ANE format (128-byte header + fp16 data)
71
+ // src: float32 weights [rows x cols]
72
+ // Returns allocated buffer and sets out_len. Caller must free().
73
+ uint8_t *ane_bridge_build_weight_blob(const float *src, int rows, int cols,
74
+ size_t *out_len);
75
+
76
+ // Build a transposed weight blob in ANE format
77
+ uint8_t *ane_bridge_build_weight_blob_transposed(const float *src, int rows, int cols,
78
+ size_t *out_len);
79
+
80
+ // Free a blob allocated by ane_bridge_build_weight_blob*
81
+ void ane_bridge_free_blob(void *ptr);
82
+
83
+ #ifdef __cplusplus
84
+ }
85
+ #endif
86
+
87
+ #endif // ANE_BRIDGE_H
src/bridge/ane_bridge.m ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ane_bridge.m — Objective-C implementation of ANE bridge for Python ctypes
2
+ // Wraps _ANEInMemoryModel private APIs into C-callable functions
3
+
4
+ #import <Foundation/Foundation.h>
5
+ #import <objc/runtime.h>
6
+ #import <objc/message.h>
7
+ #import <dlfcn.h>
8
+ #import <IOSurface/IOSurface.h>
9
+ #include "ane_bridge.h"
10
+
11
+ // --- Private class references ---
12
+ static Class g_ANEDesc = nil;
13
+ static Class g_ANEInMem = nil;
14
+ static Class g_ANEReq = nil;
15
+ static Class g_ANEIO = nil;
16
+ static bool g_initialized = false;
17
+ static int g_compile_count = 0;
18
+
19
+ // --- Kernel handle struct ---
20
// Per-kernel state behind the opaque ANEKernelHandle pointer handed to C callers.
// Allocated zeroed with calloc() by the compile path and torn down by ane_bridge_free().
struct ANEKernelHandle {
    id            model;        // _ANEInMemoryModel instance
    IOSurfaceRef *ioInputs;     // nInputs surfaces, one per input tensor
    IOSurfaceRef *ioOutputs;    // nOutputs surfaces, one per output tensor
    id            request;      // _ANERequest binding the surfaces above
    NSString     *tmpDir;       // scratch directory holding model.mil and weight files
    int           nInputs;      // element count of ioInputs / inputBytes
    int           nOutputs;     // element count of ioOutputs / outputBytes
    size_t       *inputBytes;   // requested byte size of each input surface
    size_t       *outputBytes;  // requested byte size of each output surface
};
30
+
31
+ // --- Public API ---
32
+
33
// Load the private AppleNeuralEngine framework and resolve the classes the
// bridge needs. Safe to call repeatedly: once initialization has succeeded,
// later calls return immediately.
//
// Returns 0 on success, -1 if the framework cannot be loaded or any of the
// private classes cannot be resolved (a message is written to stderr).
int ane_bridge_init(void) {
    if (g_initialized) return 0;

    // The dlopen handle is intentionally never closed: the resolved classes
    // must stay valid for the lifetime of the process.
    void *handle = dlopen(
        "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine",
        RTLD_NOW);
    if (!handle) {
        // Include the loader's own explanation; without it the failure
        // (missing file, wrong architecture, ...) is impossible to tell apart.
        const char *why = dlerror();
        fprintf(stderr, "ane_bridge: Failed to load AppleNeuralEngine.framework: %s\n",
                why ? why : "unknown error");
        return -1;
    }

    g_ANEDesc  = NSClassFromString(@"_ANEInMemoryModelDescriptor");
    g_ANEInMem = NSClassFromString(@"_ANEInMemoryModel");
    g_ANEReq   = NSClassFromString(@"_ANERequest");
    g_ANEIO    = NSClassFromString(@"_ANEIOSurfaceObject");

    if (!g_ANEDesc || !g_ANEInMem || !g_ANEReq || !g_ANEIO) {
        fprintf(stderr, "ane_bridge: Failed to resolve ANE private classes\n");
        return -1;
    }

    g_initialized = true;
    g_compile_count = 0;
    return 0;
}
58
+
59
// Create a one-row IOSurface that is `bytes` wide with one byte per element.
// The result follows the Create rule: the caller owns it and releases it
// with CFRelease(). Returns whatever IOSurfaceCreate() returns.
static IOSurfaceRef create_surface(size_t bytes) {
    NSNumber *byteCount = @(bytes);
    NSDictionary *properties = @{
        (id)kIOSurfaceWidth:           byteCount,
        (id)kIOSurfaceHeight:          @1,
        (id)kIOSurfaceBytesPerElement: @1,
        (id)kIOSurfaceBytesPerRow:     byteCount,
        (id)kIOSurfaceAllocSize:       byteCount,
        (id)kIOSurfacePixelFormat:     @0
    };
    return IOSurfaceCreate((__bridge CFDictionaryRef)properties);
}
69
+
70
// Unload a model that was loaded successfully but whose kernel handle could
// not be completed, and delete its scratch directory.
static void ane_discard_loaded_model(id mdl, NSString *dir) {
    NSError *err = nil;
    ((BOOL(*)(id,SEL,unsigned int,NSError**))objc_msgSend)(
        mdl, @selector(unloadWithQoS:error:), 21, &err);
    [[NSFileManager defaultManager] removeItemAtPath:dir error:nil];
}

// Compile a MIL program plus named weight blobs into a loaded ANE kernel.
//
//   mil_text / mil_len        MIL program bytes (must not be NULL)
//   weight_names              n_weights NUL-terminated UTF-8 names, e.g.
//                             "@model_path/weights/wq.bin"
//   weight_datas / weight_lens  n_weights data pointers and byte lengths
//   n_inputs / input_sizes    number of inputs and byte size of each
//   n_outputs / output_sizes  number of outputs and byte size of each
//
// Returns a handle to release with ane_bridge_free(), or NULL on any failure
// (invalid arguments, compile/load failure, allocation or IOSurface failure).
// Every failure path writes a message to stderr and releases whatever had
// been acquired up to that point.
ANEKernelHandle *ane_bridge_compile_multi_weights(
    const char *mil_text, size_t mil_len,
    const char **weight_names, const uint8_t **weight_datas,
    const size_t *weight_lens, int n_weights,
    int n_inputs, const size_t *input_sizes,
    int n_outputs, const size_t *output_sizes)
{
    @autoreleasepool {
        if (!g_initialized) {
            fprintf(stderr, "ane_bridge: Not initialized\n");
            return NULL;
        }

        // Reject argument combinations that would otherwise be dereferenced
        // blindly or turned into huge allocation sizes by a negative count.
        if (!mil_text || n_weights < 0 || n_inputs < 0 || n_outputs < 0 ||
            (n_weights > 0 && (!weight_names || !weight_datas || !weight_lens)) ||
            (n_inputs > 0 && !input_sizes) ||
            (n_outputs > 0 && !output_sizes)) {
            fprintf(stderr, "ane_bridge: invalid arguments to compile\n");
            return NULL;
        }

        NSData *milData = [NSData dataWithBytes:mil_text length:mil_len];
        NSError *e = nil;

        // Build the weight dictionary, keeping the converted names and data so
        // the same objects can be written to disk below without a second copy.
        NSMutableDictionary *wdict = [NSMutableDictionary dictionary];
        NSMutableArray<NSString *> *names = [NSMutableArray arrayWithCapacity:(NSUInteger)n_weights];
        NSMutableArray<NSData *> *blobs = [NSMutableArray arrayWithCapacity:(NSUInteger)n_weights];
        for (int i = 0; i < n_weights; i++) {
            if (!weight_names[i] || (!weight_datas[i] && weight_lens[i] > 0)) {
                fprintf(stderr, "ane_bridge: weight %d has a NULL name or data pointer\n", i);
                return NULL;
            }
            // stringWithUTF8String: yields nil for malformed UTF-8; a nil
            // dictionary key would raise an exception.
            NSString *name = [NSString stringWithUTF8String:weight_names[i]];
            if (!name) {
                fprintf(stderr, "ane_bridge: weight %d name is not valid UTF-8\n", i);
                return NULL;
            }
            NSData *data = [NSData dataWithBytes:weight_datas[i] length:weight_lens[i]];
            wdict[name] = @{@"offset": @0, @"data": data};
            [names addObject:name];
            [blobs addObject:data];
        }

        id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(
            g_ANEDesc, @selector(modelWithMILText:weights:optionsPlist:),
            milData, wdict, nil);
        if (!desc) {
            fprintf(stderr, "ane_bridge: modelWithMILText failed\n");
            return NULL;
        }

        id mdl = ((id(*)(Class,SEL,id))objc_msgSend)(
            g_ANEInMem, @selector(inMemoryModelWithDescriptor:), desc);
        if (!mdl) {
            fprintf(stderr, "ane_bridge: inMemoryModelWithDescriptor failed\n");
            return NULL;
        }

        // Pre-populate temp dir. The directory is named after the model's hex
        // identifier; a nil identifier cannot be used as a path component.
        id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier));
        if (!hx) {
            fprintf(stderr, "ane_bridge: model has no hexStringIdentifier\n");
            return NULL;
        }
        NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx];
        NSFileManager *fm = [NSFileManager defaultManager];
        // File-system results are not checked here, matching the original
        // behavior; NOTE(review): presumably a missing file surfaces as a
        // compile error below - confirm.
        [fm createDirectoryAtPath:[td stringByAppendingPathComponent:@"weights"]
            withIntermediateDirectories:YES attributes:nil error:nil];
        [milData writeToFile:[td stringByAppendingPathComponent:@"model.mil"] atomically:YES];

        for (NSUInteger i = 0; i < names.count; i++) {
            NSString *name = names[i];
            // "@model_path/weights/wq.bin" -> "weights/wq.bin"
            NSString *relPath = name;
            if ([name hasPrefix:@"@model_path/"]) {
                relPath = [name substringFromIndex:12];
            }
            NSString *fullPath = [td stringByAppendingPathComponent:relPath];
            NSString *dir = [fullPath stringByDeletingLastPathComponent];
            [fm createDirectoryAtPath:dir withIntermediateDirectories:YES attributes:nil error:nil];
            [blobs[i] writeToFile:fullPath atomically:YES];
        }

        // Compile
        if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
                mdl, @selector(compileWithQoS:options:error:), 21, @{}, &e)) {
            fprintf(stderr, "ane_bridge: ANE compile failed: %s\n",
                    e ? [[e description] UTF8String] : "unknown");
            [fm removeItemAtPath:td error:nil];
            return NULL;
        }

        // Load (with one retry after a brief pause for ANE slot reclamation)
        BOOL loaded = ((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
            mdl, @selector(loadWithQoS:options:error:), 21, @{}, &e);
        if (!loaded) {
            fprintf(stderr, "ane_bridge: ANE load failed (retrying in 100ms): %s\n",
                    e ? [[e description] UTF8String] : "unknown");
            usleep(100000); // 100ms
            e = nil;
            loaded = ((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
                mdl, @selector(loadWithQoS:options:error:), 21, @{}, &e);
        }
        if (!loaded) {
            fprintf(stderr, "ane_bridge: ANE load failed after retry: %s\n",
                    e ? [[e description] UTF8String] : "unknown");
            [fm removeItemAtPath:td error:nil];
            return NULL;
        }

        // Counted here because the compile + load has happened, whether or
        // not the handle below can be completed.
        g_compile_count++;

        // Allocate the handle and its arrays up front. calloc() keeps every
        // pointer slot NULL so a partially built handle can be torn down
        // safely; a count of zero still allocates one element so the result
        // is never a NULL that looks like a failure.
        const size_t nIn = (size_t)n_inputs;
        const size_t nOut = (size_t)n_outputs;
        ANEKernelHandle *k = calloc(1, sizeof *k);
        size_t *inBytes = calloc(nIn ? nIn : 1, sizeof *inBytes);
        size_t *outBytes = calloc(nOut ? nOut : 1, sizeof *outBytes);
        IOSurfaceRef *inSurfaces = calloc(nIn ? nIn : 1, sizeof *inSurfaces);
        IOSurfaceRef *outSurfaces = calloc(nOut ? nOut : 1, sizeof *outSurfaces);
        if (!k || !inBytes || !outBytes || !inSurfaces || !outSurfaces) {
            fprintf(stderr, "ane_bridge: out of memory creating kernel handle\n");
            free(inBytes);
            free(outBytes);
            free(inSurfaces);
            free(outSurfaces);
            free(k); // no Objective-C fields have been assigned yet
            ane_discard_loaded_model(mdl, td);
            return NULL;
        }
        if (nIn > 0) memcpy(inBytes, input_sizes, nIn * sizeof *inBytes);
        if (nOut > 0) memcpy(outBytes, output_sizes, nOut * sizeof *outBytes);

        k->model = mdl;
        k->tmpDir = td;
        k->nInputs = n_inputs;
        k->nOutputs = n_outputs;
        k->inputBytes = inBytes;
        k->outputBytes = outBytes;
        k->ioInputs = inSurfaces;
        k->ioOutputs = outSurfaces;
        // From here on the handle is structurally complete, so every failure
        // path can simply hand it to ane_bridge_free().

        // Create IOSurfaces
        for (int i = 0; i < n_inputs; i++) {
            k->ioInputs[i] = create_surface(input_sizes[i]);
            if (!k->ioInputs[i]) {
                fprintf(stderr, "ane_bridge: failed to create IOSurface for input %d (%zu bytes)\n",
                        i, input_sizes[i]);
                ane_bridge_free(k);
                return NULL;
            }
        }
        for (int i = 0; i < n_outputs; i++) {
            k->ioOutputs[i] = create_surface(output_sizes[i]);
            if (!k->ioOutputs[i]) {
                fprintf(stderr, "ane_bridge: failed to create IOSurface for output %d (%zu bytes)\n",
                        i, output_sizes[i]);
                ane_bridge_free(k);
                return NULL;
            }
        }

        // Build request. A nil wrapper must be caught before addObject:,
        // which raises on nil.
        NSMutableArray *wIns = [NSMutableArray arrayWithCapacity:nIn];
        NSMutableArray *iIdx = [NSMutableArray arrayWithCapacity:nIn];
        for (int i = 0; i < n_inputs; i++) {
            id wrapped = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
                g_ANEIO, @selector(objectWithIOSurface:), k->ioInputs[i]);
            if (!wrapped) {
                fprintf(stderr, "ane_bridge: objectWithIOSurface failed for input %d\n", i);
                ane_bridge_free(k);
                return NULL;
            }
            [wIns addObject:wrapped];
            [iIdx addObject:@(i)];
        }
        NSMutableArray *wOuts = [NSMutableArray arrayWithCapacity:nOut];
        NSMutableArray *oIdx = [NSMutableArray arrayWithCapacity:nOut];
        for (int i = 0; i < n_outputs; i++) {
            id wrapped = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
                g_ANEIO, @selector(objectWithIOSurface:), k->ioOutputs[i]);
            if (!wrapped) {
                fprintf(stderr, "ane_bridge: objectWithIOSurface failed for output %d\n", i);
                ane_bridge_free(k);
                return NULL;
            }
            [wOuts addObject:wrapped];
            [oIdx addObject:@(i)];
        }
        k->request = ((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(
            g_ANEReq,
            @selector(requestWithInputs:inputIndices:outputs:outputIndices:weightsBuffer:perfStats:procedureIndex:),
            wIns, iIdx, wOuts, oIdx, nil, nil, @0);
        if (!k->request) {
            // Without a request the kernel could never be evaluated; fail
            // now instead of returning an unusable handle.
            fprintf(stderr, "ane_bridge: failed to create _ANERequest\n");
            ane_bridge_free(k);
            return NULL;
        }

        return k;
    }
}
202
+
203
// Single-blob convenience wrapper around ane_bridge_compile_multi_weights().
// A non-NULL, non-empty weight blob is registered under the fixed name
// "@model_path/weights/weight.bin"; otherwise the program is compiled with
// no weights at all. Returns the handle produced by the multi-weight call.
ANEKernelHandle *ane_bridge_compile(const char *mil_text, size_t mil_len,
                                    const uint8_t *weight_data, size_t weight_len,
                                    int n_inputs, const size_t *input_sizes,
                                    int n_outputs, const size_t *output_sizes) {
    const bool has_weights = (weight_data != NULL) && (weight_len > 0);

    if (!has_weights) {
        return ane_bridge_compile_multi_weights(mil_text, mil_len,
                                                NULL, NULL, NULL, 0,
                                                n_inputs, input_sizes,
                                                n_outputs, output_sizes);
    }

    // One-element "arrays" built from the addresses of the scalars.
    const char *single_name = "@model_path/weights/weight.bin";
    return ane_bridge_compile_multi_weights(mil_text, mil_len,
                                            &single_name, &weight_data, &weight_len, 1,
                                            n_inputs, input_sizes,
                                            n_outputs, output_sizes);
}
222
+
223
// Run a compiled kernel once using the request stored in the handle.
// Returns true when evaluateWithQoS:options:request:error: reports success;
// returns false (after logging to stderr) for a NULL handle, a missing
// model or request, or a failed evaluation.
bool ane_bridge_eval(ANEKernelHandle *kernel) {
    @autoreleasepool {
        if (kernel == NULL || kernel->model == nil) {
            fprintf(stderr, "ane_bridge: eval called with null kernel/model\n");
            return false;
        }
        if (kernel->request == nil) {
            fprintf(stderr, "ane_bridge: eval called with null request\n");
            return false;
        }

        typedef BOOL (*EvaluateFn)(id, SEL, unsigned int, id, id, NSError **);
        EvaluateFn evaluate = (EvaluateFn)objc_msgSend;

        NSError *error = nil;
        BOOL succeeded = evaluate(kernel->model,
                                  @selector(evaluateWithQoS:options:request:error:),
                                  21, @{}, kernel->request, &error);
        if (!succeeded) {
            const char *reason = error ? [[error description] UTF8String]
                                       : "unknown error (no NSError)";
            fprintf(stderr, "ane_bridge: eval failed: %s\n", reason);
        }
        return succeeded;
    }
}
244
+
245
// Copy `bytes` bytes from `data` into input surface `idx` of `kernel`.
//
// The call is a no-op (with a message on stderr for genuine errors) when:
//   - kernel is NULL or idx is outside [0, nInputs)
//   - bytes is 0, or data is NULL
//   - bytes exceeds the size recorded for this input at compile time
//     (copying more would write past the end of the surface)
//   - the surface cannot be locked or has no base address
void ane_bridge_write_input(ANEKernelHandle *kernel, int idx,
                            const void *data, size_t bytes) {
    if (!kernel || idx < 0 || idx >= kernel->nInputs) return;
    if (bytes == 0) return;
    if (!data) {
        fprintf(stderr, "ane_bridge: write_input called with NULL data\n");
        return;
    }
    if (bytes > kernel->inputBytes[idx]) {
        fprintf(stderr, "ane_bridge: write_input of %zu bytes exceeds input %d size (%zu bytes)\n",
                bytes, idx, kernel->inputBytes[idx]);
        return;
    }

    IOSurfaceRef surface = kernel->ioInputs[idx];
    if (!surface) return;

    if (IOSurfaceLock(surface, 0, NULL) != KERN_SUCCESS) {
        fprintf(stderr, "ane_bridge: failed to lock input surface %d\n", idx);
        return;
    }
    void *base = IOSurfaceGetBaseAddress(surface);
    if (base) {
        memcpy(base, data, bytes);
    }
    IOSurfaceUnlock(surface, 0, NULL);
}
252
+
253
// Copy `bytes` bytes from output surface `idx` of `kernel` into `data`.
//
// The call is a no-op (with a message on stderr for genuine errors) when:
//   - kernel is NULL or idx is outside [0, nOutputs)
//   - bytes is 0, or data is NULL
//   - bytes exceeds the size recorded for this output at compile time
//     (copying more would read past the end of the surface)
//   - the surface cannot be locked or has no base address
void ane_bridge_read_output(ANEKernelHandle *kernel, int idx,
                            void *data, size_t bytes) {
    if (!kernel || idx < 0 || idx >= kernel->nOutputs) return;
    if (bytes == 0) return;
    if (!data) {
        fprintf(stderr, "ane_bridge: read_output called with NULL data\n");
        return;
    }
    if (bytes > kernel->outputBytes[idx]) {
        fprintf(stderr, "ane_bridge: read_output of %zu bytes exceeds output %d size (%zu bytes)\n",
                bytes, idx, kernel->outputBytes[idx]);
        return;
    }

    IOSurfaceRef surface = kernel->ioOutputs[idx];
    if (!surface) return;

    if (IOSurfaceLock(surface, kIOSurfaceLockReadOnly, NULL) != KERN_SUCCESS) {
        fprintf(stderr, "ane_bridge: failed to lock output surface %d\n", idx);
        return;
    }
    const void *base = IOSurfaceGetBaseAddress(surface);
    if (base) {
        memcpy(data, base, bytes);
    }
    IOSurfaceUnlock(surface, kIOSurfaceLockReadOnly, NULL);
}
260
+
261
// Tear down a kernel handle: unload the model, release every IOSurface,
// delete the scratch directory, drop the Objective-C references and free the
// C allocations. Passing NULL is a no-op.
void ane_bridge_free(ANEKernelHandle *kernel) {
    @autoreleasepool {
        if (kernel == NULL) {
            return;
        }

        if (kernel->model != nil) {
            typedef BOOL (*UnloadFn)(id, SEL, unsigned int, NSError **);
            NSError *unloadError = nil;
            ((UnloadFn)objc_msgSend)(kernel->model,
                                     @selector(unloadWithQoS:error:), 21, &unloadError);
        }

        for (int i = 0; i < kernel->nInputs; i++) {
            IOSurfaceRef surface = kernel->ioInputs[i];
            if (surface != NULL) {
                CFRelease(surface);
            }
        }
        for (int i = 0; i < kernel->nOutputs; i++) {
            IOSurfaceRef surface = kernel->ioOutputs[i];
            if (surface != NULL) {
                CFRelease(surface);
            }
        }

        NSString *scratchDir = kernel->tmpDir;
        if (scratchDir != nil) {
            [[NSFileManager defaultManager] removeItemAtPath:scratchDir error:nil];
        }

        // The struct lives in calloc() memory, so ARC will not release these
        // fields by itself; assigning nil drops each strong reference before
        // the storage goes away.
        kernel->model = nil;
        kernel->request = nil;
        kernel->tmpDir = nil;

        free(kernel->ioInputs);
        free(kernel->ioOutputs);
        free(kernel->inputBytes);
        free(kernel->outputBytes);
        free(kernel);
    }
}
289
+
290
+ int ane_bridge_get_compile_count(void) {
291
+ return g_compile_count;
292
+ }
293
+
294
+ void ane_bridge_reset_compile_count(void) {
295
+ g_compile_count = 0;
296
+ }
297
+
298
+ uint8_t *ane_bridge_build_weight_blob(const float *src, int rows, int cols,
299
+ size_t *out_len) {
300
+ int wsize = rows * cols * 2; // fp16
301
+ int total = 128 + wsize;
302
+ uint8_t *buf = (uint8_t *)calloc(total, 1);
303
+
304
+ // ANE blob header
305
+ buf[0] = 0x01; buf[4] = 0x02;
306
+ buf[64] = 0xEF; buf[65] = 0xBE; buf[66] = 0xAD; buf[67] = 0xDE;
307
+ buf[68] = 0x01;
308
+ *(uint32_t*)(buf + 72) = wsize;
309
+ *(uint32_t*)(buf + 80) = 128;
310
+
311
+ // Convert float32 -> float16
312
+ _Float16 *fp16 = (_Float16 *)(buf + 128);
313
+ for (int i = 0; i < rows * cols; i++) {
314
+ fp16[i] = (_Float16)src[i];
315
+ }
316
+
317
+ *out_len = total;
318
+ return buf;
319
+ }
320
+
321
+ uint8_t *ane_bridge_build_weight_blob_transposed(const float *src, int rows, int cols,
322
+ size_t *out_len) {
323
+ int wsize = rows * cols * 2;
324
+ int total = 128 + wsize;
325
+ uint8_t *buf = (uint8_t *)calloc(total, 1);
326
+
327
+ buf[0] = 0x01; buf[4] = 0x02;
328
+ buf[64] = 0xEF; buf[65] = 0xBE; buf[66] = 0xAD; buf[67] = 0xDE;
329
+ buf[68] = 0x01;
330
+ *(uint32_t*)(buf + 72) = wsize;
331
+ *(uint32_t*)(buf + 80) = 128;
332
+
333
+ _Float16 *fp16 = (_Float16 *)(buf + 128);
334
+ for (int i = 0; i < rows; i++)
335
+ for (int j = 0; j < cols; j++)
336
+ fp16[j * rows + i] = (_Float16)src[i * cols + j];
337
+
338
+ *out_len = total;
339
+ return buf;
340
+ }
341
+
342
// Release a buffer returned by ane_bridge_build_weight_blob() or
// ane_bridge_build_weight_blob_transposed(). The pointer is handed straight
// to free(), so NULL is accepted and ignored.
void ane_bridge_free_blob(void *ptr) {
    free(ptr);
}
src/bridge/libane_bridge.dylib ADDED
Binary file (54.5 kB). View file