pcuenq HF Staff committed on
Commit
086f6c2
·
1 Parent(s): 32e5f25

Using flexible inputs only

Browse files
Files changed (2) hide show
  1. flexible_inputs_only.ipynb +724 -0
  2. xcode-bert-test.png +0 -0
flexible_inputs_only.ipynb ADDED
@@ -0,0 +1,724 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "8f5b0950",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import coremltools as ct"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "id": "009656b9",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "from transformers import AutoTokenizer, AutoModel\n",
21
+ "import numpy as np\n",
22
+ "import torch\n",
23
+ "import torch.nn as nn"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "id": "2b53abab",
29
+ "metadata": {},
30
+ "source": [
31
+ "Checking whether setting flexible inputs is enough for model conversion to work, see https://github.com/apple/coremltools/issues/1806"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "markdown",
36
+ "id": "c0eb4797",
37
+ "metadata": {},
38
+ "source": [
39
+ "## Model Setup"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 3,
45
+ "id": "6a3b370e",
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "model_id = \"bert-base-uncased\""
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 4,
55
+ "id": "1b4b35d8",
56
+ "metadata": {},
57
+ "outputs": [
58
+ {
59
+ "name": "stderr",
60
+ "output_type": "stream",
61
+ "text": [
62
+ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']\n",
63
+ "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
64
+ "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
65
+ ]
66
+ }
67
+ ],
68
+ "source": [
69
+ "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
70
+ "model = AutoModel.from_pretrained(model_id)\n",
71
+ "\n",
72
+ "model = model.eval()"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 5,
78
+ "id": "f3f55386",
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "compute_units = ct.ComputeUnit.CPU_ONLY"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 6,
88
+ "id": "ccbd0617",
89
+ "metadata": {},
90
+ "outputs": [],
91
+ "source": [
92
+ "shape = (1, 128)\n",
93
+ "inputs = {\n",
94
+ " \"input_ids\": np.random.randint(0, tokenizer.vocab_size, shape),\n",
95
+ " \"attention_mask\": np.ones(shape, dtype=np.int64),\n",
96
+ "}"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 7,
102
+ "id": "20ea1402",
103
+ "metadata": {},
104
+ "outputs": [
105
+ {
106
+ "data": {
107
+ "text/plain": [
108
+ "odict_keys(['last_hidden_state', 'pooler_output'])"
109
+ ]
110
+ },
111
+ "execution_count": 7,
112
+ "metadata": {},
113
+ "output_type": "execute_result"
114
+ }
115
+ ],
116
+ "source": [
117
+ "t_inputs = {k: torch.tensor(v, dtype=torch.int32) for k, v in inputs.items()}\n",
118
+ "outputs = model(**t_inputs)\n",
119
+ "outputs.keys()"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "markdown",
124
+ "id": "e512e19b",
125
+ "metadata": {},
126
+ "source": [
127
+ "## JIT"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 8,
133
+ "id": "ad66c2eb",
134
+ "metadata": {},
135
+ "outputs": [],
136
+ "source": [
137
+ "class Wrapper(nn.Module):\n",
138
+ " def __init__(self, model):\n",
139
+ " super().__init__()\n",
140
+ " self.model = model\n",
141
+ " \n",
142
+ " def forward(self, *args, **kwargs):\n",
143
+ " return self.model(return_dict=False, *args, **kwargs)"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": 9,
149
+ "id": "efb91bb7",
150
+ "metadata": {},
151
+ "outputs": [],
152
+ "source": [
153
+ "to_jit = Wrapper(model)\n",
154
+ "jit_inputs = list(t_inputs.values())"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 10,
160
+ "id": "068cb16c",
161
+ "metadata": {},
162
+ "outputs": [],
163
+ "source": [
164
+ "jitted_model = torch.jit.trace(to_jit, jit_inputs)\n",
165
+ "jitted_model.eval();"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": 11,
171
+ "id": "2ae7472a",
172
+ "metadata": {},
173
+ "outputs": [],
174
+ "source": [
175
+ "with torch.no_grad():\n",
176
+ " output_jit = jitted_model(*jit_inputs)"
177
+ ]
178
+ },
179
+ {
180
+ "cell_type": "code",
181
+ "execution_count": 12,
182
+ "id": "f75237f7",
183
+ "metadata": {},
184
+ "outputs": [
185
+ {
186
+ "data": {
187
+ "text/plain": [
188
+ "tensor(0., grad_fn=<MaxBackward1>)"
189
+ ]
190
+ },
191
+ "execution_count": 12,
192
+ "metadata": {},
193
+ "output_type": "execute_result"
194
+ }
195
+ ],
196
+ "source": [
197
+ "(output_jit[0] - outputs[\"last_hidden_state\"]).abs().max()"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": 13,
203
+ "id": "820fd659",
204
+ "metadata": {},
205
+ "outputs": [
206
+ {
207
+ "data": {
208
+ "text/plain": [
209
+ "tensor(0., grad_fn=<MaxBackward1>)"
210
+ ]
211
+ },
212
+ "execution_count": 13,
213
+ "metadata": {},
214
+ "output_type": "execute_result"
215
+ }
216
+ ],
217
+ "source": [
218
+ "(output_jit[1] - outputs[\"pooler_output\"]).abs().max()"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "markdown",
223
+ "id": "8be44765",
224
+ "metadata": {},
225
+ "source": [
226
+ "## Core ML Conversion"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "markdown",
231
+ "id": "e6b2d0ef",
232
+ "metadata": {},
233
+ "source": [
234
+ "The input shapes are declared as flexible in the next cell (sequence length from 1 to 128). Let's check that the outputs are still correct after conversion."
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": 14,
240
+ "id": "5e221907",
241
+ "metadata": {},
242
+ "outputs": [],
243
+ "source": [
244
+ "input_shape = ct.Shape(shape=(1, ct.RangeDim(lower_bound=1, upper_bound=128, default=1)))"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": 15,
250
+ "id": "bb8e96d5",
251
+ "metadata": {},
252
+ "outputs": [],
253
+ "source": [
254
+ "def _get_coreml_inputs(sample_inputs):\n",
255
+ " return [\n",
256
+ " ct.TensorType(\n",
257
+ " name=k,\n",
258
+ "# shape=v.shape,\n",
259
+ " shape=input_shape,\n",
260
+ " dtype=v.numpy().dtype if isinstance(v, torch.Tensor) else v.dtype,\n",
261
+ " ) for k, v in sample_inputs.items()\n",
262
+ " ]"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": 16,
268
+ "id": "e9e83c6a",
269
+ "metadata": {},
270
+ "outputs": [
271
+ {
272
+ "name": "stderr",
273
+ "output_type": "stream",
274
+ "text": [
275
+ "Tuple detected at graph output. This will be flattened in the converted model.\n",
276
+ "Converting PyTorch Frontend ==> MIL Ops: 0%| | 0/630 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.\n",
277
+ "Converting PyTorch Frontend ==> MIL Ops: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 628/630 [00:00<00:00, 3146.95 ops/s]\n",
278
+ "Running MIL Common passes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 40/40 [00:00<00:00, 54.89 passes/s]\n",
279
+ "Running MIL FP16ComputePrecision pass: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:01<00:00, 1.00s/ passes]\n",
280
+ "Running MIL Clean up passes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11/11 [00:01<00:00, 5.53 passes/s]\n"
281
+ ]
282
+ }
283
+ ],
284
+ "source": [
285
+ "coreml_input_types = _get_coreml_inputs(t_inputs)\n",
286
+ "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n",
287
+ "\n",
288
+ "coreml_model = ct.convert(\n",
289
+ " jitted_model,\n",
290
+ " convert_to = \"mlprogram\",\n",
291
+ " minimum_deployment_target = ct.target.macOS13,\n",
292
+ " inputs = coreml_input_types,\n",
293
+ " outputs = coreml_output_types,\n",
294
+ ")"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "markdown",
299
+ "id": "f3263470",
300
+ "metadata": {},
301
+ "source": [
302
+ "Conversion succeeds. Let's run inference."
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "execution_count": 17,
308
+ "id": "378948b4",
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": [
312
+ "coreml_outputs = coreml_model.predict(t_inputs)"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": 18,
318
+ "id": "bb3e90c9",
319
+ "metadata": {},
320
+ "outputs": [
321
+ {
322
+ "name": "stdout",
323
+ "output_type": "stream",
324
+ "text": [
325
+ "last_hidden_state\n",
326
+ "\tshape: torch.Size([1, 128, 768])\n",
327
+ "\tmax diff: 0.006343722343444824\n",
328
+ "pooler_output\n",
329
+ "\tshape: torch.Size([1, 768])\n",
330
+ "\tmax diff: 0.0055205002427101135\n"
331
+ ]
332
+ }
333
+ ],
334
+ "source": [
335
+ "for name in [\"last_hidden_state\", \"pooler_output\"]:\n",
336
+ " coreml_tensor = torch.tensor(coreml_outputs[name])\n",
337
+ " diff = (coreml_tensor - outputs[name]).abs().max()\n",
338
+ " print(f\"{name}\\n\\tshape: {coreml_tensor.shape}\\n\\tmax diff: {diff}\")"
339
+ ]
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "execution_count": 21,
344
+ "id": "42284296",
345
+ "metadata": {},
346
+ "outputs": [],
347
+ "source": [
348
+ "shorter_inputs = {\n",
349
+ " \"input_ids\": t_inputs[\"input_ids\"][:, :25],\n",
350
+ " \"attention_mask\": t_inputs[\"attention_mask\"][:, :25],\n",
351
+ "}"
352
+ ]
353
+ },
354
+ {
355
+ "cell_type": "code",
356
+ "execution_count": 23,
357
+ "id": "cf38a414",
358
+ "metadata": {},
359
+ "outputs": [],
360
+ "source": [
361
+ "shorter_outputs = coreml_model.predict(shorter_inputs)"
362
+ ]
363
+ },
364
+ {
365
+ "cell_type": "code",
366
+ "execution_count": 24,
367
+ "id": "6557878c",
368
+ "metadata": {},
369
+ "outputs": [
370
+ {
371
+ "name": "stdout",
372
+ "output_type": "stream",
373
+ "text": [
374
+ "last_hidden_state shape: torch.Size([1, 25, 768])\n",
375
+ "pooler_output shape: torch.Size([1, 768])\n"
376
+ ]
377
+ }
378
+ ],
379
+ "source": [
380
+ "for name in [\"last_hidden_state\", \"pooler_output\"]:\n",
381
+ " coreml_tensor = torch.tensor(shorter_outputs[name])\n",
382
+ " print(f\"{name} shape: {coreml_tensor.shape}\")"
383
+ ]
384
+ },
385
+ {
386
+ "cell_type": "markdown",
387
+ "id": "3b1949cf",
388
+ "metadata": {},
389
+ "source": [
390
+ "Works fine. Let's now test conversion without flexible inputs."
391
+ ]
392
+ },
393
+ {
394
+ "cell_type": "markdown",
395
+ "id": "1c3f7b7d",
396
+ "metadata": {},
397
+ "source": [
398
+ "### Conversion with fixed shapes"
399
+ ]
400
+ },
401
+ {
402
+ "cell_type": "code",
403
+ "execution_count": 25,
404
+ "id": "e89c02c9",
405
+ "metadata": {},
406
+ "outputs": [],
407
+ "source": [
408
+ "input_shape = ct.Shape(shape=(1, 128))"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": 26,
414
+ "id": "4770599b",
415
+ "metadata": {},
416
+ "outputs": [
417
+ {
418
+ "name": "stderr",
419
+ "output_type": "stream",
420
+ "text": [
421
+ "Tuple detected at graph output. This will be flattened in the converted model.\n",
422
+ "Converting PyTorch Frontend ==> MIL Ops: 0%| | 0/630 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.\n",
423
+ "Converting PyTorch Frontend ==> MIL Ops: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 628/630 [00:00<00:00, 8268.92 ops/s]\n",
424
+ "Running MIL Common passes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 40/40 [00:00<00:00, 147.20 passes/s]\n",
425
+ "Running MIL FP16ComputePrecision pass: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:00<00:00, 1.21 passes/s]\n",
426
+ "Running MIL Clean up passes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11/11 [00:01<00:00, 6.73 passes/s]\n"
427
+ ]
428
+ }
429
+ ],
430
+ "source": [
431
+ "coreml_input_types = _get_coreml_inputs(t_inputs)\n",
432
+ "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n",
433
+ "\n",
434
+ "coreml_model = ct.convert(\n",
435
+ " jitted_model,\n",
436
+ " convert_to = \"mlprogram\",\n",
437
+ " minimum_deployment_target = ct.target.macOS13,\n",
438
+ " inputs = coreml_input_types,\n",
439
+ " outputs = coreml_output_types,\n",
440
+ ")"
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "code",
445
+ "execution_count": 27,
446
+ "id": "9f979b44",
447
+ "metadata": {},
448
+ "outputs": [],
449
+ "source": [
450
+ "coreml_outputs = coreml_model.predict(t_inputs)"
451
+ ]
452
+ },
453
+ {
454
+ "cell_type": "code",
455
+ "execution_count": 28,
456
+ "id": "ba178554",
457
+ "metadata": {},
458
+ "outputs": [
459
+ {
460
+ "name": "stdout",
461
+ "output_type": "stream",
462
+ "text": [
463
+ "last_hidden_state\n",
464
+ "\tshape: torch.Size([1, 128, 768])\n",
465
+ "\tmax diff: 0.02703571319580078\n",
466
+ "pooler_output\n",
467
+ "\tshape: torch.Size([1, 768])\n",
468
+ "\tmax diff: 0.014858879148960114\n"
469
+ ]
470
+ }
471
+ ],
472
+ "source": [
473
+ "for name in [\"last_hidden_state\", \"pooler_output\"]:\n",
474
+ " coreml_tensor = torch.tensor(coreml_outputs[name])\n",
475
+ " diff = (coreml_tensor - outputs[name]).abs().max()\n",
476
+ " print(f\"{name}\\n\\tshape: {coreml_tensor.shape}\\n\\tmax diff: {diff}\")"
477
+ ]
478
+ },
479
+ {
480
+ "cell_type": "code",
481
+ "execution_count": 30,
482
+ "id": "b3c1a2f0",
483
+ "metadata": {},
484
+ "outputs": [
485
+ {
486
+ "ename": "RuntimeError",
487
+ "evalue": "{\n NSLocalizedDescription = \"For input feature 'attention_mask', the provided shape 1 \\U00d7 25 is not compatible with the model's feature description.\";\n NSUnderlyingError = \"Error Domain=com.apple.CoreML Code=0 \\\"MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description\\\" UserInfo={NSLocalizedDescription=MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description}\";\n}",
488
+ "output_type": "error",
489
+ "traceback": [
490
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
491
+ "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
492
+ "Cell \u001b[0;32mIn[30], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m shorter_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mcoreml_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshorter_inputs\u001b[49m\u001b[43m)\u001b[49m\n",
493
+ "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/sdcoreml/lib/python3.9/site-packages/coremltools/models/model.py:517\u001b[0m, in \u001b[0;36mMLModel.predict\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[38;5;66;03m# TODO: remove the following call when this is fixed: rdar://92239209\u001b[39;00m\n\u001b[1;32m 516\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_float16_multiarray_input_to_float32(data)\n\u001b[0;32m--> 517\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__proxy__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 519\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _macos_version() \u001b[38;5;241m<\u001b[39m (\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m13\u001b[39m):\n",
494
+ "\u001b[0;31mRuntimeError\u001b[0m: {\n NSLocalizedDescription = \"For input feature 'attention_mask', the provided shape 1 \\U00d7 25 is not compatible with the model's feature description.\";\n NSUnderlyingError = \"Error Domain=com.apple.CoreML Code=0 \\\"MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description\\\" UserInfo={NSLocalizedDescription=MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description}\";\n}"
495
+ ]
496
+ }
497
+ ],
498
+ "source": [
499
+ "shorter_outputs = coreml_model.predict(shorter_inputs)"
500
+ ]
501
+ },
502
+ {
503
+ "cell_type": "markdown",
504
+ "id": "733c6e2a",
505
+ "metadata": {},
506
+ "source": [
507
+ "OK, it fails: the model converted with fixed shapes rejects the shorter (1 x 25) input. Let's convert to a neural network instead and see if it behaves the same."
508
+ ]
509
+ },
510
+ {
511
+ "cell_type": "markdown",
512
+ "id": "2186fdc1",
513
+ "metadata": {},
514
+ "source": [
515
+ "### Neural Network Conversion"
516
+ ]
517
+ },
518
+ {
519
+ "cell_type": "markdown",
520
+ "id": "a40d4319",
521
+ "metadata": {},
522
+ "source": [
523
+ "We use flexible shapes again. In order to convert to the `neuralnetwork` format we have to lower the deployment target to `macOS11` (from `macOS13`)."
524
+ ]
525
+ },
526
+ {
527
+ "cell_type": "code",
528
+ "execution_count": 31,
529
+ "id": "a52ff3ac",
530
+ "metadata": {},
531
+ "outputs": [],
532
+ "source": [
533
+ "input_shape = ct.Shape(shape=(1, ct.RangeDim(lower_bound=1, upper_bound=128, default=1)))"
534
+ ]
535
+ },
536
+ {
537
+ "cell_type": "code",
538
+ "execution_count": 35,
539
+ "id": "be5e7785",
540
+ "metadata": {},
541
+ "outputs": [
542
+ {
543
+ "name": "stderr",
544
+ "output_type": "stream",
545
+ "text": [
546
+ "Tuple detected at graph output. This will be flattened in the converted model.\n",
547
+ "Converting PyTorch Frontend ==> MIL Ops: 0%| | 0/630 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.\n",
548
+ "Converting PyTorch Frontend ==> MIL Ops: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 628/630 [00:00<00:00, 6140.31 ops/s]\n",
549
+ "Running MIL Common passes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 40/40 [00:00<00:00, 61.07 passes/s]\n",
550
+ "Running MIL Clean up passes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11/11 [00:00<00:00, 44.94 passes/s]\n",
551
+ "Translating MIL ==> NeuralNetwork Ops: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1186/1186 [01:02<00:00, 18.85 ops/s]\n"
552
+ ]
553
+ }
554
+ ],
555
+ "source": [
556
+ "coreml_input_types = _get_coreml_inputs(t_inputs)\n",
557
+ "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n",
558
+ "\n",
559
+ "coreml_model = ct.convert(\n",
560
+ " jitted_model,\n",
561
+ " convert_to = \"neuralnetwork\",\n",
562
+ " minimum_deployment_target = ct.target.macOS11,\n",
563
+ " inputs = coreml_input_types,\n",
564
+ " outputs = coreml_output_types,\n",
565
+ ")"
566
+ ]
567
+ },
568
+ {
569
+ "cell_type": "code",
570
+ "execution_count": 36,
571
+ "id": "3bfb5dd6",
572
+ "metadata": {},
573
+ "outputs": [],
574
+ "source": [
575
+ "coreml_outputs = coreml_model.predict(t_inputs)"
576
+ ]
577
+ },
578
+ {
579
+ "cell_type": "code",
580
+ "execution_count": 37,
581
+ "id": "8c14beef",
582
+ "metadata": {},
583
+ "outputs": [],
584
+ "source": [
585
+ "shorter_outputs = coreml_model.predict(shorter_inputs)"
586
+ ]
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "execution_count": 38,
591
+ "id": "c52eeacb",
592
+ "metadata": {},
593
+ "outputs": [
594
+ {
595
+ "name": "stdout",
596
+ "output_type": "stream",
597
+ "text": [
598
+ "pooler_output: (1, 768)\n",
599
+ "last_hidden_state: (1, 25, 768)\n"
600
+ ]
601
+ }
602
+ ],
603
+ "source": [
604
+ "for k, v in shorter_outputs.items(): print(f\"{k}: {v.shape}\")"
605
+ ]
606
+ },
607
+ {
608
+ "cell_type": "markdown",
609
+ "id": "d3613014",
610
+ "metadata": {},
611
+ "source": [
612
+ "Seems to work."
613
+ ]
614
+ },
615
+ {
616
+ "cell_type": "markdown",
617
+ "id": "375e6eab",
618
+ "metadata": {},
619
+ "source": [
620
+ "### Metadata"
621
+ ]
622
+ },
623
+ {
624
+ "cell_type": "markdown",
625
+ "id": "f836c96a",
626
+ "metadata": {},
627
+ "source": [
628
+ "What does the converted model look like in Netron or Xcode? Let's export to ML Program."
629
+ ]
630
+ },
631
+ {
632
+ "cell_type": "code",
633
+ "execution_count": 39,
634
+ "id": "9ea2c28a",
635
+ "metadata": {},
636
+ "outputs": [
637
+ {
638
+ "name": "stderr",
639
+ "output_type": "stream",
640
+ "text": [
641
+ "Tuple detected at graph output. This will be flattened in the converted model.\n",
642
+ "Converting PyTorch Frontend ==> MIL Ops: 0%| | 0/630 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.\n",
643
+ "Converting PyTorch Frontend ==> MIL Ops: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 628/630 [00:00<00:00, 5572.61 ops/s]\n",
644
+ "Running MIL Common passes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 40/40 [00:00<00:00, 51.12 passes/s]\n",
645
+ "Running MIL FP16ComputePrecision pass: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:01<00:00, 1.01s/ passes]\n",
646
+ "Running MIL Clean up passes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11/11 [00:01<00:00, 5.64 passes/s]\n"
647
+ ]
648
+ }
649
+ ],
650
+ "source": [
651
+ "coreml_input_types = _get_coreml_inputs(t_inputs)\n",
652
+ "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n",
653
+ "\n",
654
+ "coreml_model = ct.convert(\n",
655
+ " jitted_model,\n",
656
+ " convert_to = \"mlprogram\",\n",
657
+ " minimum_deployment_target = ct.target.macOS13,\n",
658
+ " inputs = coreml_input_types,\n",
659
+ " outputs = coreml_output_types,\n",
660
+ ")"
661
+ ]
662
+ },
663
+ {
664
+ "cell_type": "code",
665
+ "execution_count": 43,
666
+ "id": "96bcc86b",
667
+ "metadata": {},
668
+ "outputs": [],
669
+ "source": [
670
+ "coreml_model.save(\"bert\")"
671
+ ]
672
+ },
673
+ {
674
+ "cell_type": "markdown",
675
+ "id": "489b28d2",
676
+ "metadata": {},
677
+ "source": [
678
+ "![Xcode inputs](xcode-bert-test.png)"
679
+ ]
680
+ },
681
+ {
682
+ "cell_type": "code",
683
+ "execution_count": null,
684
+ "id": "67a972a4",
685
+ "metadata": {},
686
+ "outputs": [],
687
+ "source": []
688
+ }
689
+ ],
690
+ "metadata": {
691
+ "kernelspec": {
692
+ "display_name": "Python 3 (ipykernel)",
693
+ "language": "python",
694
+ "name": "python3"
695
+ },
696
+ "language_info": {
697
+ "codemirror_mode": {
698
+ "name": "ipython",
699
+ "version": 3
700
+ },
701
+ "file_extension": ".py",
702
+ "mimetype": "text/x-python",
703
+ "name": "python",
704
+ "nbconvert_exporter": "python",
705
+ "pygments_lexer": "ipython3",
706
+ "version": "3.9.15"
707
+ },
708
+ "toc": {
709
+ "base_numbering": 1,
710
+ "nav_menu": {},
711
+ "number_sections": true,
712
+ "sideBar": true,
713
+ "skip_h1_title": false,
714
+ "title_cell": "Table of Contents",
715
+ "title_sidebar": "Contents",
716
+ "toc_cell": false,
717
+ "toc_position": {},
718
+ "toc_section_display": true,
719
+ "toc_window_display": false
720
+ }
721
+ },
722
+ "nbformat": 4,
723
+ "nbformat_minor": 5
724
+ }
xcode-bert-test.png ADDED