pcuenq
/

gists

Model card Files Files and versions

xet

Community

pcuenq HF Staff commited on Mar 22, 2023

Commit

086f6c2

1 Parent(s): 32e5f25

Using flexible inputs only

Browse files

Files changed (2) hide show

flexible_inputs_only.ipynb +724 -0
xcode-bert-test.png +0 -0

flexible_inputs_only.ipynb ADDED Viewed

	@@ -0,0 +1,724 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "8f5b0950",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import coremltools as ct"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "009656b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import AutoTokenizer, AutoModel\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "import torch.nn as nn"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2b53abab",
+   "metadata": {},
+   "source": [
+    "Checking whether setting flexible inputs is enough for model conversion to work, see https://github.com/apple/coremltools/issues/1806"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c0eb4797",
+   "metadata": {},
+   "source": [
+    "## Model Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "6a3b370e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_id = \"bert-base-uncased\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "1b4b35d8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']\n",
+      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+     ]
+    }
+   ],
+   "source": [
+    "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
+    "model = AutoModel.from_pretrained(model_id)\n",
+    "\n",
+    "model = model.eval()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f3f55386",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_units = ct.ComputeUnit.CPU_ONLY"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "ccbd0617",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shape = (1, 128)\n",
+    "inputs = {\n",
+    "    \"input_ids\": np.random.randint(0, tokenizer.vocab_size, shape),\n",
+    "    \"attention_mask\": np.ones(shape, dtype=np.int64),\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "20ea1402",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "odict_keys(['last_hidden_state', 'pooler_output'])"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t_inputs = {k: torch.tensor(v, dtype=torch.int32) for k, v in inputs.items()}\n",
+    "outputs = model(**t_inputs)\n",
+    "outputs.keys()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e512e19b",
+   "metadata": {},
+   "source": [
+    "## JIT"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "ad66c2eb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Wrapper(nn.Module):\n",
+    "    \"\"\"Thin adapter around a model that forces `return_dict=False`.\n",
+    "\n",
+    "    Every call is forwarded to the wrapped model unchanged except for the\n",
+    "    extra `return_dict=False` keyword, so callers index the result by\n",
+    "    position (as the comparison cells below do with `output_jit[0]`).\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, model):\n",
+    "        super().__init__()\n",
+    "        self.model = model\n",
+    "\n",
+    "    def forward(self, *args, **kwargs):\n",
+    "        \"\"\"Forward all arguments to the wrapped model with `return_dict=False`.\"\"\"\n",
+    "        return self.model(*args, return_dict=False, **kwargs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "efb91bb7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "to_jit = Wrapper(model)\n",
+    "jit_inputs = list(t_inputs.values())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "068cb16c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jitted_model = torch.jit.trace(to_jit, jit_inputs)\n",
+    "jitted_model.eval();"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "2ae7472a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with torch.no_grad():\n",
+    "    output_jit = jitted_model(*jit_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "f75237f7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor(0., grad_fn=<MaxBackward1>)"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(output_jit[0] - outputs[\"last_hidden_state\"]).abs().max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "820fd659",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor(0., grad_fn=<MaxBackward1>)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(output_jit[1] - outputs[\"pooler_output\"]).abs().max()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8be44765",
+   "metadata": {},
+   "source": [
+    "## Core ML Conversion"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e6b2d0ef",
+   "metadata": {},
+   "source": [
+    "Input shapes are already flexible. Let's check if outputs work fine after conversion."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "5e221907",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_shape = ct.Shape(shape=(1, ct.RangeDim(lower_bound=1, upper_bound=128, default=1)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "bb8e96d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def _get_coreml_inputs(sample_inputs, shape=None):\n",
+    "    \"\"\"Build one `ct.TensorType` description per entry of `sample_inputs`.\n",
+    "\n",
+    "    Args:\n",
+    "        sample_inputs: mapping of input name to a torch tensor, or to any\n",
+    "            object exposing a `.dtype` attribute (e.g. a numpy array).\n",
+    "        shape: shape applied to every input. When None (the default), the\n",
+    "            notebook-level `input_shape` is read at call time, which keeps\n",
+    "            existing single-argument calls working unchanged.\n",
+    "\n",
+    "    Returns:\n",
+    "        A list of `ct.TensorType`, in the iteration order of `sample_inputs`.\n",
+    "    \"\"\"\n",
+    "    if shape is None:\n",
+    "        # Fall back to the global that the notebook reassigns between sections.\n",
+    "        shape = input_shape\n",
+    "    return [\n",
+    "        ct.TensorType(\n",
+    "            name=name,\n",
+    "            shape=shape,\n",
+    "            # torch tensors are converted to get a numpy dtype; other inputs use .dtype directly\n",
+    "            dtype=value.numpy().dtype if isinstance(value, torch.Tensor) else value.dtype,\n",
+    "        )\n",
+    "        for name, value in sample_inputs.items()\n",
+    "    ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "e9e83c6a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Tuple detected at graph output. This will be flattened in the converted model.\n",
+      "Converting PyTorch Frontend ==> MIL Ops:   0%|                                                            | 0/630 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.\n",
+      "Converting PyTorch Frontend ==> MIL Ops: 100%|███████████████████████████████████████████████▊| 628/630 [00:00<00:00, 3146.95 ops/s]\n",
+      "Running MIL Common passes: 100%|███████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 54.89 passes/s]\n",
+      "Running MIL FP16ComputePrecision pass: 100%|█████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.00s/ passes]\n",
+      "Running MIL Clean up passes: 100%|█████████████████████████████████████████████████████████████| 11/11 [00:01<00:00,  5.53 passes/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "coreml_input_types = _get_coreml_inputs(t_inputs)\n",
+    "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n",
+    "\n",
+    "coreml_model = ct.convert(\n",
+    "    jitted_model,\n",
+    "    convert_to = \"mlprogram\",\n",
+    "    minimum_deployment_target = ct.target.macOS13,\n",
+    "    inputs = coreml_input_types,\n",
+    "    outputs = coreml_output_types,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f3263470",
+   "metadata": {},
+   "source": [
+    "Conversion succeeds. Let's run inference."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "378948b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "coreml_outputs = coreml_model.predict(t_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "bb3e90c9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "last_hidden_state\n",
+      "\tshape: torch.Size([1, 128, 768])\n",
+      "\tmax diff: 0.006343722343444824\n",
+      "pooler_output\n",
+      "\tshape: torch.Size([1, 768])\n",
+      "\tmax diff: 0.0055205002427101135\n"
+     ]
+    }
+   ],
+   "source": [
+    "for name in [\"last_hidden_state\", \"pooler_output\"]:\n",
+    "    coreml_tensor = torch.tensor(coreml_outputs[name])\n",
+    "    diff = (coreml_tensor - outputs[name]).abs().max()\n",
+    "    print(f\"{name}\\n\\tshape: {coreml_tensor.shape}\\n\\tmax diff: {diff}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "42284296",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shorter_inputs = {\n",
+    "    \"input_ids\": t_inputs[\"input_ids\"][:, :25],\n",
+    "    \"attention_mask\": t_inputs[\"attention_mask\"][:, :25],\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "cf38a414",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shorter_outputs = coreml_model.predict(shorter_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "6557878c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "last_hidden_state shape: torch.Size([1, 25, 768])\n",
+      "pooler_output shape: torch.Size([1, 768])\n"
+     ]
+    }
+   ],
+   "source": [
+    "for name in [\"last_hidden_state\", \"pooler_output\"]:\n",
+    "    coreml_tensor = torch.tensor(shorter_outputs[name])\n",
+    "    print(f\"{name} shape: {coreml_tensor.shape}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3b1949cf",
+   "metadata": {},
+   "source": [
+    "Works fine. Let's now test conversion without flexible inputs."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1c3f7b7d",
+   "metadata": {},
+   "source": [
+    "### Conversion with fixed shapes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "e89c02c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_shape = ct.Shape(shape=(1, 128))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "4770599b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Tuple detected at graph output. This will be flattened in the converted model.\n",
+      "Converting PyTorch Frontend ==> MIL Ops:   0%|                                                            | 0/630 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.\n",
+      "Converting PyTorch Frontend ==> MIL Ops: 100%|███████████████████████████████████████████████▊| 628/630 [00:00<00:00, 8268.92 ops/s]\n",
+      "Running MIL Common passes: 100%|██████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 147.20 passes/s]\n",
+      "Running MIL FP16ComputePrecision pass: 100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.21 passes/s]\n",
+      "Running MIL Clean up passes: 100%|█████████████████████████████████████████████████████████████| 11/11 [00:01<00:00,  6.73 passes/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "coreml_input_types = _get_coreml_inputs(t_inputs)\n",
+    "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n",
+    "\n",
+    "coreml_model = ct.convert(\n",
+    "    jitted_model,\n",
+    "    convert_to = \"mlprogram\",\n",
+    "    minimum_deployment_target = ct.target.macOS13,\n",
+    "    inputs = coreml_input_types,\n",
+    "    outputs = coreml_output_types,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "9f979b44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "coreml_outputs = coreml_model.predict(t_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "ba178554",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "last_hidden_state\n",
+      "\tshape: torch.Size([1, 128, 768])\n",
+      "\tmax diff: 0.02703571319580078\n",
+      "pooler_output\n",
+      "\tshape: torch.Size([1, 768])\n",
+      "\tmax diff: 0.014858879148960114\n"
+     ]
+    }
+   ],
+   "source": [
+    "for name in [\"last_hidden_state\", \"pooler_output\"]:\n",
+    "    coreml_tensor = torch.tensor(coreml_outputs[name])\n",
+    "    diff = (coreml_tensor - outputs[name]).abs().max()\n",
+    "    print(f\"{name}\\n\\tshape: {coreml_tensor.shape}\\n\\tmax diff: {diff}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "b3c1a2f0",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "RuntimeError",
+     "evalue": "{\n    NSLocalizedDescription = \"For input feature 'attention_mask', the provided shape 1 \\U00d7 25 is not compatible with the model's feature description.\";\n    NSUnderlyingError = \"Error Domain=com.apple.CoreML Code=0 \\\"MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description\\\" UserInfo={NSLocalizedDescription=MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description}\";\n}",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[30], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m shorter_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mcoreml_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshorter_inputs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/sdcoreml/lib/python3.9/site-packages/coremltools/models/model.py:517\u001b[0m, in \u001b[0;36mMLModel.predict\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m    515\u001b[0m     \u001b[38;5;66;03m# TODO: remove the following call when this is fixed: rdar://92239209\u001b[39;00m\n\u001b[1;32m    516\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_float16_multiarray_input_to_float32(data)\n\u001b[0;32m--> 517\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__proxy__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    518\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    519\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m _macos_version() \u001b[38;5;241m<\u001b[39m (\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m13\u001b[39m):\n",
+      "\u001b[0;31mRuntimeError\u001b[0m: {\n    NSLocalizedDescription = \"For input feature 'attention_mask', the provided shape 1 \\U00d7 25 is not compatible with the model's feature description.\";\n    NSUnderlyingError = \"Error Domain=com.apple.CoreML Code=0 \\\"MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description\\\" UserInfo={NSLocalizedDescription=MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description}\";\n}"
+     ]
+    }
+   ],
+   "source": [
+    "shorter_outputs = coreml_model.predict(shorter_inputs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "733c6e2a",
+   "metadata": {},
+   "source": [
+    "Ok, it fails: the model was converted with a fixed `(1, 128)` input shape, so a `(1, 25)` input is rejected. Let's do conversion to neural network instead and see if it behaves the same."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2186fdc1",
+   "metadata": {},
+   "source": [
+    "### Neural Network Conversion"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a40d4319",
+   "metadata": {},
+   "source": [
+    "Using flexible shapes. In order to convert to neural network we have to decrease the deployment target to `macOS11` (from `macOS13`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "a52ff3ac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_shape = ct.Shape(shape=(1, ct.RangeDim(lower_bound=1, upper_bound=128, default=1)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "be5e7785",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Tuple detected at graph output. This will be flattened in the converted model.\n",
+      "Converting PyTorch Frontend ==> MIL Ops:   0%|                                                            | 0/630 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.\n",
+      "Converting PyTorch Frontend ==> MIL Ops: 100%|███████████████████████████████████████████████▊| 628/630 [00:00<00:00, 6140.31 ops/s]\n",
+      "Running MIL Common passes: 100%|███████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 61.07 passes/s]\n",
+      "Running MIL Clean up passes: 100%|█████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 44.94 passes/s]\n",
+      "Translating MIL ==> NeuralNetwork Ops: 100%|██████████████████████████████████████████████████| 1186/1186 [01:02<00:00, 18.85 ops/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "coreml_input_types = _get_coreml_inputs(t_inputs)\n",
+    "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n",
+    "\n",
+    "coreml_model = ct.convert(\n",
+    "    jitted_model,\n",
+    "    convert_to = \"neuralnetwork\",\n",
+    "    minimum_deployment_target = ct.target.macOS11,\n",
+    "    inputs = coreml_input_types,\n",
+    "    outputs = coreml_output_types,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "3bfb5dd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "coreml_outputs = coreml_model.predict(t_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "8c14beef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shorter_outputs = coreml_model.predict(shorter_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "c52eeacb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "pooler_output: (1, 768)\n",
+      "last_hidden_state: (1, 25, 768)\n"
+     ]
+    }
+   ],
+   "source": [
+    "for k, v in shorter_outputs.items(): print(f\"{k}: {v.shape}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d3613014",
+   "metadata": {},
+   "source": [
+    "Seems to work."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "375e6eab",
+   "metadata": {},
+   "source": [
+    "### Metadata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f836c96a",
+   "metadata": {},
+   "source": [
+    "What does the converted model look like in Netron or Xcode? Let's export to ML Program."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "9ea2c28a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Tuple detected at graph output. This will be flattened in the converted model.\n",
+      "Converting PyTorch Frontend ==> MIL Ops:   0%|                                                            | 0/630 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.\n",
+      "Converting PyTorch Frontend ==> MIL Ops: 100%|███████████████████████████████████████████████▊| 628/630 [00:00<00:00, 5572.61 ops/s]\n",
+      "Running MIL Common passes: 100%|███████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 51.12 passes/s]\n",
+      "Running MIL FP16ComputePrecision pass: 100%|█████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.01s/ passes]\n",
+      "Running MIL Clean up passes: 100%|█████████████████████████████████████████████████████████████| 11/11 [00:01<00:00,  5.64 passes/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "coreml_input_types = _get_coreml_inputs(t_inputs)\n",
+    "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n",
+    "\n",
+    "coreml_model = ct.convert(\n",
+    "    jitted_model,\n",
+    "    convert_to = \"mlprogram\",\n",
+    "    minimum_deployment_target = ct.target.macOS13,\n",
+    "    inputs = coreml_input_types,\n",
+    "    outputs = coreml_output_types,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "96bcc86b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "coreml_model.save(\"bert\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "489b28d2",
+   "metadata": {},
+   "source": [
+    "![Xcode inputs](xcode-bert-test.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "67a972a4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.15"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

xcode-bert-test.png ADDED Viewed