{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "8f5b0950", "metadata": {}, "outputs": [], "source": [ "import coremltools as ct" ] }, { "cell_type": "code", "execution_count": 2, "id": "009656b9", "metadata": {}, "outputs": [], "source": [ "from transformers import AutoTokenizer, AutoModel\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn" ] }, { "cell_type": "markdown", "id": "dd7d796e", "metadata": {}, "source": [ "Checking whether setting flexible inputs is enough for model conversion to work, see https://github.com/apple/coremltools/issues/1806" ] }, { "cell_type": "markdown", "id": "c0eb4797", "metadata": {}, "source": [ "## Model Setup" ] }, { "cell_type": "code", "execution_count": 3, "id": "6a3b370e", "metadata": {}, "outputs": [], "source": [ "model_id = \"bert-base-uncased\"" ] }, { "cell_type": "code", "execution_count": 4, "id": "1b4b35d8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] } ], "source": [ "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", "model = AutoModel.from_pretrained(model_id)\n", "\n", "model = model.eval()" ] }, { "cell_type": "code", "execution_count": 5, "id": "f3f55386", "metadata": {}, "outputs": [], "source": [ "compute_units = ct.ComputeUnit.CPU_ONLY" ] }, { "cell_type": "code", "execution_count": 6, "id": "ccbd0617", "metadata": {}, "outputs": [], "source": [ "shape = (1, 128)\n", "inputs = {\n", " \"input_ids\": np.random.randint(0, tokenizer.vocab_size, shape),\n", " \"attention_mask\": np.ones(shape, dtype=np.int64),\n", "}" ] }, { "cell_type": "code", "execution_count": 7, "id": "20ea1402", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "odict_keys(['last_hidden_state', 'pooler_output'])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t_inputs = {k: torch.tensor(v, dtype=torch.int32) for k, v in inputs.items()}\n", "outputs = model(**t_inputs)\n", "outputs.keys()" ] }, { "cell_type": "markdown", "id": "e512e19b", "metadata": {}, "source": [ "## JIT" ] }, { "cell_type": "code", "execution_count": 8, "id": "ad66c2eb", "metadata": {}, "outputs": [], "source": [ "class Wrapper(nn.Module):\n", " def __init__(self, model):\n", " super().__init__()\n", " self.model = model\n", " \n", " def forward(self, *args, **kwargs):\n", " return self.model(return_dict=False, *args, **kwargs)" ] }, { "cell_type": "code", "execution_count": 9, "id": "efb91bb7", "metadata": {}, "outputs": [], "source": [ "to_jit = Wrapper(model)\n", "jit_inputs = list(t_inputs.values())" ] }, { "cell_type": "code", "execution_count": 10, "id": "068cb16c", "metadata": {}, "outputs": [], "source": [ "jitted_model = torch.jit.trace(to_jit, jit_inputs)\n", "jitted_model.eval();" ] }, { "cell_type": "code", "execution_count": 11, "id": "2ae7472a", "metadata": {}, "outputs": [], "source": [ "with torch.no_grad():\n", " output_jit = jitted_model(*jit_inputs)" ] }, { "cell_type": "code", "execution_count": 12, "id": "f75237f7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(0., grad_fn=)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(output_jit[0] - outputs[\"last_hidden_state\"]).abs().max()" ] }, { "cell_type": "code", "execution_count": 13, "id": "820fd659", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(0., grad_fn=)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(output_jit[1] - outputs[\"pooler_output\"]).abs().max()" ] }, { "cell_type": "markdown", "id": "8be44765", "metadata": {}, "source": [ "## Core ML Conversion" ] }, { "cell_type": "markdown", "id": "ee65ec32", "metadata": {}, "source": [ "Input shapes are already flexible. Let's check if outputs work fine after conversion." ] }, { "cell_type": "code", "execution_count": 14, "id": "5e221907", "metadata": {}, "outputs": [], "source": [ "input_shape = ct.Shape(shape=(1, ct.RangeDim(lower_bound=1, upper_bound=128, default=1)))" ] }, { "cell_type": "code", "execution_count": 15, "id": "bb8e96d5", "metadata": {}, "outputs": [], "source": [ "def _get_coreml_inputs(sample_inputs):\n", " return [\n", " ct.TensorType(\n", " name=k,\n", "# shape=v.shape,\n", " shape=input_shape,\n", " dtype=v.numpy().dtype if isinstance(v, torch.Tensor) else v.dtype,\n", " ) for k, v in sample_inputs.items()\n", " ]" ] }, { "cell_type": "code", "execution_count": 16, "id": "e9e83c6a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Tuple detected at graph output. This will be flattened in the converted model.\n", "Converting PyTorch Frontend ==> MIL Ops: 0%| | 0/630 [00:00 MIL Ops: 100%|███████████████████████████████████████████████▊| 628/630 [00:00<00:00, 3146.95 ops/s]\n", "Running MIL Common passes: 100%|███████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 54.89 passes/s]\n", "Running MIL FP16ComputePrecision pass: 100%|█████████████████████████████████████████████████████| 1/1 [00:01<00:00, 1.00s/ passes]\n", "Running MIL Clean up passes: 100%|█████████████████████████████████████████████████████████████| 11/11 [00:01<00:00, 5.53 passes/s]\n" ] } ], "source": [ "coreml_input_types = _get_coreml_inputs(t_inputs)\n", "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n", "\n", "coreml_model = ct.convert(\n", " jitted_model,\n", " convert_to = \"mlprogram\",\n", " minimum_deployment_target = ct.target.macOS13,\n", " inputs = coreml_input_types,\n", " outputs = coreml_output_types,\n", ")" ] }, { "cell_type": "markdown", "id": "f3263470", "metadata": {}, "source": [ "Conversion succeeds. Let's run inference." ] }, { "cell_type": "code", "execution_count": 17, "id": "378948b4", "metadata": {}, "outputs": [], "source": [ "coreml_outputs = coreml_model.predict(t_inputs)" ] }, { "cell_type": "code", "execution_count": 18, "id": "bb3e90c9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "last_hidden_state\n", "\tshape: torch.Size([1, 128, 768])\n", "\tmax diff: 0.006343722343444824\n", "pooler_output\n", "\tshape: torch.Size([1, 768])\n", "\tmax diff: 0.0055205002427101135\n" ] } ], "source": [ "for name in [\"last_hidden_state\", \"pooler_output\"]:\n", " coreml_tensor = torch.tensor(coreml_outputs[name])\n", " diff = (coreml_tensor - outputs[name]).abs().max()\n", " print(f\"{name}\\n\\tshape: {coreml_tensor.shape}\\n\\tmax diff: {diff}\")" ] }, { "cell_type": "code", "execution_count": 21, "id": "206c41b0", "metadata": {}, "outputs": [], "source": [ "shorter_inputs = {\n", " \"input_ids\": t_inputs[\"input_ids\"][:, :25],\n", " \"attention_mask\": t_inputs[\"attention_mask\"][:, :25],\n", "}" ] }, { "cell_type": "code", "execution_count": 23, "id": "c14ffa7c", "metadata": {}, "outputs": [], "source": [ "shorter_outputs = coreml_model.predict(shorter_inputs)" ] }, { "cell_type": "code", "execution_count": 24, "id": "81fe194d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "last_hidden_state shape: torch.Size([1, 25, 768])\n", "pooler_output shape: torch.Size([1, 768])\n" ] } ], "source": [ "for name in [\"last_hidden_state\", \"pooler_output\"]:\n", " coreml_tensor = torch.tensor(shorter_outputs[name])\n", " print(f\"{name} shape: {coreml_tensor.shape}\")" ] }, { "cell_type": "markdown", "id": "8a8b2010", "metadata": {}, "source": [ "Works fine. Let's now test conversion without flexible inputs." ] }, { "cell_type": "markdown", "id": "6b768a11", "metadata": {}, "source": [ "### Conversion with fixed shapes" ] }, { "cell_type": "code", "execution_count": 25, "id": "8f1d57f9", "metadata": {}, "outputs": [], "source": [ "input_shape = ct.Shape(shape=(1, 128))" ] }, { "cell_type": "code", "execution_count": 26, "id": "6b7f06f3", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Tuple detected at graph output. This will be flattened in the converted model.\n", "Converting PyTorch Frontend ==> MIL Ops: 0%| | 0/630 [00:00 MIL Ops: 100%|███████████████████████████████████████████████▊| 628/630 [00:00<00:00, 8268.92 ops/s]\n", "Running MIL Common passes: 100%|██████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 147.20 passes/s]\n", "Running MIL FP16ComputePrecision pass: 100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.21 passes/s]\n", "Running MIL Clean up passes: 100%|█████████████████████████████████████████████████████████████| 11/11 [00:01<00:00, 6.73 passes/s]\n" ] } ], "source": [ "coreml_input_types = _get_coreml_inputs(t_inputs)\n", "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n", "\n", "coreml_model = ct.convert(\n", " jitted_model,\n", " convert_to = \"mlprogram\",\n", " minimum_deployment_target = ct.target.macOS13,\n", " inputs = coreml_input_types,\n", " outputs = coreml_output_types,\n", ")" ] }, { "cell_type": "code", "execution_count": 27, "id": "4c8f40bc", "metadata": {}, "outputs": [], "source": [ "coreml_outputs = coreml_model.predict(t_inputs)" ] }, { "cell_type": "code", "execution_count": 28, "id": "192df561", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "last_hidden_state\n", "\tshape: torch.Size([1, 128, 768])\n", "\tmax diff: 0.02703571319580078\n", "pooler_output\n", "\tshape: torch.Size([1, 768])\n", "\tmax diff: 0.014858879148960114\n" ] } ], "source": [ "for name in [\"last_hidden_state\", \"pooler_output\"]:\n", " coreml_tensor = torch.tensor(coreml_outputs[name])\n", " diff = (coreml_tensor - outputs[name]).abs().max()\n", " print(f\"{name}\\n\\tshape: {coreml_tensor.shape}\\n\\tmax diff: {diff}\")" ] }, { "cell_type": "code", "execution_count": 30, "id": "51e48285", "metadata": {}, "outputs": [ { "ename": "RuntimeError", "evalue": "{\n NSLocalizedDescription = \"For input feature 'attention_mask', the provided shape 1 \\U00d7 25 is not compatible with the model's feature description.\";\n NSUnderlyingError = \"Error Domain=com.apple.CoreML Code=0 \\\"MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description\\\" UserInfo={NSLocalizedDescription=MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description}\";\n}", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[30], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m shorter_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mcoreml_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshorter_inputs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/sdcoreml/lib/python3.9/site-packages/coremltools/models/model.py:517\u001b[0m, in \u001b[0;36mMLModel.predict\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[38;5;66;03m# TODO: remove the following call when this is fixed: rdar://92239209\u001b[39;00m\n\u001b[1;32m 516\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_float16_multiarray_input_to_float32(data)\n\u001b[0;32m--> 517\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__proxy__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 519\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _macos_version() \u001b[38;5;241m<\u001b[39m (\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m13\u001b[39m):\n", "\u001b[0;31mRuntimeError\u001b[0m: {\n NSLocalizedDescription = \"For input feature 'attention_mask', the provided shape 1 \\U00d7 25 is not compatible with the model's feature description.\";\n NSUnderlyingError = \"Error Domain=com.apple.CoreML Code=0 \\\"MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description\\\" UserInfo={NSLocalizedDescription=MultiArray shape (1 x 25) does not match the shape (1 x 128) specified in the model description}\";\n}" ] } ], "source": [ "shorter_outputs = coreml_model.predict(shorter_inputs)" ] }, { "cell_type": "markdown", "id": "00602d06", "metadata": {}, "source": [ "Ok, it fails as expected. Let's convert to neural network instead and see if it behaves the same." ] }, { "cell_type": "markdown", "id": "61997b06", "metadata": {}, "source": [ "### Neural Network Conversion" ] }, { "cell_type": "markdown", "id": "4d5ed549", "metadata": {}, "source": [ "Using flexible shapes. In order to convert to neural network we have to decrease the deployment target to `macOS11` (from `macOS13`)." ] }, { "cell_type": "code", "execution_count": 31, "id": "5c7e9249", "metadata": {}, "outputs": [], "source": [ "input_shape = ct.Shape(shape=(1, ct.RangeDim(lower_bound=1, upper_bound=128, default=1)))" ] }, { "cell_type": "code", "execution_count": 35, "id": "4a611197", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Tuple detected at graph output. This will be flattened in the converted model.\n", "Converting PyTorch Frontend ==> MIL Ops: 0%| | 0/630 [00:00 MIL Ops: 100%|███████████████████████████████████████████████▊| 628/630 [00:00<00:00, 6140.31 ops/s]\n", "Running MIL Common passes: 100%|███████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 61.07 passes/s]\n", "Running MIL Clean up passes: 100%|█████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 44.94 passes/s]\n", "Translating MIL ==> NeuralNetwork Ops: 100%|██████████████████████████████████████████████████| 1186/1186 [01:02<00:00, 18.85 ops/s]\n" ] } ], "source": [ "coreml_input_types = _get_coreml_inputs(t_inputs)\n", "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n", "\n", "coreml_model = ct.convert(\n", " jitted_model,\n", " convert_to = \"neuralnetwork\",\n", " minimum_deployment_target = ct.target.macOS11,\n", " inputs = coreml_input_types,\n", " outputs = coreml_output_types,\n", ")" ] }, { "cell_type": "code", "execution_count": 36, "id": "ec7d7d6c", "metadata": {}, "outputs": [], "source": [ "coreml_outputs = coreml_model.predict(t_inputs)" ] }, { "cell_type": "code", "execution_count": 37, "id": "df16844f", "metadata": {}, "outputs": [], "source": [ "shorter_outputs = coreml_model.predict(shorter_inputs)" ] }, { "cell_type": "code", "execution_count": 38, "id": "83dd359c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pooler_output: (1, 768)\n", "last_hidden_state: (1, 25, 768)\n" ] } ], "source": [ "for k, v in shorter_outputs.items(): print(f\"{k}: {v.shape}\")" ] }, { "cell_type": "markdown", "id": "f6661539", "metadata": {}, "source": [ "Seems to work." ] }, { "cell_type": "markdown", "id": "0bbe7691", "metadata": {}, "source": [ "### Metadata" ] }, { "cell_type": "markdown", "id": "41953964", "metadata": {}, "source": [ "What does the converted model look like in Netron or Xcode? Let's export to ML Program." ] }, { "cell_type": "code", "execution_count": 39, "id": "123ff055", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Tuple detected at graph output. This will be flattened in the converted model.\n", "Converting PyTorch Frontend ==> MIL Ops: 0%| | 0/630 [00:00 MIL Ops: 100%|███████████████████████████████████████████████▊| 628/630 [00:00<00:00, 5572.61 ops/s]\n", "Running MIL Common passes: 100%|███████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 51.12 passes/s]\n", "Running MIL FP16ComputePrecision pass: 100%|█████████████████████████████████████████████████████| 1/1 [00:01<00:00, 1.01s/ passes]\n", "Running MIL Clean up passes: 100%|█████████████████████████████████████████████████████████████| 11/11 [00:01<00:00, 5.64 passes/s]\n" ] } ], "source": [ "coreml_input_types = _get_coreml_inputs(t_inputs)\n", "coreml_output_types = [ct.TensorType(name=name) for name in outputs.keys()]\n", "\n", "coreml_model = ct.convert(\n", " jitted_model,\n", " convert_to = \"mlprogram\",\n", " minimum_deployment_target = ct.target.macOS13,\n", " inputs = coreml_input_types,\n", " outputs = coreml_output_types,\n", ")" ] }, { "cell_type": "code", "execution_count": 43, "id": "4486fd5d", "metadata": {}, "outputs": [], "source": [ "coreml_model.save(\"bert\")" ] }, { "cell_type": "markdown", "id": "4246b382", "metadata": {}, "source": [ "`xcode-bert-test.png`:" ] }, { "cell_type": "markdown", "id": "0678e157", "metadata": {}, "source": [ "![Xcode inputs](xcode-bert-test.png)" ] }, { "cell_type": "code", "execution_count": null, "id": "e8329bba", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.15" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }