Upload ms-swift/examples/notebook/qwen2vl-ocr/infer.ipynb with huggingface_hub

Browse files

Files changed (1) hide show

ms-swift/examples/notebook/qwen2vl-ocr/infer.ipynb +136 -0

ms-swift/examples/notebook/qwen2vl-ocr/infer.ipynb ADDED Viewed

	@@ -0,0 +1,136 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Inference\n",
+    "We fine-tuned a checkpoint in the `ocr-sft.ipynb` tutorial; here we load it with `PtEngine` and run inference on it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import libraries; CUDA_VISIBLE_DEVICES is set before swift is imported so that only GPU 0 is visible.\n",
+    "import os\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
+    "\n",
+    "from swift.llm import (\n",
+    "    InferEngine, InferRequest, PtEngine, RequestConfig, get_template, load_dataset, load_image\n",
+    ")\n",
+    "from swift.utils import get_model_parameter_info, get_logger, seed_everything\n",
+    "logger = get_logger()\n",
+    "seed_everything(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hyperparameters for inference\n",
+    "# Placeholder: replace with the checkpoint directory produced by the `ocr-sft.ipynb` training run.\n",
+    "last_model_checkpoint = 'output/checkpoint-xxx'\n",
+    "\n",
+    "# model\n",
+    "model_id_or_path = 'Qwen/Qwen2-VL-2B-Instruct'  # model_id or model_path\n",
+    "system = None\n",
+    "# NOTE(review): `infer_backend` is not read by any later cell; the engine is always a PtEngine.\n",
+    "infer_backend = 'pt'\n",
+    "\n",
+    "# dataset\n",
+    "dataset = ['AI-ModelScope/LaTeX_OCR#20000']\n",
+    "data_seed = 42\n",
+    "split_dataset_ratio = 0.01\n",
+    "num_proc = 4\n",
+    "strict = False\n",
+    "\n",
+    "# generation_config\n",
+    "max_new_tokens = 512\n",
+    "temperature = 0\n",
+    "# NOTE(review): `stream` is not read by any later cell; `infer_stream` always requests streaming.\n",
+    "stream = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the base model together with the fine-tuned LoRA weights, then build the chat template.\n",
+    "engine = PtEngine(model_id_or_path, adapters=[last_model_checkpoint])\n",
+    "template = get_template(engine.model_meta.template, engine.tokenizer, default_system=system)\n",
+    "# The template's default mode is already 'pt', so no `template.set_mode('pt')` call is needed.\n",
+    "# Attach the template to the engine: without this, `engine.infer` keeps using the engine's own\n",
+    "# default template and the `system` setting above is silently ignored.\n",
+    "engine.default_template = template\n",
+    "\n",
+    "model_parameter_info = get_model_parameter_info(engine.model)\n",
+    "logger.info(f'model_parameter_info: {model_parameter_info}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Because the same `data_seed` is used, this validation split is identical to the one used during training.\n",
+    "_, val_dataset = load_dataset(dataset, split_dataset_ratio=split_dataset_ratio, num_proc=num_proc,\n",
+    "                              strict=strict, seed=data_seed)\n",
+    "# Keep at most the first 10 samples; clamping avoids an IndexError when the split holds fewer rows.\n",
+    "num_infer_samples = min(10, len(val_dataset))\n",
+    "val_dataset = val_dataset.select(range(num_infer_samples))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run streaming inference on each validation sample and save its input image to disk.\n",
+    "# The batch processing code can be found here: https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo_mllm.py\n",
+    "def infer_stream(engine: InferEngine, infer_request: InferRequest):\n",
+    "    \"\"\"Stream the reply to a single request, printing each chunk as it arrives.\n",
+    "\n",
+    "    Generation settings come from the notebook-level `max_new_tokens` and `temperature`.\n",
+    "    The content of the first message is echoed as the query before the response is printed.\n",
+    "    \"\"\"\n",
+    "    streaming_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature, stream=True)\n",
+    "    # One request goes in, so exactly one response stream is consumed below.\n",
+    "    streams = engine.infer([infer_request], streaming_config)\n",
+    "    query = infer_request.messages[0]['content']\n",
+    "    print(f'query: {query}\\nresponse: ', end='')\n",
+    "    for chunk in streams[0]:\n",
+    "        # The stream may yield None placeholders; only real chunks are printed.\n",
+    "        if chunk is not None:\n",
+    "            print(chunk.choices[0].delta.content, end='', flush=True)\n",
+    "    print()\n",
+    "\n",
+    "from IPython.display import display\n",
+    "\n",
+    "os.makedirs('images', exist_ok=True)\n",
+    "for idx, sample in enumerate(val_dataset):\n",
+    "    # Use the first image of the sample: its in-memory bytes when present, otherwise its path.\n",
+    "    image_record = sample['images'][0]\n",
+    "    image_source = image_record['bytes'] or image_record['path']\n",
+    "    loaded_image = load_image(image_source)\n",
+    "    loaded_image.save(f'images/{idx}.png')\n",
+    "    display(loaded_image)\n",
+    "    infer_stream(engine, InferRequest(**sample))\n",
+    "    print('-' * 50)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "test_py310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}