File size: 4,309 Bytes

cb2428f

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inference\n",
    "We have trained a well-trained checkpoint through the `ocr-sft.ipynb` tutorial, and here we use `PtEngine` to do the inference on it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import some libraries\n",
    "import os\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
    "\n",
    "from swift.llm import (\n",
    "    InferEngine, InferRequest, PtEngine, RequestConfig, get_template, load_dataset, load_image\n",
    ")\n",
    "from swift.utils import get_model_parameter_info, get_logger, seed_everything\n",
    "logger = get_logger()\n",
    "seed_everything(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyperparameters for inference\n",
    "last_model_checkpoint = 'output/checkpoint-xxx'\n",
    "\n",
    "# model\n",
    "model_id_or_path = 'Qwen/Qwen2-VL-2B-Instruct'  # model_id or model_path\n",
    "system = None\n",
    "infer_backend = 'pt'\n",
    "\n",
    "# dataset\n",
    "dataset = ['AI-ModelScope/LaTeX_OCR#20000']\n",
    "data_seed = 42\n",
    "split_dataset_ratio = 0.01\n",
    "num_proc = 4\n",
    "strict = False\n",
    "\n",
    "# generation_config\n",
    "max_new_tokens = 512\n",
    "temperature = 0\n",
    "stream = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get model and template, and load LoRA weights.\n",
    "engine = PtEngine(model_id_or_path, adapters=[last_model_checkpoint])\n",
    "template = get_template(engine.model_meta.template, engine.tokenizer, default_system=system)\n",
    "# The default mode of the template is 'pt', so there is no need to make any changes.\n",
    "# template.set_mode('pt')\n",
    "\n",
    "model_parameter_info = get_model_parameter_info(engine.model)\n",
    "logger.info(f'model_parameter_info: {model_parameter_info}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Due to the data_seed setting, the validation set here is the same as the validation set used during training.\n",
    "_, val_dataset = load_dataset(dataset, split_dataset_ratio=split_dataset_ratio, num_proc=num_proc,\n",
    "                              strict=strict, seed=data_seed)\n",
    "val_dataset = val_dataset.select(range(10))  # Take the first 10 items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Streaming inference and save images from the validation set.\n",
    "# The batch processing code can be found here: https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo_mllm.py\n",
    "def infer_stream(engine: InferEngine, infer_request: InferRequest):\n",
    "    request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature, stream=True)\n",
    "    gen_list = engine.infer([infer_request], request_config)\n",
    "    query = infer_request.messages[0]['content']\n",
    "    print(f'query: {query}\\nresponse: ', end='')\n",
    "    for resp in gen_list[0]:\n",
    "        if resp is None:\n",
    "            continue\n",
    "        print(resp.choices[0].delta.content, end='', flush=True)\n",
    "    print()\n",
    "\n",
    "from IPython.display import display\n",
    "os.makedirs('images', exist_ok=True)\n",
    "for i, data in enumerate(val_dataset):\n",
    "    image = data['images'][0]\n",
    "    image = load_image(image['bytes'] or image['path'])\n",
    "    image.save(f'images/{i}.png')\n",
    "    display(image)\n",
    "    infer_stream(engine, InferRequest(**data))\n",
    "    print('-' * 50)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "test_py310",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}