{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Inference\n", "We have trained a well-trained checkpoint through the `self-cognition-sft.ipynb` tutorial, and here we use `PtEngine` to do the inference on it." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# import some libraries\n", "import os\n", "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n", "\n", "from swift.llm import InferEngine, InferRequest, PtEngine, RequestConfig, get_template" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Hyperparameters for inference\n", "last_model_checkpoint = 'output/checkpoint-xxx'\n", "\n", "# model\n", "model_id_or_path = 'Qwen/Qwen2.5-3B-Instruct' # model_id or model_path\n", "system = 'You are a helpful assistant.'\n", "infer_backend = 'pt'\n", "\n", "# generation_config\n", "max_new_tokens = 512\n", "temperature = 0\n", "stream = True" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get model and template, and load LoRA weights.\n", "engine = PtEngine(model_id_or_path, adapters=[last_model_checkpoint])\n", "template = get_template(engine.model_meta.template, engine.tokenizer, default_system=system)\n", "# You can modify the `default_template` directly here, or pass it in during `engine.infer`.\n", "engine.default_template = template" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "query: who are you?\n", "response: I am an artificial intelligence language model named Xiao Huang, developed by ModelScope. I can answer various questions and engage in conversation with humans. If you have any questions or need help, feel free to ask me at any time.\n", "--------------------------------------------------\n", "query: What should I do if I can't sleep at night?\n", "response: If you're having trouble sleeping, there are several things you can try:\n", "\n", "1. Establish a regular sleep schedule: Try to go to bed and wake up at the same time every day, even on weekends.\n", "\n", "2. Create a relaxing bedtime routine: Engage in calming activities before bed, such as reading a book or taking a warm bath.\n", "\n", "3. Make your bedroom conducive to sleep: Keep your bedroom cool, dark, and quiet. Invest in comfortable bedding and pillows.\n", "\n", "4. Avoid stimulating activities before bed: Avoid using electronic devices, watching TV, or engaging in mentally stimulating activities before bed.\n", "\n", "5. Exercise regularly: Regular physical activity can help improve your sleep quality, but avoid exercising too close to bedtime.\n", "\n", "6. Manage stress: Practice relaxation techniques, such as deep breathing, meditation, or yoga, to help manage stress and promote better sleep.\n", "\n", "7. Limit caffeine and alcohol intake: Both caffeine and alcohol can disrupt sleep patterns, so it's best to limit their consumption, especially in the evening.\n", "\n", "8. Seek professional help: If you continue to have difficulty sleeping despite trying these strategies, consider seeking help from a healthcare provider or a sleep specialist.\n", "--------------------------------------------------\n", "query: 你是谁训练的?\n", "response: 我是由魔搭团队训练和开发的。\n", "--------------------------------------------------\n" ] } ], "source": [ "query_list = [\n", " 'who are you?',\n", " \"What should I do if I can't sleep at night?\",\n", " '你是谁训练的?',\n", "]\n", "\n", "def infer_stream(engine: InferEngine, infer_request: InferRequest):\n", " request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature, stream=True)\n", " gen_list = engine.infer([infer_request], request_config)\n", " query = infer_request.messages[0]['content']\n", " print(f'query: {query}\\nresponse: ', end='')\n", " for resp in gen_list[0]:\n", " if resp is None:\n", " continue\n", " print(resp.choices[0].delta.content, end='', flush=True)\n", " print()\n", "\n", "def infer(engine: InferEngine, infer_request: InferRequest):\n", " request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature)\n", " resp_list = engine.infer([infer_request], request_config)\n", " query = infer_request.messages[0]['content']\n", " response = resp_list[0].choices[0].message.content\n", " print(f'query: {query}')\n", " print(f'response: {response}')\n", "\n", "infer_func = infer_stream if stream else infer\n", "for query in query_list:\n", " infer_func(engine, InferRequest(messages=[{'role': 'user', 'content': query}]))\n", " print('-' * 50)" ] } ], "metadata": { "kernelspec": { "display_name": "test_py310", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.15" } }, "nbformat": 4, "nbformat_minor": 2 }