novateur committed on May 13, 2025

Commit

fa00b60

verified ·

1 Parent(s): a381636

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +3 -0
docs/resources/grpo_countdown.png +3 -0
docs/resources/grpo_countdown_1.png +3 -0
docs/resources/grpo_geoqa.png +3 -0
examples/infer/demo_agent.py +118 -0
examples/infer/demo_bert.py +53 -0
examples/infer/demo_grounding.py +43 -0
examples/infer/demo_hf.py +61 -0
examples/infer/demo_lora.py +68 -0
examples/infer/demo_mllm.py +145 -0
examples/infer/demo_reward_model.py +31 -0
examples/infer/lmdeploy/ddp.sh +7 -0
examples/infer/lmdeploy/mllm_tp.sh +8 -0
examples/infer/pt/batch_ddp.sh +9 -0
examples/infer/pt/bert.sh +8 -0
examples/infer/pt/lora.sh +10 -0
examples/infer/pt/mllm_device_map.sh +9 -0
examples/infer/pt/prm.sh +4 -0
examples/infer/pt/reward_model.sh +5 -0
examples/infer/vllm/ddp.sh +9 -0
examples/infer/vllm/mllm_ddp.sh +11 -0
examples/infer/vllm/mllm_tp.sh +11 -0
examples/notebook/qwen2_5-self-cognition/infer.ipynb +148 -0
examples/notebook/qwen2_5-self-cognition/infer.sh +7 -0
examples/notebook/qwen2_5-self-cognition/self-cognition-sft.ipynb +219 -0
examples/notebook/qwen2_5-self-cognition/sft.sh +30 -0
examples/notebook/qwen2_5-vl-grounding/zh.ipynb +261 -0
examples/notebook/qwen2vl-ocr/infer.ipynb +136 -0
examples/notebook/qwen2vl-ocr/ocr-sft.ipynb +226 -0
examples/sampler/distill/distill.sh +11 -0
examples/sampler/mcts/mcts.py +116 -0
examples/sampler/mcts/mcts.sh +35 -0
examples/sampler/mcts/system_prompt.txt +7 -0
examples/train/agent/deepseek_r1.sh +27 -0
examples/train/agent/glm4.sh +28 -0
examples/train/agent/loss_scale/infer_lora.py +90 -0
examples/train/agent/loss_scale/train.sh +28 -0
examples/train/agent/qwen2_5.sh +26 -0
examples/train/all_to_all/infer.sh +9 -0
examples/train/all_to_all/train.sh +23 -0
examples/train/base_to_chat/full.sh +28 -0
examples/train/base_to_chat/lora.sh +34 -0
examples/train/base_to_chat/lora2.sh +33 -0
examples/train/embedding/train_gme.sh +29 -0
examples/train/embedding/train_gte.sh +31 -0
examples/train/full/infer.sh +7 -0
examples/train/full/qwen2_5_32b.sh +28 -0
examples/train/full/train.sh +25 -0
examples/train/grpo/external/README.md +46 -0
examples/train/grpo/external/grpo.sh +33 -0

.gitattributes CHANGED Viewed

@@ -38,3 +38,6 @@ asset/banner.png filter=lfs diff=lfs merge=lfs -text
 docs/resources/grpo_clevr_count.png filter=lfs diff=lfs merge=lfs -text
 docs/resources/grpo_code.png filter=lfs diff=lfs merge=lfs -text
 docs/resources/dpo_data.png filter=lfs diff=lfs merge=lfs -text

 docs/resources/grpo_clevr_count.png filter=lfs diff=lfs merge=lfs -text
 docs/resources/grpo_code.png filter=lfs diff=lfs merge=lfs -text
 docs/resources/dpo_data.png filter=lfs diff=lfs merge=lfs -text
+docs/resources/grpo_countdown_1.png filter=lfs diff=lfs merge=lfs -text
+docs/resources/grpo_countdown.png filter=lfs diff=lfs merge=lfs -text
+docs/resources/grpo_geoqa.png filter=lfs diff=lfs merge=lfs -text

docs/resources/grpo_countdown.png ADDED Viewed

Git LFS Details

SHA256: 1b55fe6864e0c92549940d6989d92b3ab22be38a035cff3694525252737fc91e
Pointer size: 132 Bytes
Size of remote file: 2.23 MB

docs/resources/grpo_countdown_1.png ADDED Viewed

Git LFS Details

SHA256: b78dc3ce1cd541e76f2c557dea3aff06b278bb3b5413946a92c584cf42c1369f
Pointer size: 131 Bytes
Size of remote file: 785 kB

docs/resources/grpo_geoqa.png ADDED Viewed

Git LFS Details

SHA256: 71246376b16f2ff288542dca2ff31532b16ef99f5e862797463d548e447e1f8d
Pointer size: 132 Bytes
Size of remote file: 2.24 MB

examples/infer/demo_agent.py ADDED Viewed

	@@ -0,0 +1,118 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+# os.environ['SWIFT_DEBUG'] = '1'
+def infer(engine: 'InferEngine', infer_request: 'InferRequest'):
+    stop = [engine.default_template.agent_template.keyword.observation]  # compat react_en
+    request_config = RequestConfig(max_tokens=512, temperature=0, stop=stop)
+    resp_list = engine.infer([infer_request], request_config)
+    query = infer_request.messages[0]['content']
+    response = resp_list[0].choices[0].message.content
+    print(f'query: {query}')
+    print(f'response: {response}')
+    print(f'tool_calls: {resp_list[0].choices[0].message.tool_calls}')
+    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
+    print(f'tool_response: {tool}')
+    infer_request.messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
+    resp_list = engine.infer([infer_request], request_config)
+    response2 = resp_list[0].choices[0].message.content
+    print(f'response2: {response2}')
+def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'):
+    stop = [engine.default_template.agent_template.keyword.observation]
+    request_config = RequestConfig(max_tokens=512, temperature=0, stream=True, stop=stop)
+    gen_list = engine.infer([infer_request], request_config)
+    query = infer_request.messages[0]['content']
+    response = ''
+    print(f'query: {query}\nresponse: ', end='')
+    for resp in gen_list[0]:
+        if resp is None:
+            continue
+        delta = resp.choices[0].delta.content
+        response += delta
+        print(delta, end='', flush=True)
+    print()
+    print(f'tool_calls: {resp.choices[0].delta.tool_calls}')
+    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
+    print(f'tool_response: {tool}\nresponse2: ', end='')
+    infer_request.messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
+    gen_list = engine.infer([infer_request], request_config)
+    for resp in gen_list[0]:
+        if resp is None:
+            continue
+        print(resp.choices[0].delta.content, end='', flush=True)
+    print()
+def get_infer_request():
+    return InferRequest(
+        messages=[{
+            'role': 'user',
+            'content': "How's the weather in Beijing today?"
+        }],
+        tools=[{
+            'name': 'get_current_weather',
+            'description': 'Get the current weather in a given location',
+            'parameters': {
+                'type': 'object',
+                'properties': {
+                    'location': {
+                        'type': 'string',
+                        'description': 'The city and state, e.g. San Francisco, CA'
+                    },
+                    'unit': {
+                        'type': 'string',
+                        'enum': ['celsius', 'fahrenheit']
+                    }
+                },
+                'required': ['location']
+            }
+        }])
+def infer_continue_generate(engine):
+    # Continue generating after the assistant message.
+    infer_request = InferRequest(messages=[{
+        'role': 'user',
+        'content': 'How is the weather today?'
+    }, {
+        'role': 'assistant',
+        'content': 'It is sunny today, '
+    }, {
+        'role': 'assistant',
+        'content': None
+    }])
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'response: {response}')
+if __name__ == '__main__':
+    from swift.llm import InferEngine, InferRequest, PtEngine, RequestConfig
+    from swift.plugin import agent_templates
+    model = 'Qwen/Qwen2.5-1.5B-Instruct'
+    infer_backend = 'pt'
+    if infer_backend == 'pt':
+        engine = PtEngine(model, max_batch_size=64)
+    elif infer_backend == 'vllm':
+        from swift.llm import VllmEngine
+        engine = VllmEngine(model, max_model_len=8192)
+    elif infer_backend == 'lmdeploy':
+        from swift.llm import LmdeployEngine
+        engine = LmdeployEngine(model)
+    # agent_template = agent_templates['hermes']()  # react_en/qwen_en/qwen_en_parallel
+    # engine.default_template.agent_template = agent_template
+    infer(engine, get_infer_request())
+    infer_stream(engine, get_infer_request())
+    # infer_continue_generate(engine)

examples/infer/demo_bert.py ADDED Viewed

	@@ -0,0 +1,53 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+from typing import List
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']):
+    resp_list = engine.infer(infer_requests)
+    query0 = infer_requests[0].messages[0]['content']
+    query1 = infer_requests[1].messages[0]['content']
+    print(f'query0: {query0}')
+    print(f'response0: {resp_list[0].choices[0].message.content}')
+    print(f'query1: {query1}')
+    print(f'response1: {resp_list[1].choices[0].message.content}')
+if __name__ == '__main__':
+    # This is an example of BERT with LoRA.
+    from swift.llm import InferEngine, InferRequest, PtEngine, load_dataset, safe_snapshot_download, BaseArguments
+    from swift.tuners import Swift
+    adapter_path = safe_snapshot_download('swift/test_bert')
+    args = BaseArguments.from_pretrained(adapter_path)
+    args.max_length = 512
+    args.truncation_strategy = 'right'
+    # method1
+    model, processor = args.get_model_processor()
+    model = Swift.from_pretrained(model, adapter_path)
+    template = args.get_template(processor)
+    engine = PtEngine.from_model_template(model, template, max_batch_size=64)
+    # method2
+    # engine = PtEngine(args.model, adapters=[adapter_path], max_batch_size=64,
+    #                   task_type=args.task_type, num_labels=args.num_labels)
+    # template = args.get_template(engine.processor)
+    # engine.default_template = template
+    # Here, `load_dataset` is used for convenience; `infer_batch` does not require creating a dataset.
+    dataset = load_dataset(['DAMO_NLP/jd:cls#1000'], seed=42)[0]
+    print(f'dataset: {dataset}')
+    infer_requests = [InferRequest(messages=data['messages']) for data in dataset]
+    infer_batch(engine, infer_requests)
+    infer_batch(engine, [
+        InferRequest(messages=[{
+            'role': 'user',
+            'content': '今天天气真好呀'
+        }]),
+        InferRequest(messages=[{
+            'role': 'user',
+            'content': '真倒霉'
+        }])
+    ])

examples/infer/demo_grounding.py ADDED Viewed

	@@ -0,0 +1,43 @@

+# pip install git+https://github.com/huggingface/transformers.git  # transformers>=4.49
+import os
+import re
+from typing import Literal
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+def draw_bbox_qwen2_vl(image, response, norm_bbox: Literal['norm1000', 'none']):
+    matches = re.findall(
+        r'<\|object_ref_start\|>(.*?)<\|object_ref_end\|><\|box_start\|>\((\d+),(\d+)\),\((\d+),(\d+)\)<\|box_end\|>',
+        response)
+    ref = []
+    bbox = []
+    for match_ in matches:
+        ref.append(match_[0])
+        bbox.append(list(match_[1:]))
+    draw_bbox(image, ref, bbox, norm_bbox=norm_bbox)
+def infer_grounding():
+    from swift.llm import PtEngine, RequestConfig, BaseArguments, InferRequest, safe_snapshot_download
+    output_path = 'bbox.png'
+    image = load_image('http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png')
+    infer_request = InferRequest(messages=[{'role': 'user', 'content': 'Task: Object Detection'}], images=[image])
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    adapter_path = safe_snapshot_download('swift/test_grounding')
+    args = BaseArguments.from_pretrained(adapter_path)
+    engine = PtEngine(args.model, adapters=[adapter_path])
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora-response: {response}')
+    draw_bbox_qwen2_vl(image, response, norm_bbox=args.norm_bbox)
+    print(f'output_path: {output_path}')
+    image.save(output_path)
+if __name__ == '__main__':
+    from swift.llm import draw_bbox, load_image
+    infer_grounding()

examples/infer/demo_hf.py ADDED Viewed

	@@ -0,0 +1,61 @@

+def infer_hf():
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    from peft import PeftModel
+    from modelscope import snapshot_download
+    model_dir = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
+    adapter_dir = snapshot_download('swift/test_lora')
+    model = AutoModelForCausalLM.from_pretrained(
+        model_dir, torch_dtype='auto', device_map='auto', trust_remote_code=True)
+    model = PeftModel.from_pretrained(model, adapter_dir)
+    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+    messages = [{
+        'role': 'system',
+        'content': 'You are a helpful assistant.'
+    }, {
+        'role': 'user',
+        'content': 'who are you?'
+    }]
+    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    model_inputs = tokenizer([text], return_tensors='pt', add_special_tokens=False).to(model.device)
+    generated_ids = model.generate(**model_inputs, max_new_tokens=512, do_sample=False)
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    print(f'response: {response}')
+    return response
+def infer_swift():
+    from swift.llm import get_model_tokenizer, get_template, InferRequest, RequestConfig, PtEngine
+    from modelscope import snapshot_download
+    from swift.tuners import Swift
+    model_dir = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
+    adapter_dir = snapshot_download('swift/test_lora')
+    model, tokenizer = get_model_tokenizer(model_dir, device_map='auto')
+    model = Swift.from_pretrained(model, adapter_dir)
+    template = get_template(model.model_meta.template, tokenizer)
+    engine = PtEngine.from_model_template(model, template)
+    messages = [{
+        'role': 'system',
+        'content': 'You are a helpful assistant.'
+    }, {
+        'role': 'user',
+        'content': 'who are you?'
+    }]
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    resp_list = engine.infer([InferRequest(messages=messages)], request_config=request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'response: {response}')
+    return response
+if __name__ == '__main__':
+    response = infer_hf()
+    response2 = infer_swift()
+    assert response == response2

examples/infer/demo_lora.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import os
+from typing import Literal
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+def infer_multilora(infer_request: 'InferRequest', infer_backend: Literal['vllm', 'pt']):
+    # Dynamic LoRA
+    adapter_path = safe_snapshot_download('swift/test_lora')
+    adapter_path2 = safe_snapshot_download('swift/test_lora2')
+    args = BaseArguments.from_pretrained(adapter_path)
+    if infer_backend == 'pt':
+        engine = PtEngine(args.model)
+    elif infer_backend == 'vllm':
+        from swift.llm import VllmEngine
+        engine = VllmEngine(args.model, enable_lora=True, max_loras=1, max_lora_rank=16)
+    template = get_template(args.template, engine.processor, args.system)
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    adapter_request = AdapterRequest('lora1', adapter_path)
+    adapter_request2 = AdapterRequest('lora2', adapter_path2)
+    # use lora
+    resp_list = engine.infer([infer_request], request_config, template=template, adapter_request=adapter_request)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora1-response: {response}')
+    # origin model
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'response: {response}')
+    # use lora
+    resp_list = engine.infer([infer_request], request_config, template=template, adapter_request=adapter_request2)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora2-response: {response}')
+def infer_lora(infer_request: 'InferRequest'):
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    adapter_path = safe_snapshot_download('swift/test_lora')
+    args = BaseArguments.from_pretrained(adapter_path)
+    # method1
+    # engine = PtEngine(args.model, adapters=[adapter_path])
+    # template = get_template(args.template, engine.tokenizer, args.system)
+    # engine.default_template = template
+    # method2
+    # model, processor = args.get_model_processor()
+    # model = Swift.from_pretrained(model, adapter_path)
+    # template = args.get_template(processor)
+    # engine = PtEngine.from_model_template(model, template)
+    # method3
+    model, tokenizer = get_model_tokenizer(args.model)
+    model = Swift.from_pretrained(model, adapter_path)
+    template = get_template(args.template, tokenizer, args.system)
+    engine = PtEngine.from_model_template(model, template)
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora-response: {response}')
+if __name__ == '__main__':
+    from swift.llm import (PtEngine, RequestConfig, AdapterRequest, get_template, BaseArguments, InferRequest,
+                           safe_snapshot_download, get_model_tokenizer)
+    from swift.tuners import Swift
+    infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
+    # infer_lora(infer_request)
+    infer_multilora(infer_request, 'pt')

examples/infer/demo_mllm.py ADDED Viewed

	@@ -0,0 +1,145 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+from typing import List, Literal
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']):
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    metric = InferStats()
+    resp_list = engine.infer(infer_requests, request_config, metrics=[metric])
+    query0 = infer_requests[0].messages[0]['content']
+    print(f'query0: {query0}')
+    print(f'response0: {resp_list[0].choices[0].message.content}')
+    print(f'metric: {metric.compute()}')
+    # metric.reset()  # reuse
+def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'):
+    request_config = RequestConfig(max_tokens=512, temperature=0, stream=True)
+    metric = InferStats()
+    gen_list = engine.infer([infer_request], request_config, metrics=[metric])
+    query = infer_request.messages[0]['content']
+    print(f'query: {query}\nresponse: ', end='')
+    for resp in gen_list[0]:
+        if resp is None:
+            continue
+        print(resp.choices[0].delta.content, end='', flush=True)
+    print()
+    print(f'metric: {metric.compute()}')
+def get_message(mm_type: Literal['text', 'image', 'video', 'audio']):
+    if mm_type == 'text':
+        message = {'role': 'user', 'content': 'who are you?'}
+    elif mm_type == 'image':
+        message = {
+            'role':
+            'user',
+            'content': [
+                {
+                    'type': 'image',
+                    # url or local_path or PIL.Image or base64
+                    'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
+                },
+                {
+                    'type': 'text',
+                    'text': 'How many sheep are there in the picture?'
+                }
+            ]
+        }
+    elif mm_type == 'video':
+        message = {
+            'role':
+            'user',
+            'content': [{
+                'type': 'video',
+                'video': 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/baby.mp4'
+            }, {
+                'type': 'text',
+                'text': 'Describe this video.'
+            }]
+        }
+    elif mm_type == 'audio':
+        message = {
+            'role':
+            'user',
+            'content': [{
+                'type': 'audio',
+                'audio': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/weather.wav'
+            }, {
+                'type': 'text',
+                'text': 'What does this audio say?'
+            }]
+        }
+    return message
+def get_data(mm_type: Literal['text', 'image', 'video', 'audio']):
+    data = {}
+    if mm_type == 'text':
+        messages = [{'role': 'user', 'content': 'who are you?'}]
+    elif mm_type == 'image':
+        # The number of <image> tags must be the same as len(images).
+        messages = [{'role': 'user', 'content': '<image>How many sheep are there in the picture?'}]
+        # Support URL/Path/base64/PIL.Image
+        data['images'] = ['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png']
+    elif mm_type == 'video':
+        messages = [{'role': 'user', 'content': '<video>Describe this video.'}]
+        data['videos'] = ['https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/baby.mp4']
+    elif mm_type == 'audio':
+        messages = [{'role': 'user', 'content': '<audio>What does this audio say?'}]
+        data['audios'] = ['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/weather.wav']
+    data['messages'] = messages
+    return data
+if __name__ == '__main__':
+    # For inference with a trained model, see the notebook examples at:
+    # https://github.com/modelscope/ms-swift/tree/main/examples/notebook
+    from swift.llm import InferEngine, InferRequest, PtEngine, RequestConfig, load_dataset
+    from swift.plugin import InferStats
+    infer_backend = 'pt'
+    if infer_backend == 'pt':
+        model = 'Qwen/Qwen2-Audio-7B-Instruct'
+        mm_type = 'audio'
+        engine = PtEngine(model, max_batch_size=64)
+    elif infer_backend == 'vllm':
+        # test env: vllm==0.7.3, transformers==4.49.*
+        # The meaning of environment variables can be found at:
+        # https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html#id17
+        from swift.llm import VllmEngine
+        os.environ['MAX_PIXELS'] = '1003520'
+        os.environ['VIDEO_MAX_PIXELS'] = '50176'
+        os.environ['FPS_MAX_FRAMES'] = '12'
+        model = 'Qwen/Qwen2.5-VL-3B-Instruct'
+        # If you encounter insufficient GPU memory, please reduce `max_model_len` and set `max_num_seqs=5`.
+        engine = VllmEngine(model, max_model_len=8192, limit_mm_per_prompt={'image': 5, 'video': 2})
+        mm_type = 'image'  # or 'video'
+    elif infer_backend == 'lmdeploy':
+        # test env: lmdeploy==0.7.1
+        from swift.llm import LmdeployEngine
+        model = 'OpenGVLab/InternVL2_5-1B'
+        engine = LmdeployEngine(model, vision_batch_size=8)
+        mm_type = 'image'  # or 'video'
+    # infer dataset
+    if mm_type == 'audio':
+        dataset = 'speech_asr/speech_asr_aishell1_trainsets:validation#1000'
+    elif mm_type == 'image':
+        dataset = 'AI-ModelScope/LaTeX_OCR:small#1000'
+    elif mm_type == 'video':
+        dataset = 'swift/VideoChatGPT:Generic#100'
+    # Here, `load_dataset` is used for convenience; `infer_batch` does not require creating a dataset.
+    dataset = load_dataset([dataset], seed=42)[0]
+    print(f'dataset: {dataset}')
+    infer_requests = [InferRequest(**data) for data in dataset]
+    infer_batch(engine, infer_requests)
+    infer_stream(engine, InferRequest(messages=[get_message(mm_type)]))
+    # This call is equivalent to the one above.
+    infer_stream(engine, InferRequest(**get_data(mm_type)))

examples/infer/demo_reward_model.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+from typing import List
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']):
+    resp_list = engine.infer(infer_requests)
+    print(f'messages0: {infer_requests[0].messages}')
+    print(f'response0: {resp_list[0].choices[0].message.content}')
+if __name__ == '__main__':
+    from swift.llm import InferEngine, InferRequest, PtEngine, load_dataset
+    model = 'Shanghai_AI_Laboratory/internlm2-1_8b-reward'
+    engine = PtEngine(model, max_batch_size=64)
+    # Here, `load_dataset` is used for convenience; `infer_batch` does not require creating a dataset.
+    dataset = load_dataset(['AI-ModelScope/alpaca-gpt4-data-zh#1000'], seed=42)[0]
+    print(f'dataset: {dataset}')
+    infer_requests = [InferRequest(**data) for data in dataset]
+    infer_batch(engine, infer_requests)
+    messages = [{
+        'role': 'user',
+        'content': "Hello! What's your name?"
+    }, {
+        'role': 'assistant',
+        'content': 'My name is InternLM2! A helpful AI assistant. What can I do for you?'
+    }]
+    infer_batch(engine, [InferRequest(messages=messages)])

examples/infer/lmdeploy/ddp.sh ADDED Viewed

	@@ -0,0 +1,7 @@

+NPROC_PER_NODE=2 \
+CUDA_VISIBLE_DEVICES=0,1 \
+swift infer \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --infer_backend lmdeploy \
+    --val_dataset AI-ModelScope/alpaca-gpt4-data-zh#1000 \
+    --max_new_tokens 2048

examples/infer/lmdeploy/mllm_tp.sh ADDED Viewed

	@@ -0,0 +1,8 @@

+CUDA_VISIBLE_DEVICES=0,1 \
+swift infer \
+    --model OpenGVLab/InternVL2_5-1B \
+    --infer_backend lmdeploy \
+    --val_dataset AI-ModelScope/captcha-images#1000 \
+    --tp 2 \
+    --vision_batch_size 8 \
+    --max_new_tokens 2048

examples/infer/pt/batch_ddp.sh ADDED Viewed

	@@ -0,0 +1,9 @@

+# 18GB
+NPROC_PER_NODE=4 \
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+swift infer \
+    --model Qwen/Qwen2.5-1.5B-Instruct \
+    --infer_backend pt \
+    --val_dataset AI-ModelScope/alpaca-gpt4-data-zh#1000 \
+    --max_batch_size 16 \
+    --max_new_tokens 512

examples/infer/pt/bert.sh ADDED Viewed

	@@ -0,0 +1,8 @@

+# Since `swift/test_bert` is trained by swift and contains an `args.json` file,
+# there is no need to explicitly set `--model`, `--system`, etc., as they will be automatically read.
+# To disable this behavior, please set `--load_args false`.
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters swift/test_bert \
+    --truncation_strategy right \
+    --max_length 512

examples/infer/pt/lora.sh ADDED Viewed

	@@ -0,0 +1,10 @@

+# Since `swift/test_lora` is trained by swift and contains an `args.json` file,
+# there is no need to explicitly set `--model`, `--system`, etc., as they will be automatically read.
+# To disable this behavior, please set `--load_args false`.
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters swift/test_lora \
+    --infer_backend pt \
+    --stream true \
+    --temperature 0 \
+    --max_new_tokens 2048

examples/infer/pt/mllm_device_map.sh ADDED Viewed

	@@ -0,0 +1,9 @@

+NPROC_PER_NODE=2 \
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+MAX_PIXELS=1003520 \
+swift infer \
+    --model Qwen/Qwen2.5-VL-3B-Instruct \
+    --infer_backend pt \
+    --val_dataset AI-ModelScope/LaTeX_OCR#1000 \
+    --max_batch_size 16 \
+    --max_new_tokens 512

examples/infer/pt/prm.sh ADDED Viewed

	@@ -0,0 +1,4 @@

+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --model Qwen/Qwen2.5-Math-PRM-7B \
+    --infer_backend pt

examples/infer/pt/reward_model.sh ADDED Viewed

	@@ -0,0 +1,5 @@

+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --model Shanghai_AI_Laboratory/internlm2-1_8b-reward \
+    --val_dataset AI-ModelScope/alpaca-gpt4-data-zh#1000 \
+    --max_batch_size 64

examples/infer/vllm/ddp.sh ADDED Viewed

	@@ -0,0 +1,9 @@

+NPROC_PER_NODE=2 \
+CUDA_VISIBLE_DEVICES=0,1 \
+swift infer \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --infer_backend vllm \
+    --val_dataset AI-ModelScope/alpaca-gpt4-data-zh#1000 \
+    --gpu_memory_utilization 0.9 \
+    --max_model_len 8192 \
+    --max_new_tokens 2048

examples/infer/vllm/mllm_ddp.sh ADDED Viewed

	@@ -0,0 +1,11 @@

+# You need to use flash-attn (manual installation) instead of xformers.
+NPROC_PER_NODE=2 \
+CUDA_VISIBLE_DEVICES=0,1 \
+swift infer \
+    --model Qwen/Qwen2-Audio-7B-Instruct \
+    --infer_backend vllm \
+    --val_dataset speech_asr/speech_asr_aishell1_trainsets:validation#1000 \
+    --gpu_memory_utilization 0.9 \
+    --max_model_len 8192 \
+    --max_new_tokens 2048 \
+    --limit_mm_per_prompt '{"audio": 5}'

examples/infer/vllm/mllm_tp.sh ADDED Viewed

	@@ -0,0 +1,11 @@

+CUDA_VISIBLE_DEVICES=0,1 \
+MAX_PIXELS=1003520 \
+swift infer \
+    --model Qwen/Qwen2.5-VL-3B-Instruct \
+    --infer_backend vllm \
+    --val_dataset AI-ModelScope/LaTeX_OCR#1000 \
+    --gpu_memory_utilization 0.9 \
+    --tensor_parallel_size 2 \
+    --max_model_len 32768 \
+    --max_new_tokens 2048 \
+    --limit_mm_per_prompt '{"image": 5, "video": 2}'

examples/notebook/qwen2_5-self-cognition/infer.ipynb ADDED Viewed

	@@ -0,0 +1,148 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Inference\n",
+    "We trained a checkpoint in the `self-cognition-sft.ipynb` tutorial; here we use `PtEngine` to run inference on it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import some libraries\n",
+    "import os\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
+    "\n",
+    "from swift.llm import InferEngine, InferRequest, PtEngine, RequestConfig, get_template"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hyperparameters for inference\n",
+    "last_model_checkpoint = 'output/checkpoint-xxx'\n",
+    "\n",
+    "# model\n",
+    "model_id_or_path = 'Qwen/Qwen2.5-3B-Instruct'  # model_id or model_path\n",
+    "system = 'You are a helpful assistant.'\n",
+    "infer_backend = 'pt'\n",
+    "\n",
+    "# generation_config\n",
+    "max_new_tokens = 512\n",
+    "temperature = 0\n",
+    "stream = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get model and template, and load LoRA weights.\n",
+    "engine = PtEngine(model_id_or_path, adapters=[last_model_checkpoint])\n",
+    "template = get_template(engine.model_meta.template, engine.tokenizer, default_system=system)\n",
+    "# You can modify the `default_template` directly here, or pass it in during `engine.infer`.\n",
+    "engine.default_template = template"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "query: who are you?\n",
+      "response: I am an artificial intelligence language model named Xiao Huang, developed by ModelScope. I can answer various questions and engage in conversation with humans. If you have any questions or need help, feel free to ask me at any time.\n",
+      "--------------------------------------------------\n",
+      "query: What should I do if I can't sleep at night?\n",
+      "response: If you're having trouble sleeping, there are several things you can try:\n",
+      "\n",
+      "1. Establish a regular sleep schedule: Try to go to bed and wake up at the same time every day, even on weekends.\n",
+      "\n",
+      "2. Create a relaxing bedtime routine: Engage in calming activities before bed, such as reading a book or taking a warm bath.\n",
+      "\n",
+      "3. Make your bedroom conducive to sleep: Keep your bedroom cool, dark, and quiet. Invest in comfortable bedding and pillows.\n",
+      "\n",
+      "4. Avoid stimulating activities before bed: Avoid using electronic devices, watching TV, or engaging in mentally stimulating activities before bed.\n",
+      "\n",
+      "5. Exercise regularly: Regular physical activity can help improve your sleep quality, but avoid exercising too close to bedtime.\n",
+      "\n",
+      "6. Manage stress: Practice relaxation techniques, such as deep breathing, meditation, or yoga, to help manage stress and promote better sleep.\n",
+      "\n",
+      "7. Limit caffeine and alcohol intake: Both caffeine and alcohol can disrupt sleep patterns, so it's best to limit their consumption, especially in the evening.\n",
+      "\n",
+      "8. Seek professional help: If you continue to have difficulty sleeping despite trying these strategies, consider seeking help from a healthcare provider or a sleep specialist.\n",
+      "--------------------------------------------------\n",
+      "query: 你是谁训练的？\n",
+      "response: 我是由魔搭团队训练和开发的。\n",
+      "--------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "query_list = [\n",
+    "    'who are you?',\n",
+    "    \"What should I do if I can't sleep at night?\",\n",
+    "    '你是谁训练的？',\n",
+    "]\n",
+    "\n",
+    "def infer_stream(engine: InferEngine, infer_request: InferRequest):\n",
+    "    request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature, stream=True)\n",
+    "    gen_list = engine.infer([infer_request], request_config)\n",
+    "    query = infer_request.messages[0]['content']\n",
+    "    print(f'query: {query}\\nresponse: ', end='')\n",
+    "    for resp in gen_list[0]:\n",
+    "        if resp is None:\n",
+    "            continue\n",
+    "        print(resp.choices[0].delta.content, end='', flush=True)\n",
+    "    print()\n",
+    "\n",
+    "def infer(engine: InferEngine, infer_request: InferRequest):\n",
+    "    request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature)\n",
+    "    resp_list = engine.infer([infer_request], request_config)\n",
+    "    query = infer_request.messages[0]['content']\n",
+    "    response = resp_list[0].choices[0].message.content\n",
+    "    print(f'query: {query}')\n",
+    "    print(f'response: {response}')\n",
+    "\n",
+    "infer_func = infer_stream if stream else infer\n",
+    "for query in query_list:\n",
+    "    infer_func(engine, InferRequest(messages=[{'role': 'user', 'content': query}]))\n",
+    "    print('-' * 50)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "test_py310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

examples/notebook/qwen2_5-self-cognition/infer.sh ADDED Viewed

	@@ -0,0 +1,7 @@

+# Here is the command-line style inference code.
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --stream true \
+    --temperature 0 \
+    --max_new_tokens 2048

examples/notebook/qwen2_5-self-cognition/self-cognition-sft.ipynb ADDED Viewed

	@@ -0,0 +1,219 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 10-minute self-cognition SFT\n",
+    "\n",
+    "Here is a demonstration of using Python to perform self-cognition SFT on Qwen2.5-3B-Instruct. This tutorial will help you quickly understand the details of swift sft, which will be of great help when you customize ms-swift.\n",
+    "\n",
+    "Are you ready? Let's begin the journey...\n",
+    "\n",
+    "中文版：[魔搭教程](https://github.com/modelscope/modelscope-classroom/blob/main/LLM-tutorial/R.10%E5%88%86%E9%92%9F%E6%94%B9%E5%8F%98%E5%A4%A7%E6%A8%A1%E5%9E%8B%E8%87%AA%E6%88%91%E8%AE%A4%E7%9F%A5.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# # install ms-swift\n",
+    "# pip install ms-swift -U"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import some libraries\n",
+    "import os\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
+    "\n",
+    "from swift.llm import get_model_tokenizer, load_dataset, get_template, EncodePreprocessor\n",
+    "from swift.utils import get_logger, find_all_linears, get_model_parameter_info, plot_images, seed_everything\n",
+    "from swift.tuners import Swift, LoraConfig\n",
+    "from swift.trainers import Seq2SeqTrainer, Seq2SeqTrainingArguments\n",
+    "from functools import partial\n",
+    "\n",
+    "logger = get_logger()\n",
+    "seed_everything(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hyperparameters for training\n",
+    "# model\n",
+    "model_id_or_path = 'Qwen/Qwen2.5-3B-Instruct'  # model_id or model_path\n",
+    "system = 'You are a helpful assistant.'\n",
+    "output_dir = 'output'\n",
+    "\n",
+    "# dataset\n",
+    "dataset = ['AI-ModelScope/alpaca-gpt4-data-zh#500', 'AI-ModelScope/alpaca-gpt4-data-en#500',\n",
+    "           'swift/self-cognition#500']  # dataset_id or dataset_path\n",
+    "data_seed = 42\n",
+    "max_length = 2048\n",
+    "split_dataset_ratio = 0.01  # Split validation set\n",
+    "num_proc = 4  # The number of processes for data loading.\n",
+    "# The following two parameters are used to override the placeholders in the self-cognition dataset.\n",
+    "model_name = ['小黄', 'Xiao Huang']  # The Chinese name and English name of the model\n",
+    "model_author = ['魔搭', 'ModelScope']  # The Chinese name and English name of the model author\n",
+    "\n",
+    "# lora\n",
+    "lora_rank = 8\n",
+    "lora_alpha = 32\n",
+    "\n",
+    "# training_args\n",
+    "training_args = Seq2SeqTrainingArguments(\n",
+    "    output_dir=output_dir,\n",
+    "    learning_rate=1e-4,\n",
+    "    per_device_train_batch_size=1,\n",
+    "    per_device_eval_batch_size=1,\n",
+    "    gradient_checkpointing=True,\n",
+    "    weight_decay=0.1,\n",
+    "    lr_scheduler_type='cosine',\n",
+    "    warmup_ratio=0.05,\n",
+    "    report_to=['tensorboard'],\n",
+    "    logging_first_step=True,\n",
+    "    save_strategy='steps',\n",
+    "    save_steps=50,\n",
+    "    eval_strategy='steps',\n",
+    "    eval_steps=50,\n",
+    "    gradient_accumulation_steps=16,\n",
+    "    num_train_epochs=1,\n",
+    "    metric_for_best_model='loss',\n",
+    "    save_total_limit=2,\n",
+    "    logging_steps=5,\n",
+    "    dataloader_num_workers=1,\n",
+    "    data_seed=data_seed,\n",
+    ")\n",
+    "\n",
+    "output_dir = os.path.abspath(os.path.expanduser(output_dir))\n",
+    "logger.info(f'output_dir: {output_dir}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Obtain the model and template, and add trainable LoRA layers to the model.\n",
+    "model, tokenizer = get_model_tokenizer(model_id_or_path)\n",
+    "logger.info(f'model_info: {model.model_info}')\n",
+    "template = get_template(model.model_meta.template, tokenizer, default_system=system, max_length=max_length)\n",
+    "template.set_mode('train')\n",
+    "\n",
+    "target_modules = find_all_linears(model)\n",
+    "lora_config = LoraConfig(task_type='CAUSAL_LM', r=lora_rank, lora_alpha=lora_alpha,\n",
+    "                         target_modules=target_modules)\n",
+    "model = Swift.prepare_model(model, lora_config)\n",
+    "logger.info(f'lora_config: {lora_config}')\n",
+    "\n",
+    "# Print model structure and trainable parameters.\n",
+    "logger.info(f'model: {model}')\n",
+    "model_parameter_info = get_model_parameter_info(model)\n",
+    "logger.info(f'model_parameter_info: {model_parameter_info}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download and load the dataset, split it into a training set and a validation set,\n",
+    "# and encode the text data into tokens.\n",
+    "train_dataset, val_dataset = load_dataset(dataset, split_dataset_ratio=split_dataset_ratio, num_proc=num_proc,\n",
+    "        model_name=model_name, model_author=model_author, seed=data_seed)\n",
+    "\n",
+    "logger.info(f'train_dataset: {train_dataset}')\n",
+    "logger.info(f'val_dataset: {val_dataset}')\n",
+    "logger.info(f'train_dataset[0]: {train_dataset[0]}')\n",
+    "\n",
+    "train_dataset = EncodePreprocessor(template=template)(train_dataset, num_proc=num_proc)\n",
+    "val_dataset = EncodePreprocessor(template=template)(val_dataset, num_proc=num_proc)\n",
+    "logger.info(f'encoded_train_dataset[0]: {train_dataset[0]}')\n",
+    "\n",
+    "# Print a sample\n",
+    "template.print_inputs(train_dataset[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the trainer and start the training.\n",
+    "model.enable_input_require_grads()  # Compatible with gradient checkpointing\n",
+    "trainer = Seq2SeqTrainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    data_collator=template.data_collator,\n",
+    "    train_dataset=train_dataset,\n",
+    "    eval_dataset=val_dataset,\n",
+    "    template=template,\n",
+    ")\n",
+    "trainer.train()\n",
+    "\n",
+    "last_model_checkpoint = trainer.state.last_model_checkpoint\n",
+    "logger.info(f'last_model_checkpoint: {last_model_checkpoint}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize the training loss.\n",
+    "# You can also use the TensorBoard visualization interface during training by entering\n",
+    "# `tensorboard --logdir '{output_dir}/runs'` at the command line.\n",
+    "images_dir = os.path.join(output_dir, 'images')\n",
+    "logger.info(f'images_dir: {images_dir}')\n",
+    "plot_images(images_dir, training_args.logging_dir, ['train/loss'], 0.9)  # save images\n",
+    "\n",
+    "# Read and display the image.\n",
+    "# The light yellow line represents the actual loss value,\n",
+    "# while the yellow line represents the loss value smoothed with a smoothing factor of 0.9.\n",
+    "from IPython.display import display\n",
+    "from PIL import Image\n",
+    "image = Image.open(os.path.join(images_dir, 'train_loss.png'))\n",
+    "display(image)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

examples/notebook/qwen2_5-self-cognition/sft.sh ADDED Viewed

	@@ -0,0 +1,30 @@

+# Here is the command-line style training code.
+# 22GB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-3B-Instruct \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --model_name 小黄 'Xiao Huang' \
+    --model_author '魔搭' 'ModelScope'

examples/notebook/qwen2_5-vl-grounding/zh.ipynb ADDED Viewed

	@@ -0,0 +1,261 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Qwen2.5-VL Grounding任务\n",
+    "\n",
+    "这里介绍使用qwen2.5-vl进行grounding任务的全流程。当然，你也可以使用internvl2.5或者qwen2-vl等多模态模型。\n",
+    "\n",
+    "我们使用[AI-ModelScope/coco](https://modelscope.cn/datasets/AI-ModelScope/coco)数据集来展示整个流程。\n",
+    "\n",
+    "如果需要使用自定义数据集，需要符合以下格式："
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "{\"messages\": [{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}, {\"role\": \"user\", \"content\": \"<image>描述图像\"}, {\"role\": \"assistant\", \"content\": \"<ref-object><bbox>和<ref-object><bbox>正在沙滩上玩耍\"}], \"images\": [\"/xxx/x.jpg\"], \"objects\": {\"ref\": [\"一只狗\", \"一个女人\"], \"bbox\": [[331.5, 761.4, 853.5, 1594.8], [676.5, 685.8, 1099.5, 1427.4]]}}\n",
+    "{\"messages\": [{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}, {\"role\": \"user\", \"content\": \"<image>找到图像中的<ref-object>\"}, {\"role\": \"assistant\", \"content\": \"<bbox><bbox>\"}], \"images\": [\"/xxx/x.jpg\"], \"objects\": {\"ref\": [\"羊\"], \"bbox\": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}\n",
+    "{\"messages\": [{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}, {\"role\": \"user\", \"content\": \"<image>帮我打开谷歌浏览器\"}, {\"role\": \"assistant\", \"content\": \"Action: click(start_box='<bbox>')\"}], \"images\": [\"/xxx/x.jpg\"], \"objects\": {\"ref\": [], \"bbox\": [[615, 226]]}}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "ms-swift在预处理数据集时，会使用模型特有的grounding任务格式，将objects中的ref填充`<ref-object>`，bbox会根据模型类型选择是否进行0-1000的归一化，并填充`<bbox>`。例如：qwen2-vl为`f'<|object_ref_start|>羊<|object_ref_end|>'`和`f'<|box_start|>(101,201),(150,266)<|box_end|>'`（qwen2.5-vl不进行归一化，只将float型转成int型），internvl2.5则为`f'<ref>羊</ref>'`和`f'<box>[[101, 201, 150, 266]]</box>'`等。\n",
+    "\n",
+    "\n",
+    "训练之前，你需要从main分支安装ms-swift："
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# pip install git+https://github.com/modelscope/ms-swift.git\n",
+    "\n",
+    "git clone https://github.com/modelscope/ms-swift.git\n",
+    "cd ms-swift\n",
+    "pip install -e .\n",
+    "\n",
+    "# 如果'transformers>=4.49'已经发版，则无需从main分支安装\n",
+    "pip install git+https://github.com/huggingface/transformers.git"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "然后，使用以下shell进行训练。MAX_PIXELS的参数含义可以查看[这里](https://swift.readthedocs.io/en/latest/Instruction/Command-line-parameters.html#specific-model-arguments)\n",
+    "\n",
+    "### 训练\n",
+    "\n",
+    "单卡训练："
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# 显存资源：24GiB\n",
+    "CUDA_VISIBLE_DEVICES=0 \\\n",
+    "MAX_PIXELS=1003520 \\\n",
+    "swift sft \\\n",
+    "    --model Qwen/Qwen2.5-VL-7B-Instruct \\\n",
+    "    --dataset 'AI-ModelScope/coco#2000' \\\n",
+    "    --train_type lora \\\n",
+    "    --torch_dtype bfloat16 \\\n",
+    "    --num_train_epochs 1 \\\n",
+    "    --per_device_train_batch_size 1 \\\n",
+    "    --per_device_eval_batch_size 1 \\\n",
+    "    --learning_rate 1e-4 \\\n",
+    "    --lora_rank 8 \\\n",
+    "    --lora_alpha 32 \\\n",
+    "    --target_modules all-linear \\\n",
+    "    --freeze_vit true \\\n",
+    "    --gradient_accumulation_steps 16 \\\n",
+    "    --eval_steps 100 \\\n",
+    "    --save_steps 100 \\\n",
+    "    --save_total_limit 5 \\\n",
+    "    --logging_steps 5 \\\n",
+    "    --max_length 2048 \\\n",
+    "    --output_dir output \\\n",
+    "    --warmup_ratio 0.05 \\\n",
+    "    --dataloader_num_workers 4 \\\n",
+    "    --dataset_num_proc 4"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "然后我们将训练的模型推送到ModelScope："
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "swift export \\\n",
+    "    --adapters output/vx-xxx/checkpoint-xxx \\\n",
+    "    --push_to_hub true \\\n",
+    "    --hub_model_id '<model-id>' \\\n",
+    "    --hub_token '<sdk-token>' \\\n",
+    "    --use_hf false"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "我们将训练的checkpoint推送到[swift/test_grounding](https://modelscope.cn/models/swift/test_grounding)。\n",
+    "\n",
+    "### 推理\n",
+    "\n",
+    "训练完成后，我们使用以下命令对训练时的验证集进行推理。这里`--adapters`需要替换成训练生成的last checkpoint文件夹。由于adapters文件夹中包含了训练的参数文件，因此不需要额外指定`--model`。\n",
+    "\n",
+    "若模型采用的是绝对坐标的方式进行输出，推理时请提前对图像进行缩放而不使用`MAX_PIXELS`或者`--max_pixels`。若是千分位坐标，则没有此约束。\n",
+    "\n",
+    "由于我们已经将训练后的checkpoint推送到了ModelScope上，以下推理脚本可以直接运行："
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "CUDA_VISIBLE_DEVICES=0 \\\n",
+    "swift infer \\\n",
+    "    --adapters swift/test_grounding \\\n",
+    "    --stream true \\\n",
+    "    --load_data_args true \\\n",
+    "    --max_new_tokens 512 \\\n",
+    "    --dataset_num_proc 4"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "我们也可以使用代码的方式进行推理：\n",
+    "\n",
+    "单样本推理的例子可以查看[这里](https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo_grounding.py)。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
+    "\n",
+    "import re\n",
+    "from typing import Literal\n",
+    "from swift.llm import (\n",
+    "    PtEngine, RequestConfig, BaseArguments, InferRequest, safe_snapshot_download, draw_bbox, load_image, load_dataset, InferEngine\n",
+    ")\n",
+    "from IPython.display import display\n",
+    "\n",
+    "def infer_stream(engine: InferEngine, infer_request: InferRequest):\n",
+    "    request_config = RequestConfig(max_tokens=512, temperature=0, stream=True)\n",
+    "    gen_list = engine.infer([infer_request], request_config)\n",
+    "    query = infer_request.messages[0]['content']\n",
+    "    print(f'query: {query}\\nresponse: ', end='')\n",
+    "    response = ''\n",
+    "    for resp in gen_list[0]:\n",
+    "        if resp is None:\n",
+    "            continue\n",
+    "        delta = resp.choices[0].delta.content\n",
+    "        response += delta\n",
+    "        print(delta, end='', flush=True)\n",
+    "    print()\n",
+    "    return response\n",
+    "\n",
+    "def draw_bbox_qwen2_vl(image, response, norm_bbox: Literal['norm1000', 'none']):\n",
+    "    matches = re.findall(\n",
+    "        r'<\\|object_ref_start\\|>(.*?)<\\|object_ref_end\\|><\\|box_start\\|>\\((\\d+),(\\d+)\\),\\((\\d+),(\\d+)\\)<\\|box_end\\|>',\n",
+    "        response)\n",
+    "    ref = []\n",
+    "    bbox = []\n",
+    "    for match_ in matches:\n",
+    "        ref.append(match_[0])\n",
+    "        bbox.append(list(match_[1:]))\n",
+    "    draw_bbox(image, ref, bbox, norm_bbox=norm_bbox)\n",
+    "\n",
+    "# 下载权重，并加载模型\n",
+    "output_dir = 'images_bbox'\n",
+    "model_id_or_path = 'swift/test_grounding'\n",
+    "output_dir = os.path.abspath(os.path.expanduser(output_dir))\n",
+    "adapter_path = safe_snapshot_download(model_id_or_path)\n",
+    "args = BaseArguments.from_pretrained(adapter_path)\n",
+    "engine = PtEngine(args.model, adapters=[adapter_path])\n",
+    "\n",
+    "# 获取验证集并推理\n",
+    "_, val_dataset = load_dataset(args.dataset, split_dataset_ratio=args.split_dataset_ratio, num_proc=4, seed=args.seed)\n",
+    "print(f'output_dir: {output_dir}')\n",
+    "os.makedirs(output_dir, exist_ok=True)\n",
+    "for i, data in enumerate(val_dataset):\n",
+    "    image = data['images'][0]\n",
+    "    image = load_image(image['bytes'] or image['path'])\n",
+    "    display(image)\n",
+    "    response = infer_stream(engine, InferRequest(**data))\n",
+    "    draw_bbox_qwen2_vl(image, response, norm_bbox=args.norm_bbox)\n",
+    "    print('-' * 50)\n",
+    "    image.save(os.path.join(output_dir, f'{i}.png'))\n",
+    "    display(image)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "test_py310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

examples/notebook/qwen2vl-ocr/infer.ipynb ADDED Viewed

	@@ -0,0 +1,136 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Inference\n",
+    "We have obtained a fine-tuned checkpoint by following the `ocr-sft.ipynb` tutorial, and here we use `PtEngine` to run inference with it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import some libraries\n",
+    "import os\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
+    "\n",
+    "from swift.llm import (\n",
+    "    InferEngine, InferRequest, PtEngine, RequestConfig, get_template, load_dataset, load_image\n",
+    ")\n",
+    "from swift.utils import get_model_parameter_info, get_logger, seed_everything\n",
+    "logger = get_logger()\n",
+    "seed_everything(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hyperparameters for inference\n",
+    "last_model_checkpoint = 'output/checkpoint-xxx'\n",
+    "\n",
+    "# model\n",
+    "model_id_or_path = 'Qwen/Qwen2-VL-2B-Instruct'  # model_id or model_path\n",
+    "system = None\n",
+    "infer_backend = 'pt'\n",
+    "\n",
+    "# dataset\n",
+    "dataset = ['AI-ModelScope/LaTeX_OCR#20000']\n",
+    "data_seed = 42\n",
+    "split_dataset_ratio = 0.01\n",
+    "num_proc = 4\n",
+    "strict = False\n",
+    "\n",
+    "# generation_config\n",
+    "max_new_tokens = 512\n",
+    "temperature = 0\n",
+    "stream = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get model and template, and load LoRA weights.\n",
+    "engine = PtEngine(model_id_or_path, adapters=[last_model_checkpoint])\n",
+    "template = get_template(engine.model_meta.template, engine.tokenizer, default_system=system)\n",
+    "# The default mode of the template is 'pt', so there is no need to make any changes.\n",
+    "# template.set_mode('pt')\n",
+    "\n",
+    "model_parameter_info = get_model_parameter_info(engine.model)\n",
+    "logger.info(f'model_parameter_info: {model_parameter_info}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Due to the data_seed setting, the validation set here is the same as the validation set used during training.\n",
+    "_, val_dataset = load_dataset(dataset, split_dataset_ratio=split_dataset_ratio, num_proc=num_proc,\n",
+    "                              strict=strict, seed=data_seed)\n",
+    "val_dataset = val_dataset.select(range(10))  # Take the first 10 items"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run streaming inference and save the images from the validation set.\n",
+    "# The batch processing code can be found here: https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo_mllm.py\n",
+    "def infer_stream(engine: InferEngine, infer_request: InferRequest):\n",
+    "    request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature, stream=True)\n",
+    "    gen_list = engine.infer([infer_request], request_config)\n",
+    "    query = infer_request.messages[0]['content']\n",
+    "    print(f'query: {query}\\nresponse: ', end='')\n",
+    "    for resp in gen_list[0]:\n",
+    "        if resp is None:\n",
+    "            continue\n",
+    "        print(resp.choices[0].delta.content, end='', flush=True)\n",
+    "    print()\n",
+    "\n",
+    "from IPython.display import display\n",
+    "os.makedirs('images', exist_ok=True)\n",
+    "for i, data in enumerate(val_dataset):\n",
+    "    image = data['images'][0]\n",
+    "    image = load_image(image['bytes'] or image['path'])\n",
+    "    image.save(f'images/{i}.png')\n",
+    "    display(image)\n",
+    "    infer_stream(engine, InferRequest(**data))\n",
+    "    print('-' * 50)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "test_py310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

examples/notebook/qwen2vl-ocr/ocr-sft.ipynb ADDED Viewed

	@@ -0,0 +1,226 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Latex-OCR SFT\n",
+    "\n",
+    "Here is a demonstration of using Python to perform LaTeX-OCR SFT on Qwen2-VL-2B-Instruct. This tutorial will help you quickly understand the details of swift sft, which will be of great help when you customize ms-swift.\n",
+    "\n",
+    "Are you ready? Let's begin the journey..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# # install ms-swift\n",
+    "# pip install ms-swift -U"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import some libraries\n",
+    "import os\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
+    "\n",
+    "from swift.llm import (\n",
+    "    get_model_tokenizer, load_dataset, get_template, EncodePreprocessor, get_model_arch,\n",
+    "    get_multimodal_target_regex, LazyLLMDataset\n",
+    ")\n",
+    "from swift.utils import get_logger, get_model_parameter_info, plot_images, seed_everything\n",
+    "from swift.tuners import Swift, LoraConfig\n",
+    "from swift.trainers import Seq2SeqTrainer, Seq2SeqTrainingArguments\n",
+    "from functools import partial\n",
+    "\n",
+    "logger = get_logger()\n",
+    "seed_everything(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hyperparameters for training\n",
+    "# model\n",
+    "model_id_or_path = 'Qwen/Qwen2-VL-2B-Instruct'\n",
+    "system = None  # Using the default system defined in the template.\n",
+    "output_dir = 'output'\n",
+    "\n",
+    "# dataset\n",
+    "dataset = ['AI-ModelScope/LaTeX_OCR#20000']  # dataset_id or dataset_path. Sampling 20000 data points\n",
+    "data_seed = 42\n",
+    "max_length = 2048\n",
+    "split_dataset_ratio = 0.01  # Split validation set\n",
+    "num_proc = 4  # The number of processes for data loading.\n",
+    "\n",
+    "# lora\n",
+    "lora_rank = 8\n",
+    "lora_alpha = 32\n",
+    "freeze_llm = False\n",
+    "freeze_vit = True\n",
+    "freeze_aligner = True\n",
+    "\n",
+    "# training_args\n",
+    "training_args = Seq2SeqTrainingArguments(\n",
+    "    output_dir=output_dir,\n",
+    "    learning_rate=1e-4,\n",
+    "    per_device_train_batch_size=1,\n",
+    "    per_device_eval_batch_size=1,\n",
+    "    gradient_checkpointing=True,\n",
+    "    weight_decay=0.1,\n",
+    "    lr_scheduler_type='cosine',\n",
+    "    warmup_ratio=0.05,\n",
+    "    report_to=['tensorboard'],\n",
+    "    logging_first_step=True,\n",
+    "    save_strategy='steps',\n",
+    "    save_steps=50,\n",
+    "    eval_strategy='steps',\n",
+    "    eval_steps=50,\n",
+    "    gradient_accumulation_steps=16,\n",
+    "    # To observe the training results more quickly, this is set to 1 here. \n",
+    "    # Under normal circumstances, a larger number should be used.\n",
+    "    num_train_epochs=1,\n",
+    "    metric_for_best_model='loss',\n",
+    "    save_total_limit=5,\n",
+    "    logging_steps=5,\n",
+    "    dataloader_num_workers=4,\n",
+    "    data_seed=data_seed,\n",
+    "    remove_unused_columns=False,\n",
+    ")\n",
+    "\n",
+    "output_dir = os.path.abspath(os.path.expanduser(output_dir))\n",
+    "logger.info(f'output_dir: {output_dir}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Obtain the model and template\n",
+    "model, processor = get_model_tokenizer(model_id_or_path)\n",
+    "logger.info(f'model_info: {model.model_info}')\n",
+    "template = get_template(model.model_meta.template, processor, default_system=system, max_length=max_length)\n",
+    "template.set_mode('train')\n",
+    "if template.use_model:\n",
+    "    template.model = model\n",
+    "\n",
+    "# Get target_modules and add trainable LoRA modules to the model.\n",
+    "target_modules = get_multimodal_target_regex(model, freeze_llm=freeze_llm, freeze_vit=freeze_vit, \n",
+    "                            freeze_aligner=freeze_aligner)\n",
+    "lora_config = LoraConfig(task_type='CAUSAL_LM', r=lora_rank, lora_alpha=lora_alpha,\n",
+    "                         target_modules=target_modules)\n",
+    "model = Swift.prepare_model(model, lora_config)\n",
+    "logger.info(f'lora_config: {lora_config}')\n",
+    "\n",
+    "# Print model structure and trainable parameters.\n",
+    "logger.info(f'model: {model}')\n",
+    "model_parameter_info = get_model_parameter_info(model)\n",
+    "logger.info(f'model_parameter_info: {model_parameter_info}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download and load the dataset, split it into a training set and a validation set,\n",
+    "# and encode the text data into tokens.\n",
+    "train_dataset, val_dataset = load_dataset(dataset, split_dataset_ratio=split_dataset_ratio, num_proc=num_proc,\n",
+    "                                          seed=data_seed)\n",
+    "\n",
+    "logger.info(f'train_dataset: {train_dataset}')\n",
+    "logger.info(f'val_dataset: {val_dataset}')\n",
+    "logger.info(f'train_dataset[0]: {train_dataset[0]}')\n",
+    "\n",
+    "train_dataset = LazyLLMDataset(train_dataset, template.encode, random_state=data_seed)\n",
+    "val_dataset = LazyLLMDataset(val_dataset, template.encode, random_state=data_seed)\n",
+    "data = train_dataset[0]\n",
+    "logger.info(f'encoded_train_dataset[0]: {data}')\n",
+    "\n",
+    "template.print_inputs(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the trainer and start the training.\n",
+    "model.enable_input_require_grads()  # Compatible with gradient checkpointing\n",
+    "trainer = Seq2SeqTrainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    data_collator=template.data_collator,\n",
+    "    train_dataset=train_dataset,\n",
+    "    eval_dataset=val_dataset,\n",
+    "    template=template,\n",
+    ")\n",
+    "trainer.train()\n",
+    "\n",
+    "last_model_checkpoint = trainer.state.last_model_checkpoint\n",
+    "logger.info(f'last_model_checkpoint: {last_model_checkpoint}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize the training loss.\n",
+    "# You can also use the TensorBoard visualization interface during training by entering\n",
+    "# `tensorboard --logdir '{output_dir}/runs'` at the command line.\n",
+    "images_dir = os.path.join(output_dir, 'images')\n",
+    "logger.info(f'images_dir: {images_dir}')\n",
+    "plot_images(images_dir, training_args.logging_dir, ['train/loss'], 0.9)  # save images\n",
+    "\n",
+    "# Read and display the image.\n",
+    "# The light yellow line represents the actual loss value,\n",
+    "# while the yellow line represents the loss value smoothed with a smoothing factor of 0.9.\n",
+    "from IPython.display import display\n",
+    "from PIL import Image\n",
+    "image = Image.open(os.path.join(images_dir, 'train_loss.png'))\n",
+    "display(image)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

examples/sampler/distill/distill.sh ADDED Viewed

	@@ -0,0 +1,11 @@

+OPENAI_API_KEY="xxx" \
+swift sample \
+    --sampler_type distill \
+    --sampler_engine client \
+    --model deepseek-r1 \
+    --stream true \
+    --dataset tastelikefeet/competition_math#5 \
+    --num_return_sequences 1 \
+    --temperature 0.6 \
+    --top_p 0.95 \
+    --engine_kwargs '{"base_url":"https://dashscope.aliyuncs.com/compatible-mode/v1"}'

examples/sampler/mcts/mcts.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import os
+import subprocess
+import time
+from typing import List
+import json
+from modelscope.msdatasets import MsDataset
+conda_prefix = ''
+def client_sample(model: str, orm: str, dataset_path: str, iter: int, device_count: int, output_dir: str):
+    handlers = []
+    # Sampling cache
+    api_key = os.getenv('DASHSCOPE_API_KEY')
+    for device in range(device_count):
+        output_file = f'iter_{iter}_proc_{device}.jsonl'
+        cache_file = f'iter_{iter}_proc_{device}_cache.jsonl'
+        dataset = f'train_{device:02}.jsonl'
+        # output_file_path = os.path.join(output_dir, output_file)
+        cache_file_path = os.path.join(output_dir, cache_file)
+        single_dataset_path = os.path.join(dataset_path, dataset)
+        if not os.path.exists(cache_file_path):
+            open(cache_file_path, 'w').close()
+        sample_cmd = (f'USE_OPENCOMPASS_EVALUATOR=True '
+                      f'swift sample '
+                      f'--model {model} '
+                      f'--orm_model {orm} '
+                      f'--sampler_type mcts '
+                      f'--process_reward_rate 0 '
+                      f'--stop_words ки '
+                      f'--seed 42 '
+                      f'--api_key {api_key} '
+                      f'--dataset {single_dataset_path} '
+                      f'--max_length 2048 '
+                      f'--system ./scripts/sampler/system_prompt.txt '
+                      f'--load_args false '
+                      f'--sampler_engine client '
+                      f'--max_new_tokens 768 '
+                      f'--override_exist_file true '
+                      f'--num_sampling_per_gpu_batch_size 1 '
+                      f'--num_return_sequences 8 '
+                      f'--exploration_rate 0.2 '
+                      f'--max_iterations 200 '
+                      f'--output_dir {output_dir} '
+                      f'--cache_files {cache_file} '
+                      f'--output_file {output_file} '
+                      f'--temperature 1.0 ')
+        print(f'Sampling caches of iter {iter}, part {device}.', flush=True)
+        # env['CUDA_VISIBLE_DEVICES'] = str(device)
+        handler = subprocess.Popen(
+            f'{sample_cmd}' + f' > mcts_logs/sample_iter_{iter}_proc_{device}_cache.log 2>&1',
+            env=os.environ.copy(),
+            shell=True,
+            executable='/bin/bash')
+        handlers.append(handler)
+    datasets = []
+    for proc, handler in enumerate(handlers):
+        handler.wait()
+        assert os.path.exists(os.path.join(output_dir, f'iter_{iter}_proc_{proc}.jsonl'))
+        datasets.append(os.path.join('sample_output', f'iter_{iter}_proc_{proc}.jsonl'))
+    print(f'Sampling done, files:{datasets}', flush=True)
+def split_dataset(ds, split_size, out_path):
+    data_size = int(len(ds) / split_size) + 1
+    for i in range(split_size):
+        file_name = f'train_{i:02}.jsonl'
+        file_path = os.path.join(out_path, file_name)
+        print(file_path)
+        ds_split = ds[data_size * i:min(data_size * (i + 1), len(ds))]
+        print(f"split_size: {len(ds_split['problem'])}")
+        with open(file_path, 'w', encoding='utf-8') as file:
+            for problem, solution in zip(ds_split['problem'], ds_split['solution']):
+                message = {
+                    'messages': [
+                        {
+                            'role': 'user',
+                            'content': problem,
+                        },
+                        {
+                            'role': 'assistant',
+                            'content': solution,
+                        },
+                    ]
+                }
+                file.write(json.dumps(message, ensure_ascii=False) + '\n')
+def main():
+    server_model = 'qwen-max'
+    orm = 'math'
+    device_count = 20
+    output_dir = 'output/sampler/client_mcts/'
+    dataset_dir = 'datasets/competition_math/'
+    log_dir = 'mcts_logs/'
+    os.makedirs(output_dir, exist_ok=True)
+    os.makedirs(dataset_dir, exist_ok=True)
+    os.makedirs(log_dir, exist_ok=True)
+    ds = MsDataset.load('tastelikefeet/competition_math', subset_name='default', split='train')
+    split_dataset(ds, device_count, dataset_dir)
+    ts = time.time()
+    client_sample(server_model, orm, dataset_dir, 0, device_count, output_dir)
+    print(f'do sample cost: {(time.time() - ts) / 60:.1f} minutes.', flush=True)
+if __name__ == '__main__':
+    main()

examples/sampler/mcts/mcts.sh ADDED Viewed

	@@ -0,0 +1,35 @@

+# MCTS sampling with the vLLM engine on a single GPU.
+# Run from the repository root: all paths below are relative to it.
+export CUDA_VISIBLE_DEVICES=0
+export USE_OPENCOMPASS_EVALUATOR=True
+swift sample \
+    --model ./output/Qwen2.5-Math-7B-Instruct/v40-20250126-161112/checkpoint-20 \
+    --orm_model math \
+    --sampler_type mcts \
+    --sampler_engine vllm \
+    --output_dir ./output/sampler/mcts \
+    --system ./examples/sampler/mcts/system_prompt.txt \
+    --stop_words ки \
+    --dataset ./datasets/competition_math/small_test.jsonl \
+    --num_return_sequences 2 \
+    --process_reward_rate 0 \
+    --max_new_tokens 2048
+## Train
+# nproc_per_node=8
+# NPROC_PER_NODE=$nproc_per_node \
+# swift sft \
+#     --model Qwen/Qwen2.5-Math-7B-Instruct \
+#     --train_type full \
+#     --torch_dtype bfloat16 \
+#     --dataset 'datasets/gen_V5.jsonl' \
+#     --num_train_epochs 1 \
+#     --per_device_train_batch_size 1 \
+#     --learning_rate 1e-5 \
+#     --gradient_accumulation_steps $(expr 128 / $nproc_per_node) \
+#     --eval_steps 1000 \
+#     --save_steps 10 \
+#     --save_total_limit 100 \
+#     --max_length 10000 \
+#     --logging_steps 5 \
+#     --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
+#     --deepspeed zero3

examples/sampler/mcts/system_prompt.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+You are a math model, you should **think step by step** carefully. Each step should **end with \"ки\”**. Final answer should be in a ‘\boxed()’.
+## Example:
+Step1: XXX. ки\n
+Step2: XXX. ки\n
+Step3: XXX. ки\n
+Answer: \boxed(answer). ки\n

examples/train/agent/deepseek_r1.sh ADDED Viewed

	@@ -0,0 +1,27 @@

+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
+    --train_type full \
+    --dataset AI-ModelScope/function-calling-chatml \
+    --agent_template react_en \
+    --loss_scale react \
+    --response_prefix '' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 2 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 8 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --save_only_model true \
+    --packing true \
+    --use_liger_kernel true \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --attn_impl flash_attn \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 16

examples/train/agent/glm4.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+# 4 * 80GiB
+NPROC_PER_NODE=4 \
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+swift sft \
+    --model ZhipuAI/GLM-4-9B-0414 \
+    --train_type full \
+    --dataset AI-ModelScope/function-calling-chatml \
+    --agent_template hermes \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 2 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 2 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --save_only_model true \
+    --packing true \
+    --deepspeed zero3 \
+    --use_liger_kernel true \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --attn_impl flash_attn \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 16

examples/train/agent/loss_scale/infer_lora.py ADDED Viewed

	@@ -0,0 +1,90 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+# os.environ['SWIFT_DEBUG'] = '1'
+def infer(engine: 'InferEngine', infer_request: 'InferRequest'):
+    stop = [engine.default_template.agent_template.keyword.observation]  # compat react_en
+    request_config = RequestConfig(max_tokens=512, temperature=0, stop=stop)
+    resp_list = engine.infer([infer_request], request_config)
+    query = infer_request.messages[0]['content']
+    response = resp_list[0].choices[0].message.content
+    print(f'query: {query}')
+    print(f'response: {response}')
+    print(f'tool_calls: {resp_list[0].choices[0].message.tool_calls}')
+    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
+    print(f'tool_response: {tool}')
+    infer_request.messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
+    resp_list = engine.infer([infer_request], request_config)
+    response2 = resp_list[0].choices[0].message.content
+    print(f'response2: {response2}')
+def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'):
+    stop = [engine.default_template.agent_template.keyword.observation]
+    request_config = RequestConfig(max_tokens=512, temperature=0, stream=True, stop=stop)
+    gen_list = engine.infer([infer_request], request_config)
+    query = infer_request.messages[0]['content']
+    response = ''
+    print(f'query: {query}\nresponse: ', end='')
+    for resp in gen_list[0]:
+        if resp is None:
+            continue
+        delta = resp.choices[0].delta.content
+        response += delta
+        print(delta, end='', flush=True)
+    print()
+    print(f'tool_calls: {resp.choices[0].delta.tool_calls}')
+    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
+    print(f'tool_response: {tool}\nresponse2: ', end='')
+    infer_request.messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
+    gen_list = engine.infer([infer_request], request_config)
+    for resp in gen_list[0]:
+        if resp is None:
+            continue
+        print(resp.choices[0].delta.content, end='', flush=True)
+    print()
+def get_infer_request():
+    return InferRequest(
+        messages=[{
+            'role': 'user',
+            'content': "How's the weather in Beijing today?"
+        }],
+        tools=[{
+            'name': 'get_current_weather',
+            'description': 'Get the current weather in a given location',
+            'parameters': {
+                'type': 'object',
+                'properties': {
+                    'location': {
+                        'type': 'string',
+                        'description': 'The city and state, e.g. San Francisco, CA'
+                    },
+                    'unit': {
+                        'type': 'string',
+                        'enum': ['celsius', 'fahrenheit']
+                    }
+                },
+                'required': ['location']
+            }
+        }])
+if __name__ == '__main__':
+    # Imported here, after CUDA_VISIBLE_DEVICES is set at the top of the file
+    # (presumably so the setting is in place before swift is loaded; confirm).
+    from swift.llm import InferEngine, InferRequest, PtEngine, RequestConfig
+    from swift.plugin import agent_templates
+    model = 'Qwen/Qwen2.5-3B'
+    # Placeholder path: point this at the trained LoRA checkpoint directory.
+    adapters = ['output/vx-xxx/checkpoint-xxx']
+    engine = PtEngine(model, adapters=adapters, max_batch_size=8)
+    # Optional: uncomment to override the engine's default agent template.
+    # agent_template = agent_templates['hermes']()  # react_en/qwen_en/qwen_en_parallel
+    # engine.default_template.agent_template = agent_template
+    # A fresh request is built for each demo because both extend its messages in place.
+    infer(engine, get_infer_request())
+    infer_stream(engine, get_infer_request())

examples/train/agent/loss_scale/train.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+# 20GB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-3B \
+    --train_type lora \
+    --dataset AI-ModelScope/function-calling-chatml#10000 \
+    --loss_scale hermes \
+    --agent_template hermes \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 2 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --modules_to_save embed_tokens lm_head \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --use_liger_kernel true \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 16

examples/train/agent/qwen2_5.sh ADDED Viewed

	@@ -0,0 +1,26 @@

+# 35GiB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-3B \
+    --train_type full \
+    --dataset AI-ModelScope/function-calling-chatml \
+    --agent_template hermes \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 2 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 8 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --save_only_model true \
+    --packing true \
+    --use_liger_kernel true \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --attn_impl flash_attn \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 16

examples/train/all_to_all/infer.sh ADDED Viewed

	@@ -0,0 +1,9 @@

+# 53GiB
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --model BAAI/Emu3-Gen \
+    --infer_backend pt \
+    --stream False \
+    --use_chat_template False \
+    --top_k 2048 \
+    --max_new_tokens 40960

examples/train/all_to_all/train.sh ADDED Viewed

	@@ -0,0 +1,23 @@

+# 70 GiB * 2
+nproc_per_node=2
+NPROC_PER_NODE=$nproc_per_node \
+CUDA_VISIBLE_DEVICES=0,2 \
+max_position_embeddings=10240 \
+image_area=518400 \
+swift sft \
+    --model BAAI/Emu3-Gen \
+    --train_type lora \
+    --dataset 'swift/TextCaps#40' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 10 \
+    --per_device_train_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 4 \
+    --warmup_ratio 0.03 \
+    --eval_steps 500 \
+    --save_steps 500 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 1024 \
+    --weight_decay 0.1 \
+    --gradient_checkpointing_kwargs '{"use_reentrant": false}'

examples/train/base_to_chat/full.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+nproc_per_node=2
+CUDA_VISIBLE_DEVICES=0,1 \
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model Qwen/Qwen2.5-1.5B \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 10 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 200 \
+    --save_steps 200 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot \
+    --deepspeed zero2

examples/train/base_to_chat/lora.sh ADDED Viewed

	@@ -0,0 +1,34 @@

+# Use `--template default`
+nproc_per_node=2
+CUDA_VISIBLE_DEVICES=0,1 \
+MASTER_PORT=29501 \
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model Qwen/Qwen2.5-1.5B \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition' \
+    --torch_dtype bfloat16 \
+    --template default \
+    --num_train_epochs 10 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot \
+    --deepspeed zero2

examples/train/base_to_chat/lora2.sh ADDED Viewed

	@@ -0,0 +1,33 @@

+# Use `--target_modules all-linear embed_tokens lm_head`
+# Please adjust the `lm_head` according to the model.
+nproc_per_node=2
+CUDA_VISIBLE_DEVICES=0,1 \
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model Qwen/Qwen2.5-1.5B \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 10 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear embed_tokens lm_head \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot \
+    --deepspeed zero2

examples/train/embedding/train_gme.sh ADDED Viewed

	@@ -0,0 +1,29 @@

+nproc_per_node=8
+# losses: plugin/loss.py
+# 8*40G
+MAX_PIXELS=1003520 \
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model iic/gme-Qwen2-VL-2B-Instruct \
+    --train_type lora \
+    --dataset 'swift/TextCaps:emb' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
+    --gradient_accumulation_steps $(expr 64 / $nproc_per_node) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --eval_strategy steps \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --output_dir output \
+    --lazy_tokenize true \
+    --warmup_ratio 0.05 \
+    --learning_rate 5e-6 \
+    --deepspeed zero3 \
+    --dataloader_num_workers 4 \
+    --task_type embedding \
+    --loss_type infonce \
+    --dataloader_drop_last true

examples/train/embedding/train_gte.sh ADDED Viewed

	@@ -0,0 +1,31 @@

+nproc_per_node=8
+# 4*12G
+# losses: plugin/loss.py
+# data format: docs/source_en/Customization/Custom-dataset.md
+# --use_chat_template must be false to use generation template
+# --dataloader_drop_last must be true or eval gather will throw error
+# --model iic/gte-modernbert-base modernbert also supported
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model iic/gte_Qwen2-7B-instruct \
+    --train_type lora \
+    --dataset 'sentence-transformers/stsb' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps $(expr 64 / $nproc_per_node) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --eval_strategy steps \
+    --use_chat_template false \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --learning_rate 5e-6 \
+    --deepspeed zero3 \
+    --dataloader_num_workers 4 \
+    --task_type embedding \
+    --loss_type cosine_similarity \
+    --dataloader_drop_last true

examples/train/full/infer.sh ADDED Viewed

	@@ -0,0 +1,7 @@

+# If you are using the validation set for inference, add the parameter `--load_data_args true`.
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --model output/vx-xxx/checkpoint-xxx \
+    --stream true \
+    --temperature 0 \
+    --max_new_tokens 2048

examples/train/full/qwen2_5_32b.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+# 8 * 80GiB
+NPROC_PER_NODE=8 \
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+swift sft \
+    --model Qwen/Qwen2.5-32B \
+    --train_type full \
+    --dataset 'liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT' \
+    --torch_dtype bfloat16 \
+    --max_steps 2000 \
+    --streaming true \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 2 \
+    --packing true \
+    --eval_steps 200 \
+    --save_steps 200 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 8 \
+    --dataset_num_proc 8 \
+    --save_total_limit 2 \
+    --save_only_model true \
+    --output_dir output/Qwen2.5-32B \
+    --deepspeed zero3 \
+    --use_liger_kernel true \
+    --attn_impl flash_attn

examples/train/full/train.sh ADDED Viewed

	@@ -0,0 +1,25 @@

+# 76GiB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot

examples/train/grpo/external/README.md ADDED Viewed

	@@ -0,0 +1,46 @@

+# README: GRPO External Mode Execution Scripts
+---
+> **Note**: External mode requires vLLM version 0.8.3 or higher.
+## **Introduction**
+The GRPO (Group Relative Policy Optimization) training framework supports high-performance inference engines like vLLM to accelerate the sampling process. The **External Mode** allows you to connect to an external vLLM inference server, separating the inference service from the training process. This mode is ideal for scenarios where you want to offload inference to dedicated hardware or servers, improving resource utilization and scalability.
+This folder contains scripts and instructions for running GRPO in **External Mode**, enabling integration with an external vLLM server.
+Before running the scripts, ensure the following:
+1. **vLLM Server Deployment**:
+   - An external vLLM server must be deployed and accessible.
+   - Use the `swift rollout` command to deploy the vLLM server.
+2. **Network Connectivity**:
+   - Ensure the training nodes can communicate with the vLLM server over the network.
+## **Deploying the vLLM Server**
+To deploy an external vLLM server, use the following command:
+```bash
+CUDA_VISIBLE_DEVICES=0 \
+swift rollout \
+  --model Qwen/Qwen3-8B
+# tp
+CUDA_VISIBLE_DEVICES=0,1 \
+swift rollout \
+  --model Qwen/Qwen3-8B \
+  --tensor_parallel_size 2
+```
+## Training with External vLLM Server
+```bash
+--vllm_server_host <server ip> \
+--vllm_server_port <server port> \
+--vllm_server_timeout <Timeout duration> \
+```
+Configuration Parameters
+When using an external vLLM server, configure the following parameters:

examples/train/grpo/external/grpo.sh ADDED Viewed

	@@ -0,0 +1,33 @@

+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+NPROC_PER_NODE=8 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-32B-Instruct \
+    --reward_funcs accuracy \
+    --use_vllm true \
+    --vllm_server_host xxx \
+    --vllm_server_port 8000 \
+    --train_type full \
+    --torch_dtype bfloat16 \
+    --dataset AI-MO/NuminaMath-TIR#1000 \
+    --max_completion_length 2048 \
+    --num_train_epochs 3 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-6 \
+    --gradient_accumulation_steps 1 \
+    --save_total_limit 2 \
+    --logging_steps 1 \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_generations 8 \
+    --temperature 1.0 \
+    --top_p 0.9 \
+    --top_k 50 \
+    --deepspeed zero3 \
+    --log_completions true \
+    --num_iterations 1 \
+    --num_infer_workers 1 \
+    --report_to tensorboard wandb \
+    --beta 0.0