diff --git a/.gitattributes b/.gitattributes
index 12e94bade155bdc4b7c9850ec7b4c3ce49106f01..30e76d4df4681b8a314c710a62bd10ad6d5e9f8b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -38,3 +38,6 @@ asset/banner.png filter=lfs diff=lfs merge=lfs -text
 docs/resources/grpo_clevr_count.png filter=lfs diff=lfs merge=lfs -text
 docs/resources/grpo_code.png filter=lfs diff=lfs merge=lfs -text
 docs/resources/dpo_data.png filter=lfs diff=lfs merge=lfs -text
+docs/resources/grpo_countdown_1.png filter=lfs diff=lfs merge=lfs -text
+docs/resources/grpo_countdown.png filter=lfs diff=lfs merge=lfs -text
+docs/resources/grpo_geoqa.png filter=lfs diff=lfs merge=lfs -text
diff --git a/docs/resources/grpo_countdown.png b/docs/resources/grpo_countdown.png
new file mode 100644
index 0000000000000000000000000000000000000000..af2ce0c0ce08cb3d8b152f6bafe2c15c056dcd72
--- /dev/null
+++ b/docs/resources/grpo_countdown.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b55fe6864e0c92549940d6989d92b3ab22be38a035cff3694525252737fc91e
+size 2226402
diff --git a/docs/resources/grpo_countdown_1.png b/docs/resources/grpo_countdown_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..819ab3d992619b077d75e6946d4637b030b8d213
--- /dev/null
+++ b/docs/resources/grpo_countdown_1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b78dc3ce1cd541e76f2c557dea3aff06b278bb3b5413946a92c584cf42c1369f
+size 785044
diff --git a/docs/resources/grpo_geoqa.png b/docs/resources/grpo_geoqa.png
new file mode 100644
index 0000000000000000000000000000000000000000..071d9b8eacb301bd96e30c2eff1471e68a7632a8
--- /dev/null
+++ b/docs/resources/grpo_geoqa.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71246376b16f2ff288542dca2ff31532b16ef99f5e862797463d548e447e1f8d
+size 2238084
diff --git a/examples/infer/demo_agent.py b/examples/infer/demo_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4867c11e59e01a89ee9920c80a28d86dc699efe
--- /dev/null
+++ b/examples/infer/demo_agent.py
@@ -0,0 +1,118 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
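+# Demonstrates a tool-calling round trip with an agent template: the model emits a
+# tool call for the weather query, a mocked tool result (the hard-coded JSON below,
+# no real API is called) is appended as a 'tool' message, and the model is queried
+# again for the final answer.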
+import os
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+# os.environ['SWIFT_DEBUG'] = '1'
+
+
+def infer(engine: 'InferEngine', infer_request: 'InferRequest'):
+    stop = [engine.default_template.agent_template.keyword.observation]  # compat react_en
+    request_config = RequestConfig(max_tokens=512, temperature=0, stop=stop)
+    resp_list = engine.infer([infer_request], request_config)
+    query = infer_request.messages[0]['content']
+    response = resp_list[0].choices[0].message.content
+    print(f'query: {query}')
+    print(f'response: {response}')
+    print(f'tool_calls: {resp_list[0].choices[0].message.tool_calls}')
+
+    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
+    print(f'tool_response: {tool}')
+    infer_request.messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
+    resp_list = engine.infer([infer_request], request_config)
+    response2 = resp_list[0].choices[0].message.content
+    print(f'response2: {response2}')
+
+
+def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'):
+    stop = [engine.default_template.agent_template.keyword.observation]
+    request_config = RequestConfig(max_tokens=512, temperature=0, stream=True, stop=stop)
+    gen_list = engine.infer([infer_request], request_config)
+    query = infer_request.messages[0]['content']
+    response = ''
+    print(f'query: {query}\nresponse: ', end='')
+    for resp in gen_list[0]:
+        if resp is None:
+            continue
+        delta = resp.choices[0].delta.content
+        response += delta
+        print(delta, end='', flush=True)
+    print()
+    print(f'tool_calls: {resp.choices[0].delta.tool_calls}')
+
+    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
+    print(f'tool_response: {tool}\nresponse2: ', end='')
+    infer_request.messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
+    gen_list = engine.infer([infer_request], request_config)
+    for resp in gen_list[0]:
+        if resp is None:
+            continue
+        print(resp.choices[0].delta.content, end='', flush=True)
+    print()
+
+
+def get_infer_request():
+    return InferRequest(
+        messages=[{
+            'role': 'user',
+            'content': "How's the weather in Beijing today?"
+        }],
+        tools=[{
+            'name': 'get_current_weather',
+            'description': 'Get the current weather in a given location',
+            'parameters': {
+                'type': 'object',
+                'properties': {
+                    'location': {
+                        'type': 'string',
+                        'description': 'The city and state, e.g. San Francisco, CA'
+                    },
+                    'unit': {
+                        'type': 'string',
+                        'enum': ['celsius', 'fahrenheit']
+                    }
+                },
+                'required': ['location']
+            }
+        }])
+
+
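+# Note on the message layout below: the trailing {'role': 'assistant', 'content': None}
+# appears to tell the engine to resume from the partial assistant text
+# ('It is sunny today, ') rather than open a new turn, so the model output is the
+# continuation of that sentence. This is an assumption from reading the demo, not a
+# documented contract.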
+def infer_continue_generate(engine):
+    # Continue generating after the assistant message.
+    infer_request = InferRequest(messages=[{
+        'role': 'user',
+        'content': 'How is the weather today?'
+    }, {
+        'role': 'assistant',
+        'content': 'It is sunny today, '
+    }, {
+        'role': 'assistant',
+        'content': None
+    }])
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'response: {response}')
+
+
+if __name__ == '__main__':
+    from swift.llm import InferEngine, InferRequest, PtEngine, RequestConfig
+    from swift.plugin import agent_templates
+    model = 'Qwen/Qwen2.5-1.5B-Instruct'
+    infer_backend = 'pt'
+
+    if infer_backend == 'pt':
+        engine = PtEngine(model, max_batch_size=64)
+    elif infer_backend == 'vllm':
+        from swift.llm import VllmEngine
+        engine = VllmEngine(model, max_model_len=8192)
+    elif infer_backend == 'lmdeploy':
+        from swift.llm import LmdeployEngine
+        engine = LmdeployEngine(model)
+
+    # agent_template = agent_templates['hermes']()  # react_en/qwen_en/qwen_en_parallel
+    # engine.default_template.agent_template = agent_template
+
+    infer(engine, get_infer_request())
+    infer_stream(engine, get_infer_request())
+
+    # infer_continue_generate(engine)
diff --git a/examples/infer/demo_bert.py b/examples/infer/demo_bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..852f970c25604e0398a6e2baa41f2fcd1d2da768
--- /dev/null
+++ b/examples/infer/demo_bert.py
@@ -0,0 +1,53 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+from typing import List
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']):
+    resp_list = engine.infer(infer_requests)
+    query0 = infer_requests[0].messages[0]['content']
+    query1 = infer_requests[1].messages[0]['content']
+    print(f'query0: {query0}')
+    print(f'response0: {resp_list[0].choices[0].message.content}')
+    print(f'query1: {query1}')
+    print(f'response1: {resp_list[1].choices[0].message.content}')
+
+
+if __name__ == '__main__':
+    # This is an example of BERT with LoRA.
+    from swift.llm import InferEngine, InferRequest, PtEngine, load_dataset, safe_snapshot_download, BaseArguments
+    from swift.tuners import Swift
+    adapter_path = safe_snapshot_download('swift/test_bert')
+    args = BaseArguments.from_pretrained(adapter_path)
+    args.max_length = 512
+    args.truncation_strategy = 'right'
+    # method1
+    model, processor = args.get_model_processor()
+    model = Swift.from_pretrained(model, adapter_path)
+    template = args.get_template(processor)
+    engine = PtEngine.from_model_template(model, template, max_batch_size=64)
+
+    # method2
+    # engine = PtEngine(args.model, adapters=[adapter_path], max_batch_size=64,
+    #                   task_type=args.task_type, num_labels=args.num_labels)
+    # template = args.get_template(engine.processor)
+    # engine.default_template = template
+
+    # Here, `load_dataset` is used for convenience; `infer_batch` does not require creating a dataset.
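+    # Dataset spec below (ms-swift dataset syntax): 'DAMO_NLP/jd' with subset 'cls';
+    # the '#1000' suffix samples 1000 rows, and seed=42 makes the sampling reproducible.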
+    dataset = load_dataset(['DAMO_NLP/jd:cls#1000'], seed=42)[0]
+    print(f'dataset: {dataset}')
+    infer_requests = [InferRequest(messages=data['messages']) for data in dataset]
+    infer_batch(engine, infer_requests)
+
+    infer_batch(engine, [
+        InferRequest(messages=[{
+            'role': 'user',
+            'content': '今天天气真好呀'  # 'The weather is really nice today'
+        }]),
+        InferRequest(messages=[{
+            'role': 'user',
+            'content': '真倒霉'  # 'How unlucky'
+        }])
+    ])
diff --git a/examples/infer/demo_grounding.py b/examples/infer/demo_grounding.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f20fd8294a3d7515e9f3e349f775f8b044a5d04
--- /dev/null
+++ b/examples/infer/demo_grounding.py
@@ -0,0 +1,43 @@
+# pip install git+https://github.com/huggingface/transformers.git  # transformers>=4.49
+import os
+import re
+from typing import Literal
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+def draw_bbox_qwen2_vl(image, response, norm_bbox: Literal['norm1000', 'none']):
+    matches = re.findall(
+        r'<\|object_ref_start\|>(.*?)<\|object_ref_end\|><\|box_start\|>\((\d+),(\d+)\),\((\d+),(\d+)\)<\|box_end\|>',
+        response)
+    ref = []
+    bbox = []
+    for match_ in matches:
+        ref.append(match_[0])
+        bbox.append(list(match_[1:]))
+    draw_bbox(image, ref, bbox, norm_bbox=norm_bbox)
+
+
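+# For reference, a Qwen2-VL grounding response is expected to look like
+#   <|object_ref_start|>sheep<|object_ref_end|><|box_start|>(176,106),(478,512)<|box_end|>
+# (illustrative values, not a real model output). With norm_bbox='norm1000' the
+# coordinates are on a 0-1000 scale relative to the image size, and draw_bbox is
+# assumed to rescale them to pixels before drawing.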
+def infer_grounding():
+    from swift.llm import PtEngine, RequestConfig, BaseArguments, InferRequest, safe_snapshot_download
+    output_path = 'bbox.png'
+    image = load_image('http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png')
+    infer_request = InferRequest(messages=[{'role': 'user', 'content': 'Task: Object Detection'}], images=[image])
+
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    adapter_path = safe_snapshot_download('swift/test_grounding')
+    args = BaseArguments.from_pretrained(adapter_path)
+
+    engine = PtEngine(args.model, adapters=[adapter_path])
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora-response: {response}')
+
+    draw_bbox_qwen2_vl(image, response, norm_bbox=args.norm_bbox)
+    print(f'output_path: {output_path}')
+    image.save(output_path)
+
+
+if __name__ == '__main__':
+    from swift.llm import draw_bbox, load_image
+    infer_grounding()
diff --git a/examples/infer/demo_hf.py b/examples/infer/demo_hf.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2f2114a279b41affd6137bb2607f1740670f566
--- /dev/null
+++ b/examples/infer/demo_hf.py
@@ -0,0 +1,61 @@
+def infer_hf():
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    from peft import PeftModel
+    from modelscope import snapshot_download
+    model_dir = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
+    adapter_dir = snapshot_download('swift/test_lora')
+    model = AutoModelForCausalLM.from_pretrained(
+        model_dir, torch_dtype='auto', device_map='auto', trust_remote_code=True)
+    model = PeftModel.from_pretrained(model, adapter_dir)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+
+    messages = [{
+        'role': 'system',
+        'content': 'You are a helpful assistant.'
+    }, {
+        'role': 'user',
+        'content': 'who are you?'
+    }]
+    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    model_inputs = tokenizer([text], return_tensors='pt', add_special_tokens=False).to(model.device)
+
+    generated_ids = model.generate(**model_inputs, max_new_tokens=512, do_sample=False)
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    print(f'response: {response}')
+    return response
+
+
+def infer_swift():
+    from swift.llm import get_model_tokenizer, get_template, InferRequest, RequestConfig, PtEngine
+    from modelscope import snapshot_download
+    from swift.tuners import Swift
+    model_dir = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
+    adapter_dir = snapshot_download('swift/test_lora')
+    model, tokenizer = get_model_tokenizer(model_dir, device_map='auto')
+    model = Swift.from_pretrained(model, adapter_dir)
+    template = get_template(model.model_meta.template, tokenizer)
+    engine = PtEngine.from_model_template(model, template)
+
+    messages = [{
+        'role': 'system',
+        'content': 'You are a helpful assistant.'
+    }, {
+        'role': 'user',
+        'content': 'who are you?'
+    }]
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    resp_list = engine.infer([InferRequest(messages=messages)], request_config=request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'response: {response}')
+    return response
+
+
+if __name__ == '__main__':
+    response = infer_hf()
+    response2 = infer_swift()
+    assert response == response2
diff --git a/examples/infer/demo_lora.py b/examples/infer/demo_lora.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d9396f135cca27c89b09636b5c5ffcc749a2335
--- /dev/null
+++ b/examples/infer/demo_lora.py
@@ -0,0 +1,68 @@
+import os
+from typing import Literal
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+def infer_multilora(infer_request: 'InferRequest', infer_backend: Literal['vllm', 'pt']):
+    # Dynamic LoRA
+    adapter_path = safe_snapshot_download('swift/test_lora')
+    adapter_path2 = safe_snapshot_download('swift/test_lora2')
+    args = BaseArguments.from_pretrained(adapter_path)
+    if infer_backend == 'pt':
+        engine = PtEngine(args.model)
+    elif infer_backend == 'vllm':
+        from swift.llm import VllmEngine
+        engine = VllmEngine(args.model, enable_lora=True, max_loras=1, max_lora_rank=16)
+    template = get_template(args.template, engine.processor, args.system)
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    adapter_request = AdapterRequest('lora1', adapter_path)
+    adapter_request2 = AdapterRequest('lora2', adapter_path2)
+
+    # use lora
+    resp_list = engine.infer([infer_request], request_config, template=template, adapter_request=adapter_request)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora1-response: {response}')
+    # original model (no adapter)
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'response: {response}')
+    # use lora
+    resp_list = engine.infer([infer_request], request_config, template=template, adapter_request=adapter_request2)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora2-response: {response}')
+
+
+def infer_lora(infer_request: 'InferRequest'):
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    adapter_path = safe_snapshot_download('swift/test_lora')
+    args = BaseArguments.from_pretrained(adapter_path)
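+    # Three equivalent ways to build an engine with this adapter; method3 is the
+    # one exercised here, while method1/method2 are kept as commented-out alternatives.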
+    # method1
+    # engine = PtEngine(args.model, adapters=[adapter_path])
+    # template = get_template(args.template, engine.tokenizer, args.system)
+    # engine.default_template = template
+
+    # method2
+    # model, processor = args.get_model_processor()
+    # model = Swift.from_pretrained(model, adapter_path)
+    # template = args.get_template(processor)
+    # engine = PtEngine.from_model_template(model, template)
+
+    # method3
+    model, tokenizer = get_model_tokenizer(args.model)
+    model = Swift.from_pretrained(model, adapter_path)
+    template = get_template(args.template, tokenizer, args.system)
+    engine = PtEngine.from_model_template(model, template)
+
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora-response: {response}')
+
+
+if __name__ == '__main__':
+    from swift.llm import (PtEngine, RequestConfig, AdapterRequest, get_template, BaseArguments, InferRequest,
+                           safe_snapshot_download, get_model_tokenizer)
+    from swift.tuners import Swift
+    infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
+    # infer_lora(infer_request)
+    infer_multilora(infer_request, 'pt')
diff --git a/examples/infer/demo_mllm.py b/examples/infer/demo_mllm.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fca560e44853c715a3f64c42fbb3ce87aceb225
--- /dev/null
+++ b/examples/infer/demo_mllm.py
@@ -0,0 +1,145 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+from typing import List, Literal
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']):
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    metric = InferStats()
+    resp_list = engine.infer(infer_requests, request_config, metrics=[metric])
+    query0 = infer_requests[0].messages[0]['content']
+    print(f'query0: {query0}')
+    print(f'response0: {resp_list[0].choices[0].message.content}')
+    print(f'metric: {metric.compute()}')
+    # metric.reset()  # reuse
+
+
+def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'):
+    request_config = RequestConfig(max_tokens=512, temperature=0, stream=True)
+    metric = InferStats()
+    gen_list = engine.infer([infer_request], request_config, metrics=[metric])
+    query = infer_request.messages[0]['content']
+    print(f'query: {query}\nresponse: ', end='')
+    for resp in gen_list[0]:
+        if resp is None:
+            continue
+        print(resp.choices[0].delta.content, end='', flush=True)
+    print()
+    print(f'metric: {metric.compute()}')
+
+
+def get_message(mm_type: Literal['text', 'image', 'video', 'audio']):
+    if mm_type == 'text':
+        message = {'role': 'user', 'content': 'who are you?'}
+    elif mm_type == 'image':
+        message = {
+            'role':
+            'user',
+            'content': [
+                {
+                    'type': 'image',
+                    # url or local_path or PIL.Image or base64
+                    'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
+                },
+                {
+                    'type': 'text',
+                    'text': 'How many sheep are there in the picture?'
+                }
+            ]
+        }
+
+    elif mm_type == 'video':
+        message = {
+            'role':
+            'user',
+            'content': [{
+                'type': 'video',
+                'video': 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/baby.mp4'
+            }, {
+                'type': 'text',
+                'text': 'Describe this video.'
+            }]
+        }
+    elif mm_type == 'audio':
+        message = {
+            'role':
+            'user',
+            'content': [{
+                'type': 'audio',
+                'audio': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/weather.wav'
+            }, {
+                'type': 'text',
+                'text': 'What does this audio say?'
+            }]
+        }
+    return message
+
+
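+# get_message and get_data express the same request in two ways: get_message uses
+# the structured content list above, while get_data uses a plain-text prompt with
+# placeholder tags (e.g. <image>) plus parallel 'images'/'videos' fields.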
+def get_data(mm_type: Literal['text', 'image', 'video', 'audio']):
+    data = {}
+    if mm_type == 'text':
+        messages = [{'role': 'user', 'content': 'who are you?'}]
+    elif mm_type == 'image':
+        # The number of <image> tags must be the same as len(images).
+        messages = [{'role': 'user', 'content': '<image>How many sheep are there in the picture?'}]
+        # Support URL/Path/base64/PIL.Image
+        data['images'] = ['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png']
+    elif mm_type == 'video':
+        messages = [{'role': 'user', 'content': '