Student0809 commited on Jun 6, 2025

Commit

6586463

verified ·

1 Parent(s): f95d35c

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

ms-swift/.dev_scripts/ci_container_test.sh +41 -0
ms-swift/.dev_scripts/dockerci.sh +88 -0
ms-swift/.github/ISSUE_TEMPLATE/custom.md +8 -0
ms-swift/.github/ISSUE_TEMPLATE/feature_request.md +17 -0
ms-swift/.github/PULL_REQUEST_TEMPLATE.md +13 -0
ms-swift/.github/SECURITY.md +3 -0
ms-swift/.github/workflows/citest.yaml +75 -0
ms-swift/.github/workflows/publish.yaml +29 -0
ms-swift/.gitignore +151 -0
ms-swift/.pre-commit-config.yaml +52 -0
ms-swift/.pre-commit-config_local.yaml +52 -0
ms-swift/CODE_OF_CONDUCT.md +132 -0
ms-swift/CONTRIBUTING.md +63 -0
ms-swift/CONTRIBUTING_CN.md +81 -0
ms-swift/LICENSE +201 -0
ms-swift/MANIFEST.in +5 -0
ms-swift/Makefile +25 -0
ms-swift/README.md +423 -0
ms-swift/README_CN.md +413 -0
ms-swift/add_errorType.py +40 -0
ms-swift/allcorrect_with_error_type.json +0 -0
ms-swift/asset/discord_qr.jpg +0 -0
ms-swift/asset/wechat.png +0 -0
ms-swift/clean_transcripts.py +95 -0
ms-swift/dataset.json +60 -0
ms-swift/dataset_OSST2.json +0 -0
ms-swift/dataset_Overlap2.json +0 -0
ms-swift/dataset_new.json +0 -0
ms-swift/dataset_new_2.json +0 -0
ms-swift/dataset_new_4_overlap.json +0 -0
ms-swift/dataset_overlap5s716_gemini.json +0 -0
ms-swift/docs/Makefile +20 -0
ms-swift/docs/README.md +37 -0
ms-swift/docs/make.bat +35 -0
ms-swift/docs/resources/grpo.png +0 -0
ms-swift/docs/source/.readthedocs.yaml +29 -0
ms-swift/docs/source/BestPractices/GRPO代码训练.md +134 -0
ms-swift/docs/source/BestPractices/GRPO多模态训练.md +306 -0
ms-swift/docs/source/BestPractices/快速训练VL模型.md +227 -0
ms-swift/docs/source/BestPractices/更多最佳实践.md +7 -0
ms-swift/docs/source/Customization/插件化.md +215 -0
ms-swift/docs/source/Customization/自定义模型.md +34 -0
ms-swift/docs/source/GetStarted/Web-UI.md +36 -0
ms-swift/docs/source/Instruction/Agent支持.md +207 -0
ms-swift/docs/source/Instruction/GRPO.md +461 -0
ms-swift/docs/source/Instruction/Megatron-SWIFT训练.md +293 -0
ms-swift/docs/source/Instruction/ReleaseNote3.0.md +80 -0
ms-swift/docs/source/Instruction/人类对齐.md +115 -0
ms-swift/docs/source/Instruction/使用tuners.md +133 -0
ms-swift/docs/source/Instruction/常见问题整理.md +713 -0

ms-swift/.dev_scripts/ci_container_test.sh ADDED Viewed

	@@ -0,0 +1,41 @@

+if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
+    # pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+    pip install -r requirements/tests.txt -i https://mirrors.aliyun.com/pypi/simple/
+    git config --global --add safe.directory /ms-swift
+    git config --global user.email tmp
+    git config --global user.name tmp.com
+    # linter test
+    # use internal project for pre-commit due to the network problem
+    if [ `git remote -v | grep alibaba  | wc -l` -gt 1 ]; then
+        pre-commit run -c .pre-commit-config_local.yaml --all-files
+        if [ $? -ne 0 ]; then
+            echo "linter test failed, please run 'pre-commit run --all-files' to check"
+            echo "From the repository folder"
+            echo "Run 'pip install -r requirements/tests.txt' install test dependencies."
+            echo "Run 'pre-commit install' install pre-commit hooks."
+            echo "Finally run linter with command: 'pre-commit run --all-files' to check."
+            echo "Ensure there is no failure!!!!!!!!"
+            exit -1
+        fi
+    fi
+    pip install -r requirements/framework.txt -U -i https://mirrors.aliyun.com/pypi/simple/
+    pip install diffusers decord einops -U -i https://mirrors.aliyun.com/pypi/simple/
+    pip install autoawq -U --no-deps
+    # test with install
+    pip install .
+    pip install auto_gptq bitsandbytes deepspeed -U -i https://mirrors.aliyun.com/pypi/simple/
+else
+    echo "Running case in release image, run case directly!"
+fi
+# remove torch_extensions folder to avoid ci hang.
+rm -rf ~/.cache/torch_extensions
+if [ $# -eq 0 ]; then
+    ci_command="python tests/run.py --subprocess"
+else
+    ci_command="$@"
+fi
+echo "Running case with command: $ci_command"
+$ci_command

ms-swift/.dev_scripts/dockerci.sh ADDED Viewed

	@@ -0,0 +1,88 @@

+#!/bin/bash
+MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache
+CODE_DIR=$PWD
+CODE_DIR_IN_CONTAINER=/ms-swift
+echo "$USER"
+gpus='0,1 2,3'
+cpu_sets='0-15 16-31'
+cpu_sets_arr=($cpu_sets)
+is_get_file_lock=false
+CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml}
+echo "ci command: $CI_COMMAND"
+PR_CHANGED_FILES="${PR_CHANGED_FILES:-}"
+echo "PR modified files: $PR_CHANGED_FILES"
+PR_CHANGED_FILES=${PR_CHANGED_FILES//[ ]/#}
+echo "PR_CHANGED_FILES: $PR_CHANGED_FILES"
+idx=0
+for gpu in $gpus
+do
+  exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
+  flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; idx=$((idx+1)); continue; }
+  echo "get gpu lock $gpu"
+  CONTAINER_NAME="swift-ci-$idx"
+  let is_get_file_lock=true
+  # pull image if there are update
+  docker pull ${IMAGE_NAME}:${IMAGE_VERSION}
+  if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
+    echo 'debugging'
+    docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
+              --cpuset-cpus=${cpu_sets_arr[$idx]} \
+              --gpus='"'"device=$gpu"'"' \
+              -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
+              -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
+              -v $MODELSCOPE_HOME_CACHE/$idx:/root \
+              -v /home/admin/pre-commit:/home/admin/pre-commit \
+              -e CI_TEST=True \
+              -e TEST_LEVEL=$TEST_LEVEL \
+              -e MODELSCOPE_CACHE=$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
+              -e MODELSCOPE_DOMAIN=$MODELSCOPE_DOMAIN \
+              -e MODELSCOPE_SDK_DEBUG=True \
+              -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \
+              -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \
+              -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \
+              -e TEST_LEVEL=$TEST_LEVEL \
+              -e MODELSCOPE_ENVIRONMENT='ci' \
+              -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
+              -e MODEL_TAG_URL=$MODEL_TAG_URL \
+              -e MODELSCOPE_API_TOKEN=$MODELSCOPE_API_TOKEN \
+	            -e PR_CHANGED_FILES=$PR_CHANGED_FILES \
+              --workdir=$CODE_DIR_IN_CONTAINER \
+              ${IMAGE_NAME}:${IMAGE_VERSION} \
+              $CI_COMMAND
+  else
+    docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
+              --cpuset-cpus=${cpu_sets_arr[$idx]} \
+              --gpus='"'"device=$gpu"'"' \
+              -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
+              -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
+              -v $MODELSCOPE_HOME_CACHE/$idx:/root \
+              -v /home/admin/pre-commit:/home/admin/pre-commit \
+              -e CI_TEST=True \
+              -e TEST_LEVEL=$TEST_LEVEL \
+              -e MODELSCOPE_CACHE=$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
+              -e MODELSCOPE_DOMAIN=$MODELSCOPE_DOMAIN \
+              -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \
+              -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \
+              -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \
+              -e TEST_LEVEL=$TEST_LEVEL \
+              -e MODELSCOPE_ENVIRONMENT='ci' \
+              -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
+              -e MODEL_TAG_URL=$MODEL_TAG_URL \
+              -e MODELSCOPE_API_TOKEN=$MODELSCOPE_API_TOKEN \
+	            -e PR_CHANGED_FILES=$PR_CHANGED_FILES \
+              --workdir=$CODE_DIR_IN_CONTAINER \
+              ${IMAGE_NAME}:${IMAGE_VERSION} \
+              $CI_COMMAND
+  fi
+  if [ $? -ne 0 ]; then
+    echo "Running test case failed, please check the log!"
+    exit -1
+  fi
+  break
+done
+if [ "$is_get_file_lock" = false ] ; then
+    echo 'No free GPU!'
+    exit 1
+fi

ms-swift/.github/ISSUE_TEMPLATE/custom.md ADDED Viewed

	@@ -0,0 +1,8 @@

+---
+name: Custom issue template
+about: Describe this issue template's purpose here.
+title: ''
+labels: ''
+assignees: ''
+---

ms-swift/.github/ISSUE_TEMPLATE/feature_request.md ADDED Viewed

	@@ -0,0 +1,17 @@

+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+---
+**Describe the feature**
+Please describe the feature requested here(请在这里描述需求)
+**Paste any useful information**
+Paste any useful information, including papers, github links, etc.(请在这里描述其他有用的信息，比如相关的论文地址，github链接等)
+**Additional context**
+Add any other context or information here(其他信息可以写在这里)

ms-swift/.github/PULL_REQUEST_TEMPLATE.md ADDED Viewed

	@@ -0,0 +1,13 @@

+# PR type
+- [ ] Bug Fix
+- [ ] New Feature
+- [ ] Document Updates
+- [ ] More Models or Datasets Support
+# PR information
+Write the detailed information related to this PR.
+## Experiment results
+Paste your experiment result here(if needed).

ms-swift/.github/SECURITY.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # Reporting Security Issues
2	+
3	+ Usually security issues of a deep learning project come from non-standard 3rd packages or continuous running services. If you are suffering from security issues from our project, please consider reporting to us. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions.

ms-swift/.github/workflows/citest.yaml ADDED Viewed

	@@ -0,0 +1,75 @@

+name: citest
+on:
+  push:
+    branches:
+      - master
+      - "release/**"
+    paths-ignore:
+      - "setup.*"
+      - "requirements.txt"
+      - "requirements/**"
+      - "docs/**"
+      - "tools/**"
+      - ".dev_scripts/**"
+      - "README.md"
+      - "README_*.md"
+      - "NOTICE"
+      - ".github/workflows/lint.yaml"
+      - ".github/workflows/publish.yaml"
+  pull_request:
+    paths-ignore:
+      - "setup.*"
+      - "requirements.txt"
+      - "requirements/**"
+      - "docs/**"
+      - "tools/**"
+      - ".dev_scripts/**"
+      - "README.md"
+      - "README_*.md"
+      - "NOTICE"
+      - ".github/workflows/lint.yaml"
+      - ".github/workflows/publish.yaml"
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  unittest:
+    # The type of runner that the job will run on
+    runs-on: [self-hosted]
+    timeout-minutes: 240
+    steps:
+      - name: ResetFileMode
+        shell: bash
+        run: |
+          # reset filemode to allow action runner to delete files
+          # generated by root in docker
+          set -e
+          source ~/.bashrc
+          sudo chown -R $USER:$USER  $ACTION_RUNNER_DIR
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+            lfs: 'true'
+            submodules: 'true'
+            fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }}
+      - name: Get changed files
+        id: changed-files
+        run: |
+            if ${{ github.event_name == 'pull_request' }}; then
+                echo "PR_CHANGED_FILES=$(git diff --name-only -r HEAD^1 HEAD | xargs)" >> $GITHUB_ENV
+            else
+                echo "PR_CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }} | xargs)" >> $GITHUB_ENV
+            fi
+      - name: Checkout LFS objects
+        run: git lfs checkout
+      - name: Run unittest
+        shell: bash
+        run: |
+          set -e
+          source /mnt/modelscope/ci_env.sh
+          bash .dev_scripts/dockerci.sh

ms-swift/.github/workflows/publish.yaml ADDED Viewed

	@@ -0,0 +1,29 @@

+# Build the sdist/wheel and upload them to PyPI whenever a `v*` tag is pushed.
+name: release
+on:
+  push:
+    tags:
+      - 'v**'
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-publish
+  cancel-in-progress: true
+jobs:
+  build-n-publish:
+    runs-on: ubuntu-22.04
+    #if: startsWith(github.event.ref, 'refs/tags')
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Install wheel
+        run: pip install wheel packaging setuptools==69.5.1
+      - name: Build ModelScope Swift
+        run: python setup.py sdist bdist_wheel
+      - name: Publish package to PyPI
+        # Credentials are handed to twine through its environment variables so
+        # the token is never interpolated into the shell command line.
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          pip install twine
+          twine upload dist/* --skip-existing

ms-swift/.gitignore ADDED Viewed

	@@ -0,0 +1,151 @@

+# Byte-compiled / optimized / DLL files
+tmp
+*.ttf
+__pycache__/
+*.py[cod]
+*$py.class
+test.py
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+/package
+/temp
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# pyenv
+.python-version
+# celery beat schedule file
+celerybeat-schedule
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.vscode
+.idea
+.run
+# custom
+*.pkl
+*.pkl.json
+*.log.json
+*.whl
+*.tar.gz
+*.swp
+*.log
+*.tar.gz
+source.sh
+tensorboard.sh
+.DS_Store
+replace.sh
+result.png
+result.jpg
+result.mp4
+output/
+outputs/
+wandb/
+*.out
+benchmarks/
+eval_output/
+eval_outputs/
+transformers/
+vlmeval/
+my_model/
+/data
+result/
+images
+/custom/
+megatron_output/
+# Pytorch
+*.pth
+*.pt
+# ast template
+ast_index_file.py

ms-swift/.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,52 @@

+# pre-commit hooks used for linting and formatting.
+# NOTE: `exclude` is a Python regex matched against each file path. The
+# multi-line patterns below are anchored with ^...$, so directories need a
+# trailing `.*` to cover the files inside them.
+repos:
+  - repo: https://github.com/pycqa/flake8.git
+    rev: 4.0.0
+    hooks:
+      - id: flake8
+        exclude: |
+            (?x)^(
+                thirdparty/.*|
+                examples/.*|
+                tests/run\.py
+            )$
+  - repo: https://github.com/PyCQA/isort.git
+    rev: 4.3.21
+    hooks:
+      - id: isort
+        exclude: |
+            (?x)^(
+                examples/.*|
+                tests/run\.py|
+                swift/cli/sft\.py
+            )$
+  - repo: https://github.com/pre-commit/mirrors-yapf.git
+    rev: v0.30.0
+    hooks:
+      - id: yapf
+        exclude: |
+            (?x)^(
+                thirdparty/.*|
+                examples/.*|
+                tests/run\.py
+            )$
+  - repo: https://github.com/pre-commit/pre-commit-hooks.git
+    rev: v3.1.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: thirdparty/|tests/run.py
+      - id: check-yaml
+        exclude: thirdparty/|tests/run.py
+      - id: end-of-file-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: requirements-txt-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: double-quote-string-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: check-merge-conflict
+        exclude: thirdparty/|tests/run.py
+      - id: fix-encoding-pragma
+        exclude: thirdparty/|tests/run.py
+        args: ["--remove"]
+      - id: mixed-line-ending
+        exclude: thirdparty/|tests/run.py
+        args: ["--fix=lf"]

ms-swift/.pre-commit-config_local.yaml ADDED Viewed

	@@ -0,0 +1,52 @@

+# Same hooks as .pre-commit-config.yaml, but resolved from local checkouts
+# under /home/admin/pre-commit instead of GitHub.
+# NOTE: `exclude` is a Python regex matched against each file path. The
+# multi-line patterns below are anchored with ^...$, so directories need a
+# trailing `.*` to cover the files inside them.
+repos:
+  - repo: /home/admin/pre-commit/flake8
+    rev: 4.0.0
+    hooks:
+      - id: flake8
+        exclude: |
+            (?x)^(
+                thirdparty/.*|
+                examples/.*|
+                tests/run\.py
+            )$
+  - repo: /home/admin/pre-commit/isort
+    rev: 4.3.21
+    hooks:
+      - id: isort
+        exclude: |
+            (?x)^(
+                examples/.*|
+                tests/run\.py|
+                swift/cli/sft\.py
+            )$
+  - repo: /home/admin/pre-commit/mirrors-yapf
+    rev: v0.30.0
+    hooks:
+      - id: yapf
+        exclude: |
+            (?x)^(
+                thirdparty/.*|
+                examples/.*|
+                tests/run\.py
+            )$
+  - repo: /home/admin/pre-commit/pre-commit-hooks
+    rev: v3.1.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: thirdparty/|tests/run.py
+      - id: check-yaml
+        exclude: thirdparty/|tests/run.py
+      - id: end-of-file-fixer
+        exclude: thirdparty/
+      - id: requirements-txt-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: double-quote-string-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: check-merge-conflict
+        exclude: thirdparty/|tests/run.py
+      - id: fix-encoding-pragma
+        exclude: thirdparty/|tests/run.py
+        args: ["--remove"]
+      - id: mixed-line-ending
+        exclude: thirdparty/|tests/run.py
+        args: ["--fix=lf"]

ms-swift/CODE_OF_CONDUCT.md ADDED Viewed

	@@ -0,0 +1,132 @@

+# Contributor Covenant Code of Conduct
+## Our Pledge
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+## Our Standards
+Examples of behavior that contributes to a positive environment for our
+community include:
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the overall
+  community
+Examples of unacceptable behavior include:
+* The use of sexualized language or imagery, and sexual attention or advances of
+  any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+## Enforcement Responsibilities
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+## Scope
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+## Enforcement
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+contact@modelscope.cn.
+All complaints will be reviewed and investigated promptly and fairly.
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+## Enforcement Guidelines
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+### 1. Correction
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+### 2. Warning
+**Community Impact**: A violation through a single incident or series of
+actions.
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+### 3. Temporary Ban
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+### 4. Permanent Ban
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+## Attribution
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
+[translations]: https://www.contributor-covenant.org/translations

ms-swift/CONTRIBUTING.md ADDED Viewed

	@@ -0,0 +1,63 @@

+# Contributor Guide
+_Welcome to offer PRs, bug reports, documentation supplements or other types of contributions to SWIFT!_
+## Table of Contents
+- [Code of Conduct](#-code-of-conduct)
+- [Contribution Process](#-contribution-process)
+- [Hardware support](#-Hardware-support)
+## 📖 Code of Conduct
+Please refer to our [Code of Conduct documentation](./CODE_OF_CONDUCT.md).
+## 🔁 Contribution Process
+### What We Need
+- New Technologies and New Models: SWIFT needs to support more open-source models and datasets, or new technologies that we have not paid attention to. If you are interested please submit a PR to us.
+- Technical Propagation: If you are interested in technical propagation, you are welcome to help us write tutorials, documents or videos on any website, and send us the link.
+- Community Contribution: You can write technical articles related to SWIFT, and submit them to us. After review and approval, we will publish them on the official ModelScope accounts (Zhihu, WeChat, etc.), with your name credited.
+### Incentives
+- We will issue electronic certificates to contributors on behalf of the ModelScope community, to encourage your selfless contributions.
+- We will offer small souvenirs related to the ModelScope Community.
+- We will provide free A10 computing power during the development period. For more details, please refer to [Hardware-support](#-Hardware-support) section.
+### Submitting PR (Pull Requests)
+Any feature development is carried out in the form of Fork and then PR on GitHub.
+1. Fork: Go to the [SWIFT](https://github.com/modelscope/swift) page and click the **Fork button**. After completion, a SWIFT code repository will be cloned under your personal organization.
+2. Clone: Clone the code repository generated in the first step to your local machine and **create a new branch** for development. During development, please click the **Sync Fork button** in time to synchronize with the `main` branch to prevent code expiration and conflicts.
+3. Submit PR: After development and testing, push the code to the remote branch. On GitHub, go to the **Pull Requests page**, create a new PR, select your code branch as the source branch, and the `modelscope/swift:main` branch as the target branch.
+4. Write Description: It is necessary to provide a good feature description in the PR, so that the reviewers know the content of your modification.
+5. Review: We hope that the code to be merged is concise and efficient, so we may raise some questions and discuss them. Please note that any issues raised in the review are aimed at the code itself, not at you personally. Once all issues are discussed and resolved, your code will be approved.
+### Code Standards and Development Approach
+SWIFT has conventional variable naming conventions and development approaches. Please follow these approaches as much as possible during development.
+1. Variable names are separated by underscores, and class names are named with the first letter of each word capitalized.
+2. All Python indentation uses four spaces instead of a tab.
+3. Choose well-known open-source libraries, avoid using closed-source libraries or unstable open-source libraries, and avoid repeating the existing code.
+After the PR is submitted, SWIFT will perform two types of tests:
+- Code Lint Test: A static code compliance check test. Please make sure that you have performed code lint locally in advance.
+```shell
+pip install pre-commit # In the swift folder
+pre-commit run --all-files # Fix the errors reported by pre-commit until all checks are successful
+```
+- CI Tests: Smoke tests and unit tests, please refer to the next section.
+### Running CI Tests
+Before submitting the PR, please ensure that your development code is protected by test cases, such as smoke tests for new features, or unit tests for various edge cases. Reviewers will also pay attention to this during code review. At the same time, there will be dedicated services running CI Tests, running all test cases, and the code can only be merged after the test cases pass.
+Additionally, since some important tests have been skipped due to long running time, to ensure that your logic is correct, you can run the test locally:
+```shell
+python tests/llm/test_run.py
+```
+Please make sure this test can pass normally.
+## ✅ Hardware support
+SWIFT will provide hardware support for developers, including free GPUs. If needed, please email us ([contact@modelscope.cn](mailto:contact@modelscope.cn)) or join our WeChat group:
+<p align="left">
+<img src="asset/wechat.png" width="250" style="display: inline-block;">
+</p>

ms-swift/CONTRIBUTING_CN.md ADDED Viewed

	@@ -0,0 +1,81 @@

+# 贡献者指引
+*欢迎帮SWIFT提供Feature PR、Bug反馈、文档补充或其他类型的贡献！*
+## 目录
+- [代码规约](#-代码规约)
+- [贡献流程](#-贡献流程)
+- [资源支持](#-资源支持)
+## 📖 代码规约
+请查看我们的[代码规约文档](./CODE_OF_CONDUCT.md).
+## 🔁 贡献流程
+### 我们需要什么
+- 新技术和新模型：SWIFT需要支持更多的开源模型和数据集，或我们没有关注到的新技术，如果您对此有兴趣，可以提交PR给我们。
+- 技术布道：如果您对技术布道有兴趣，欢迎在任何网站上帮我们撰写教程文档或视频等，并将链接发给我们。
+- 社区供稿：您可以撰写和SWIFT有关的技术文章，并供稿给我们，我们审核通过后会在魔搭官方账号（知乎、公众号等）上进行发布，并署上您的名字。
+### 激励
+- 我们会以魔搭社区的身份给贡献者颁发电子证书，以鼓励您的无私贡献。
+- 我们会赠送魔搭社区相关周边小礼品。
+- 我们会赠送开发期间的免费A10算力，具体可以查看[资源支持](#-资源支持)章节。
+### 提交PR（Pull Requests）
+任何feature开发都在github上以先Fork后PR的形式进行。
+1. Fork：进入[SWIFT](https://github.com/modelscope/swift)页面后，点击**Fork按钮**执行。完成后会在您的个人组织下克隆出一个SWIFT代码库
+2. Clone：将第一步产生的代码库clone到本地并**拉新分支**进行开发，开发中请及时点击**Sync Fork按钮**同步`main`分支，防止代码过期并冲突
+3. 提交PR：开发、测试完成后将代码推送到远程分支。在github上点击**Pull Requests页面**，新建一个PR，源分支选择您提交的代码分支，目标分支选择`modelscope/swift:main`分支
+4. 撰写描述：在PR中填写良好的feature描述是必要的，让Reviewers知道您的修改内容
+5. Review：我们希望合入的代码简洁高效，因此可能会提出一些问题并讨论。请注意，任何review中提出的问题是针对代码本身，而非您个人。在所有问题讨论通过后，您的代码会被通过
+### 代码规范和开发方式
+SWIFT有约定俗成的变量命名方式和开发方式。在开发中请尽量遵循这些方式。
+1. 变量命名以下划线分割，类名以所有单词首字母大写方式命名
+2. 所有的python缩进都是四个空格取代一个tab
+3. 选用知名的开源库，避免使用闭源库或不稳定的开源库，避免重复造轮子
+SWIFT在PR提交后会进行两类测试：
+- Code Lint测试 对代码进行静态规范走查的测试，为保证该测试通过，请保证本地预先进行了Code lint。方法是：
+  ```shell
+  pip install pre-commit
+  # 在swift文件夹内
+  pre-commit run --all-files
+  # 对pre-commit报的错误进行修改，直到所有的检查都是成功状态
+  ```
+- CI Tests 冒烟测试和单元测试，请查看下一章节
+### Running CI Tests
+在提交PR前，请保证您的开发代码已经受到了测试用例的保护。例如，对新功能的冒烟测试，或者各种边缘case的单元测试等。在代码review时Reviewers也会关注这一点。同时，也会有服务专门运行CI Tests，运行所有的测试用例，测试用例通过后代码才可以合并。
+另外，由于运行时间过长，我们跳过了部分重要测试，为保证您的逻辑是正确的，可以在本地执行该测试：
+```shell
+python tests/llm/test_run.py
+```
+请保证该测试可以正常通过。
+## ✅ 资源支持
+SWIFT会为开发者提供资源支持，包括免费的GPU算力。如果需要请邮件联系我们（[contact@modelscope.cn](mailto:contact@modelscope.cn)）或加入我们的微信群：
+<p align="left">
+<img src="asset/wechat.png" width="250" style="display: inline-block;">
+</p>

ms-swift/LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

ms-swift/MANIFEST.in ADDED Viewed

	@@ -0,0 +1,5 @@

+recursive-include swift/utils *.py
+recursive-include swift/llm/dataset/data *.*
+recursive-include swift/llm/ds_config *.json
+recursive-include requirements *.txt
+recursive-include swift/plugin/loss_scale/config *.json

ms-swift/Makefile ADDED Viewed

	@@ -0,0 +1,25 @@

+WHL_BUILD_DIR :=package
+DOC_BUILD_DIR :=docs/build/
+# default rule
+default: whl docs
+.PHONY: docs
+docs:
+	bash .dev_scripts/build_docs.sh
+.PHONY: linter
+linter:
+	bash .dev_scripts/linter.sh
+.PHONY: test
+test:
+	bash .dev_scripts/citest.sh
+.PHONY: whl
+whl:
+	python setup.py sdist bdist_wheel
+.PHONY: clean
+clean:
+	rm -rf  $(WHL_BUILD_DIR) $(DOC_BUILD_DIR)

ms-swift/README.md ADDED Viewed

	@@ -0,0 +1,423 @@

+# SWIFT (Scalable lightWeight Infrastructure for Fine-Tuning)
+<p align="center">
+    <br>
+    <img src="asset/banner.png"/>
+    <br>
+</p>
+<p align="center">
+<a href="https://modelscope.cn/home">ModelScope Community Website</a>
+<br>
+        <a href="README_CN.md">中文</a> &nbsp; ｜ &nbsp; English &nbsp;
+</p>
+<p align="center">
+<img src="https://img.shields.io/badge/python-3.10-5be.svg">
+<img src="https://img.shields.io/badge/pytorch-%E2%89%A52.0-orange.svg">
+<a href="https://github.com/modelscope/modelscope/"><img src="https://img.shields.io/badge/modelscope-%E2%89%A51.19-5D91D4.svg"></a>
+<a href="https://pypi.org/project/ms-swift/"><img src="https://badge.fury.io/py/ms-swift.svg"></a>
+<a href="https://github.com/modelscope/swift/blob/main/LICENSE"><img src="https://img.shields.io/github/license/modelscope/swift"></a>
+<a href="https://pepy.tech/project/ms-swift"><img src="https://pepy.tech/badge/ms-swift"></a>
+<a href="https://github.com/modelscope/swift/pulls"><img src="https://img.shields.io/badge/PR-welcome-55EB99.svg"></a>
+</p>
+<p align="center">
+<a href="https://trendshift.io/repositories/6427" target="_blank"><img src="https://trendshift.io/api/badge/repositories/6427" alt="modelscope%2Fswift | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+</p>
+<p align="center">
+        <a href="https://arxiv.org/abs/2408.05517">Paper</a> &nbsp; ｜ <a href="https://swift.readthedocs.io/en/latest/">English Documentation</a> &nbsp; ｜ &nbsp; <a href="https://swift.readthedocs.io/zh-cn/latest/">中文文档</a> &nbsp;
+</p>
+## 📖 Table of Contents
+- [Groups](#-groups)
+- [Introduction](#-introduction)
+- [News](#-news)
+- [Installation](#%EF%B8%8F-installation)
+- [Quick Start](#-quick-start)
+- [Usage](#-usage)
+- [License](#-license)
+- [Citation](#-citation)
+## ☎ Groups
+You can contact us and communicate with us by adding our group:
+[Discord Group](https://discord.com/invite/D27yfEFVz5)              |  WeChat Group
+:-------------------------:|:-------------------------:
+<img src="asset/discord_qr.jpg" width="200" height="200">  |  <img src="asset/wechat.png" width="200" height="200">
+## 📝 Introduction
+🍲 ms-swift is an official framework provided by the ModelScope community for fine-tuning and deploying large language models and multi-modal large models. It currently supports the training (pre-training, fine-tuning, human alignment), inference, evaluation, quantization, and deployment of 500+ large models and 200+ multi-modal large models. These large language models (LLMs) include models such as Qwen3, Qwen3-MoE, Qwen2.5, InternLM3, GLM4, Mistral, DeepSeek-R1, Yi1.5, TeleChat2, Baichuan2, and Gemma2. The multi-modal LLMs include models such as Qwen2.5-VL, Qwen2-Audio, Llama4, Llava, InternVL2.5, MiniCPM-V-2.6, GLM4v, Xcomposer2.5, Yi-VL, DeepSeek-VL2, Phi3.5-Vision, and GOT-OCR2.
+🍔 Additionally, ms-swift incorporates the latest training technologies, including lightweight techniques such as LoRA, QLoRA, Llama-Pro, LongLoRA, GaLore, Q-GaLore, LoRA+, LISA, DoRA, FourierFt, ReFT, UnSloth, and Liger, as well as human alignment training methods like DPO, GRPO, RM, PPO, KTO, CPO, SimPO, and ORPO. ms-swift supports acceleration of inference, evaluation, and deployment modules using vLLM and LMDeploy, and it supports model quantization with technologies like GPTQ, AWQ, and BNB. Furthermore, ms-swift offers a Gradio-based Web UI and a wealth of best practices.
+**Why choose ms-swift?**
+- 🍎 **Model Types**: Supports 500+ pure text large models, **200+ multi-modal large models**, as well as All-to-All multi-modal models, sequence classification models, and embedding models, **covering the entire process from training to deployment**.
+- **Dataset Types**: Comes with 150+ pre-training, fine-tuning, human alignment, multi-modal datasets, and supports custom datasets.
+- **Hardware Support**: Compatible with CPU, RTX series, T4/V100, A10/A100/H100, Ascend NPU, MPS, etc.
+- 🍊 **Lightweight Training**: Supports lightweight fine-tuning methods like LoRA, QLoRA, DoRA, LoRA+, ReFT, RS-LoRA, LLaMAPro, Adapter, GaLore, Q-GaLore, LISA, UnSloth, Liger-Kernel.
+- **Distributed Training**: Supports distributed data parallel (DDP), device_map simple model parallelism, DeepSpeed ZeRO2/ZeRO3, FSDP, and other distributed training techniques.
+- **Quantization Training**: Supports training quantized models like BNB, AWQ, GPTQ, AQLM, HQQ, EETQ.
+- **RLHF Training**: Supports human alignment training methods such as DPO, GRPO, RM, PPO, KTO, CPO, SimPO, ORPO for both pure text and multi-modal large models.
+- 🍓 **Multi-Modal Training**: Supports training on different modalities like images, videos, and audio, for tasks like VQA, captioning, OCR, and grounding.
+- **Interface Training**: Provides capabilities for training, inference, evaluation, quantization through an interface, completing the whole large model pipeline.
+- **Plugin and Extension**: Supports custom model and dataset extensions, as well as customization of components like loss, metric, trainer, loss-scale, callback, optimizer.
+- 🍉 **Toolbox Capabilities**: Offers not only training support for large models and multi-modal large models but also covers the entire process of inference, evaluation, quantization, and deployment.
+- **Inference Acceleration**: Supports inference acceleration engines like PyTorch, vLLM, LMDeploy, and provides OpenAI API for accelerating inference, deployment, and evaluation modules.
+- **Model Evaluation**: Uses EvalScope as the evaluation backend and supports evaluation on 100+ datasets for both pure text and multi-modal models.
+- **Model Quantization**: Supports AWQ, GPTQ, and BNB quantized exports, with models that can use vLLM/LMDeploy for inference acceleration and continue training.
+## 🎉 News
+- 🎁 2025.05.11: GRPO now supports custom processing logic for reward models. See the GenRM example [here](./docs/source_en/Instruction/GRPO.md#customized-reward-models) .
+- 🎁 2025.04.15: The ms-swift paper has been accepted by AAAI 2025. You can find the paper at [this link](https://ojs.aaai.org/index.php/AAAI/article/view/35383).
+- 🎁 2025.03.23: Multi-round GRPO is now supported for training multi-turn dialogue scenarios (e.g., agent tool calling). Please refer to the [training script](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/train_multi_round.sh).
+- 🎁 2025.03.16: Support for Megatron's parallel training techniques is now available. Please see the [Megatron-SWIFT training documentation](https://swift.readthedocs.io/zh-cn/latest/Instruction/Megatron-SWIFT训练.html).
+- 🎁 2025.03.15: Fine-tuning of embedding models for both pure text and multimodal models is supported. Please check the [training script](https://github.com/modelscope/ms-swift/tree/main/examples/train/embedding).
+- 🎁 2025.03.05: The hybrid mode for GRPO is supported, with a script for training a 72B model on 4 GPUs (4*80G) available [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/train_72b_4gpu.sh). Tensor parallelism with vllm is also supported, with the training script available [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/multi_gpu_mp_colocate.sh).
+- 🎁 2025.02.21: The GRPO algorithm now supports LMDeploy, with the training script available [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/full_lmdeploy.sh). Additionally, the performance of the GRPO algorithm has been tested, achieving a training speed increase of up to 300% using various tricks. Please check the WandB table [here](https://wandb.ai/tastelikefeet/grpo_perf_test?nw=nwuseryuzezyz).
+- 🎁 2025.02.21: The `swift sample` command is now supported. The reinforcement fine-tuning script can be found [here](https://github.com/modelscope/ms-swift/blob/main/docs/source/Instruction/强化微调.md), and the large model API distillation sampling script is available [here](https://github.com/modelscope/ms-swift/blob/main/examples/sampler/distill/distill.sh).
+- 🔥 2025.02.12: Support for the GRPO (Group Relative Policy Optimization) training algorithm has been added. Documentation is available [here](https://github.com/modelscope/ms-swift/blob/main/docs/source/Instruction/GRPO.md).
+- 🎁 2024.12.04: Major update to **ms-swift 3.0**. Please refer to the [release notes and changes](https://swift.readthedocs.io/zh-cn/latest/Instruction/ReleaseNote3.0.html).
+<details><summary>More</summary>
+- 🎉 2024.08.12: The ms-swift paper has been published on arXiv and can be read [here](https://arxiv.org/abs/2408.05517).
+- 🔥 2024.08.05: Support for using [evalscope](https://github.com/modelscope/evalscope/) as a backend for evaluating large models and multimodal models.
+- 🔥 2024.07.29: Support for using [vllm](https://github.com/vllm-project/vllm) and [lmdeploy](https://github.com/InternLM/lmdeploy) to accelerate inference for large models and multimodal models. When performing infer/deploy/eval, you can specify `--infer_backend vllm/lmdeploy`.
+- 🔥 2024.07.24: Support for human preference alignment training for multimodal large models, including DPO/ORPO/SimPO/CPO/KTO/RM/PPO.
+- 🔥 2024.02.01: Support for Agent training! The training algorithm is derived from [this paper](https://arxiv.org/pdf/2309.00986.pdf).
+</details>
+## 🛠️ Installation
+To install using pip:
+```shell
+pip install ms-swift -U
+```
+To install from source:
+```shell
+# pip install git+https://github.com/modelscope/ms-swift.git
+git clone https://github.com/modelscope/ms-swift.git
+cd ms-swift
+pip install -e .
+```
+Running Environment:
+|              | Range        | Recommended | Notes                                     |
+| ------------ |--------------| ----------- | ----------------------------------------- |
+| python       | >=3.9        | 3.10        |                                           |
+| cuda         |              | cuda12      | No need to install if using CPU, NPU, MPS |
+| torch        | >=2.0        |             |                                           |
+| transformers | >=4.33       | 4.51      |                                           |
+| modelscope   | >=1.23       |             |                                           |
+| peft | >=0.11,<0.16 | ||
+| trl | >=0.13,<0.18 | 0.17 |RLHF|
+| deepspeed    | >=0.14       | 0.14.5 | Training                                  |
+| vllm         | >=0.5.1      | 0.7.3/0.8       | Inference/Deployment/Evaluation           |
+| lmdeploy     | >=0.5        | 0.8       | Inference/Deployment/Evaluation           |
+| evalscope | >=0.11       |  | Evaluation |
+For more optional dependencies, you can refer to [here](https://github.com/modelscope/ms-swift/blob/main/requirements/install_all.sh).
+## 🚀 Quick Start
+10 minutes of self-cognition fine-tuning of Qwen2.5-7B-Instruct on a single 3090 GPU:
+### Command Line Interface
+```shell
+# 22GB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot
+```
+Tips:
+- If you want to train with a custom dataset, you can refer to [this guide](https://swift.readthedocs.io/en/latest/Customization/Custom-dataset.html) to organize your dataset format and specify `--dataset <dataset_path>`.
+- The `--model_author` and `--model_name` parameters are only effective when the dataset includes `swift/self-cognition`.
+- To train with a different model, simply modify `--model <model_id/model_path>`.
+- By default, ModelScope is used for downloading models and datasets. If you want to use HuggingFace, simply specify `--use_hf true`.
+After training is complete, use the following command to infer with the trained weights:
+- Here, `--adapters` should be replaced with the last checkpoint folder generated during training. Since the adapters folder contains the training parameter file `args.json`, there is no need to specify `--model`, `--system` separately; Swift will automatically read these parameters. To disable this behavior, you can set `--load_args false`.
+```shell
+# Using an interactive command line for inference.
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --stream true \
+    --temperature 0 \
+    --max_new_tokens 2048
+# merge-lora and use vLLM for inference acceleration
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --stream true \
+    --merge_lora true \
+    --infer_backend vllm \
+    --max_model_len 8192 \
+    --temperature 0 \
+    --max_new_tokens 2048
+```
+Finally, use the following command to push the model to ModelScope:
+```shell
+CUDA_VISIBLE_DEVICES=0 \
+swift export \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --push_to_hub true \
+    --hub_model_id '<your-model-id>' \
+    --hub_token '<your-sdk-token>' \
+    --use_hf false
+```
+### Web-UI
+The Web-UI is a **zero-threshold** training and deployment interface solution based on Gradio interface technology. For more details, you can check [here](https://swift.readthedocs.io/en/latest/GetStarted/Web-UI.html).
+```shell
+SWIFT_UI_LANG=en swift web-ui
+```
+![image.png](./docs/resources/web-ui-en.jpg)
+### Using Python
+ms-swift also supports training and inference using Python. Below is pseudocode for training and inference. For more details, you can refer to [here](https://github.com/modelscope/ms-swift/blob/main/examples/notebook/qwen2_5-self-cognition/self-cognition-sft.ipynb).
+Training:
+```python
+# Retrieve the model and template, and add a trainable LoRA module
+model, tokenizer = get_model_tokenizer(model_id_or_path, ...)
+template = get_template(model.model_meta.template, tokenizer, ...)
+model = Swift.prepare_model(model, lora_config)
+# Download and load the dataset, and encode the text into tokens
+train_dataset, val_dataset = load_dataset(dataset_id_or_path, ...)
+train_dataset = EncodePreprocessor(template=template)(train_dataset, num_proc=num_proc)
+val_dataset = EncodePreprocessor(template=template)(val_dataset, num_proc=num_proc)
+# Train the model
+trainer = Seq2SeqTrainer(
+    model=model,
+    args=training_args,
+    data_collator=template.data_collator,
+    train_dataset=train_dataset,
+    eval_dataset=val_dataset,
+    template=template,
+)
+trainer.train()
+```
+Inference:
+```python
+# Perform inference using the native PyTorch engine
+engine = PtEngine(model_id_or_path, adapters=[lora_checkpoint])
+infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
+request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature)
+resp_list = engine.infer([infer_request], request_config)
+print(f'response: {resp_list[0].choices[0].message.content}')
+```
+## ✨ Usage
+Here is a minimal example of training to deployment using ms-swift. For more details, you can check the [examples](https://github.com/modelscope/ms-swift/tree/main/examples).
+- If you want to use other models or datasets (including multimodal models and datasets), you only need to modify `--model` to specify the corresponding model's ID or path, and modify `--dataset` to specify the corresponding dataset's ID or path.
+- By default, ModelScope is used for downloading models and datasets. If you want to use HuggingFace, simply specify `--use_hf true`.
+|   Useful Links |
+| ------ |
+|   [🔥Command Line Parameters](https://swift.readthedocs.io/en/latest/Instruction/Command-line-parameters.html)   |
+|   [Supported Models and Datasets](https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html)   |
+|   [Custom Models](https://swift.readthedocs.io/en/latest/Customization/Custom-model.html), [🔥Custom Datasets](https://swift.readthedocs.io/en/latest/Customization/Custom-dataset.html)   |
+|   [LLM Tutorial](https://github.com/modelscope/modelscope-classroom/tree/main/LLM-tutorial)   |
+### Training
+Supported Training Methods:
+| Method                             | Full-Parameter                                               | LoRA                                                                                        | QLoRA                                                        | Deepspeed                                                    | Multi-Node                                                   | Multi-Modal                                                                                  |
+|------------------------------------|--------------------------------------------------------------|---------------------------------------------------------------------------------------------|--------------------------------------------------------------|--------------------------------------------------------------|--------------------------------------------------------------|----------------------------------------------------------------------------------------------|
+| Pre-training                       | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/pretrain/train.sh) | ✅                                                                                           | ✅                                                            | ✅                                                            | ✅                                                            | ✅                                                                                            |
+| Instruction Supervised Fine-tuning | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/full/train.sh) | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/lora_sft.sh)            | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/qlora) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-gpu/deepspeed) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node)                                                            | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal)              |
+| DPO Training                       | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/dpo.sh)            | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/dpo.sh) | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/rlhf/dpo.sh)  |
+| GRPO Training                      | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/grpo_zero2.sh) | ✅                                                                                           | ✅                                                            | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/multi_node)                                    | ✅                                                                                            |
+| Reward Model Training              | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/rm.sh)             | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/rm.sh) | ✅                                                            | ✅                                                                                            |
+| PPO Training                       | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/ppo.sh)            | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/ppo.sh) | ✅                                                            | ❌                                                                                            |
+| KTO Training                       | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/kto.sh)            | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/kto.sh) | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/rlhf/kto.sh)  |
+| CPO Training                       | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/cpo.sh)            | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/cpo.sh) | ✅                                                            | ✅                                                                                            |
+| SimPO Training                     | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/simpo.sh)          | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/simpo.sh) | ✅                                                            | ✅                                                                                            |
+| ORPO Training                      | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/orpo.sh)           | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/orpo.sh) | ✅                                                            | ✅                                                                                            |
+| Classification Model Training      | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/seq_cls/qwen2_5/sft.sh) | ✅                                                            | ✅                                                            | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/seq_cls/qwen2_vl/sft.sh) |
+| Embedding Model Training           | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train_gte.sh) | ✅                                                            | ✅                                                            | ✅                                                            | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train_gme.sh)  |
+Pre-training:
+```shell
+# 8*A100
+NPROC_PER_NODE=8 \
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+swift pt \
+    --model Qwen/Qwen2.5-7B \
+    --dataset swift/chinese-c4 \
+    --streaming true \
+    --train_type full \
+    --deepspeed zero2 \
+    --output_dir output \
+    --max_steps 10000 \
+    ...
+```
+Fine-tuning:
+```shell
+CUDA_VISIBLE_DEVICES=0 swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --dataset AI-ModelScope/alpaca-gpt4-data-en \
+    --train_type lora \
+    --output_dir output \
+    ...
+```
+RLHF:
+```shell
+CUDA_VISIBLE_DEVICES=0 swift rlhf \
+    --rlhf_type dpo \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
+    --train_type lora \
+    --output_dir output \
+    ...
+```
+### Inference
+```shell
+CUDA_VISIBLE_DEVICES=0 swift infer \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --stream true \
+    --infer_backend pt \
+    --max_new_tokens 2048
+# LoRA
+CUDA_VISIBLE_DEVICES=0 swift infer \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --adapters swift/test_lora \
+    --stream true \
+    --infer_backend pt \
+    --temperature 0 \
+    --max_new_tokens 2048
+```
+### Interface Inference
+```shell
+CUDA_VISIBLE_DEVICES=0 swift app \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --stream true \
+    --infer_backend pt \
+    --max_new_tokens 2048
+```
+### Deployment
+```shell
+CUDA_VISIBLE_DEVICES=0 swift deploy \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --infer_backend vllm
+```
+### Sampling
+```shell
+CUDA_VISIBLE_DEVICES=0 swift sample \
+    --model LLM-Research/Meta-Llama-3.1-8B-Instruct \
+    --sampler_engine pt \
+    --num_return_sequences 5 \
+    --dataset AI-ModelScope/alpaca-gpt4-data-zh#5
+```
+### Evaluation
+```shell
+CUDA_VISIBLE_DEVICES=0 swift eval \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --infer_backend lmdeploy \
+    --eval_backend OpenCompass \
+    --eval_dataset ARC_c
+```
+### Quantization
+```shell
+CUDA_VISIBLE_DEVICES=0 swift export \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --quant_bits 4 --quant_method awq \
+    --dataset AI-ModelScope/alpaca-gpt4-data-zh \
+    --output_dir Qwen2.5-7B-Instruct-AWQ
+```
+### Push Model
+```shell
+swift export \
+    --model <model-path> \
+    --push_to_hub true \
+    --hub_model_id '<model-id>' \
+    --hub_token '<sdk-token>'
+```
+## 🏛 License
+This framework is licensed under the [Apache License (Version 2.0)](https://github.com/modelscope/modelscope/blob/master/LICENSE). For models and datasets, please refer to the original resource page and follow the corresponding License.
+## 📎 Citation
+```bibtex
+@misc{zhao2024swiftascalablelightweightinfrastructure,
+      title={SWIFT:A Scalable lightWeight Infrastructure for Fine-Tuning},
+      author={Yuze Zhao and Jintao Huang and Jinghan Hu and Xingjun Wang and Yunlin Mao and Daoze Zhang and Zeyinzi Jiang and Zhikai Wu and Baole Ai and Ang Wang and Wenmeng Zhou and Yingda Chen},
+      year={2024},
+      eprint={2408.05517},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2408.05517},
+}
+```
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=modelscope/swift&type=Date)](https://star-history.com/#modelscope/ms-swift&Date)

ms-swift/README_CN.md ADDED Viewed

	@@ -0,0 +1,413 @@

+# SWIFT (Scalable lightWeight Infrastructure for Fine-Tuning)
+<p align="center">
+    <br>
+    <img src="asset/banner.png"/>
+    <br>
+</p>
+<p align="center">
+<a href="https://modelscope.cn/home">魔搭社区官网</a>
+<br>
+        中文&nbsp; ｜ &nbsp;<a href="README.md">English</a>&nbsp;
+</p>
+<p align="center">
+<img src="https://img.shields.io/badge/python-3.10-5be.svg">
+<img src="https://img.shields.io/badge/pytorch-%E2%89%A52.0-orange.svg">
+<a href="https://github.com/modelscope/modelscope/"><img src="https://img.shields.io/badge/modelscope-%E2%89%A51.19-5D91D4.svg"></a>
+<a href="https://pypi.org/project/ms-swift/"><img src="https://badge.fury.io/py/ms-swift.svg"></a>
+<a href="https://github.com/modelscope/swift/blob/main/LICENSE"><img src="https://img.shields.io/github/license/modelscope/swift"></a>
+<a href="https://pepy.tech/project/ms-swift"><img src="https://pepy.tech/badge/ms-swift"></a>
+<a href="https://github.com/modelscope/swift/pulls"><img src="https://img.shields.io/badge/PR-welcome-55EB99.svg"></a>
+</p>
+<p align="center">
+<a href="https://trendshift.io/repositories/6427" target="_blank"><img src="https://trendshift.io/api/badge/repositories/6427" alt="modelscope%2Fswift | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+</p>
+<p align="center">
+        <a href="https://arxiv.org/abs/2408.05517">论文</a> &nbsp; ｜ <a href="https://swift.readthedocs.io/en/latest/">English Documentation</a> &nbsp; ｜ &nbsp; <a href="https://swift.readthedocs.io/zh-cn/latest/">中文文档</a> &nbsp;
+</p>
+##  📖 目录
+- [用户群](#-用户群)
+- [简介](#-简介)
+- [新闻](#-新闻)
+- [安装](#%EF%B8%8F-安装)
+- [快速开始](#-快速开始)
+- [如何使用](#-如何使用)
+- [License](#-license)
+- [引用](#-引用)
+## ☎ 用户群
+请扫描下面的二维码来加入我们的交流群：
+[Discord Group](https://discord.com/invite/D27yfEFVz5)              |  微信群
+:-------------------------:|:-------------------------:
+<img src="asset/discord_qr.jpg" width="200" height="200">  |  <img src="asset/wechat.png" width="200" height="200">
+## 📝 简介
+🍲 ms-swift是魔搭社区提供的大模型与多模态大模型微调部署框架，现已支持500+大模型与200+多模态大模型的训练（预训练、微调、人类对齐）、推理、评测、量化与部署。其中大模型包括：Qwen3、Qwen3-MoE、Qwen2.5、InternLM3、GLM4、Mistral、DeepSeek-R1、Yi1.5、TeleChat2、Baichuan2、Gemma2等模型，多模态大模型包括：Qwen2.5-VL、Qwen2-Audio、Llama4、Llava、InternVL2.5、MiniCPM-V-2.6、GLM4v、Xcomposer2.5、Yi-VL、DeepSeek-VL2、Phi3.5-Vision、GOT-OCR2等模型。
+🍔 除此之外，ms-swift汇集了最新的训练技术，包括LoRA、QLoRA、Llama-Pro、LongLoRA、GaLore、Q-GaLore、LoRA+、LISA、DoRA、FourierFt、ReFT、UnSloth和Liger等轻量化训练技术，以及DPO、GRPO、RM、PPO、KTO、CPO、SimPO、ORPO等人类对齐训练方法。ms-swift支持使用vLLM和LMDeploy对推理、评测和部署模块进行加速，并支持使用GPTQ、AWQ、BNB等技术对大模型进行量化。ms-swift还提供了基于Gradio的Web-UI界面及丰富的最佳实践。
+**为什么选择ms-swift？**
+- 🍎 **模型类型**：支持500+纯文本大模型、**200+多模态大模型**以及All-to-All全模态模型、序列分类模型、Embedding模型**训练到部署全流程**。
+- **数据集类型**：内置150+预训练、微调、人类对齐、多模态等各种类型的数据集，并支持自定义数据集。
+- **硬件支持**：CPU、RTX系列、T4/V100、A10/A100/H100、Ascend NPU、MPS等。
+- 🍊 **轻量训练**：支持了LoRA、QLoRA、DoRA、LoRA+、ReFT、RS-LoRA、LLaMAPro、Adapter、GaLore、Q-GaLore、LISA、UnSloth、Liger-Kernel等轻量微调方式。
+- **分布式训练**：支持分布式数据并行（DDP）、device_map简易模型并行、DeepSpeed ZeRO2 ZeRO3、FSDP等分布式训练技术。
+- **量化训练**：支持对BNB、AWQ、GPTQ、AQLM、HQQ、EETQ量化模型进行训练。
+- **RLHF训练**：支持纯文本大模型和多模态大模型的DPO、GRPO、RM、PPO、KTO、CPO、SimPO、ORPO等人类对齐训练方法。
+- 🍓 **多模态训练**：支持对图像、视频和语音不同模态模型进行训练，支持VQA、Caption、OCR、Grounding任务的训练。
+- **界面训练**：以界面的方式提供训练、推理、评测、量化的能力，完成大模型的全链路。
+- **插件化与拓展**：支持自定义模型和数据集拓展，支持对loss、metric、trainer、loss-scale、callback、optimizer等组件进行自定义。
+- 🍉 **工具箱能力**：不仅提供大模型和多模态大模型的训练支持，还涵盖其推理、评测、量化和部署全流程。
+- **推理加速**：支持PyTorch、vLLM、LmDeploy推理加速引擎，并提供OpenAI接口，为推理、部署和评测模块提供加速。
+- **模型评测**：以EvalScope作为评测后端，支持100+评测数据集对纯文本和多模态模型进行评测。
+- **模型量化**：支持AWQ、GPTQ和BNB的量化导出，导出的模型支持使用vLLM/LmDeploy推理加速，并支持继续训练。
+## 🎉 新闻
+- 🎁 2025.05.11: GRPO中的奖励模型支持自定义处理逻辑，GenRM的例子参考[这里](./docs/source/Instruction/GRPO.md#自定义奖励模型)
+- 🎁 2025.04.15: ms-swift论文已经被AAAI 2025接收，论文地址在[这里](https://ojs.aaai.org/index.php/AAAI/article/view/35383)。
+- 🎁 2025.03.23: 支持了多轮GRPO，用于构建多轮对话场景的训练(例如agent tool calling)，请查看[训练脚本](examples/train/grpo/internal/train_multi_round.sh)。
+- 🎁 2025.03.16: 支持了Megatron的并行技术进行训练，请查看[Megatron-SWIFT训练文档](https://swift.readthedocs.io/zh-cn/latest/Instruction/Megatron-SWIFT训练.html)。
+- 🎁 2025.03.15: 支持纯文本和多模态模型的embedding模型的微调，请查看[训练脚本](examples/train/embedding)。
+- 🎁 2025.03.05: 支持GRPO的hybrid模式，4GPU(4*80G)训练72B模型的脚本参考[这里](examples/train/grpo/internal/train_72b_4gpu.sh)。同时支持vllm的tensor并行，训练脚本参考[这里](examples/train/grpo/internal/multi_gpu_mp_colocate.sh)。
+- 🎁 2025.02.21: GRPO算法支持使用LMDeploy，训练脚本参考[这里](examples/train/grpo/internal/full_lmdeploy.sh)。此外测试了GRPO算法的性能，使用一些tricks使训练速度提高到300%。WanDB表格请查看[这里](https://wandb.ai/tastelikefeet/grpo_perf_test?nw=nwuseryuzezyz)。
+- 🎁 2025.02.21: 支持`swift sample`命令。强化微调脚本参考[这里](docs/source/Instruction/强化微调.md)，大模型API蒸馏采样脚本参考[这里](examples/sampler/distill/distill.sh)。
+- 🔥 2025.02.12: 支持GRPO (Group Relative Policy Optimization) 训练算法，文档参考[这里](docs/source/Instruction/GRPO.md)。
+- 🎁 2024.12.04: **ms-swift3.0**大版本更新。请查看[发布说明和更改](https://swift.readthedocs.io/zh-cn/latest/Instruction/ReleaseNote3.0.html)。
+<details><summary>更多</summary>
+- 🎉 2024.08.12: ms-swift论文已经发布到arXiv上，可以点击[这里](https://arxiv.org/abs/2408.05517)阅读。
+- 🔥 2024.08.05: 支持使用[evalscope](https://github.com/modelscope/evalscope/)作为后端进行大模型和多模态模型的评测。
+- 🔥 2024.07.29: 支持使用[vllm](https://github.com/vllm-project/vllm), [lmdeploy](https://github.com/InternLM/lmdeploy)对大模型和多模态大模型进行推理加速，在infer/deploy/eval时额外指定`--infer_backend vllm/lmdeploy`即可。
+- 🔥 2024.07.24: 支持对多模态大模型进行人类偏好对齐训练，包括DPO/ORPO/SimPO/CPO/KTO/RM/PPO。
+- 🔥 2024.02.01: 支持Agent训练！训练算法源自这篇[论文](https://arxiv.org/pdf/2309.00986.pdf)。
+</details>
+## 🛠️ 安装
+使用pip进行安装：
+```shell
+pip install ms-swift -U
+```
+从源代码安装：
+```shell
+# pip install git+https://github.com/modelscope/ms-swift.git
+git clone https://github.com/modelscope/ms-swift.git
+cd ms-swift
+pip install -e .
+```
+运行环境：
+|        | 范围           | 推荐 | 备注 |
+| ------ |--------------| ---- | --|
+| python | >=3.9        | 3.10 ||
+| cuda |              | cuda12 |使用cpu、npu、mps则无需安装|
+| torch | >=2.0        |  ||
+| transformers | >=4.33       | 4.51 ||
+| modelscope | >=1.23       |  ||
+| peft | >=0.11,<0.16 | ||
+| trl | >=0.13,<0.18 | 0.17 |RLHF|
+| deepspeed | >=0.14       | 0.14.5 |训练|
+| vllm | >=0.5.1      | 0.7.3/0.8 |推理/部署/评测|
+| lmdeploy | >=0.5        | 0.8 |推理/部署/评测|
+| evalscope | >=0.11       | |评测|
+更多可选依赖可以参考[这里](https://github.com/modelscope/ms-swift/blob/main/requirements/install_all.sh)。
+## 🚀 快速开始
+**10分钟**在单卡3090上对Qwen2.5-7B-Instruct进行自我认知微调：
+### 命令行
+```shell
+# 22GB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot
+```
+小贴士：
+- 如果要使用自定义数据集进行训练，你可以参考[这里](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html)组织数据集格式，并指定`--dataset <dataset_path>`。
+- `--model_author`和`--model_name`参数只有当数据集中包含`swift/self-cognition`时才生效。
+- 如果要使用其他模型进行训练，你只需要修改`--model <model_id/model_path>`即可。
+- 默认使用ModelScope进行模型和数据集的下载。如果要使用HuggingFace，指定`--use_hf true`即可。
+训练完成后，使用以下命令对训练后的权重进行推理：
+- 这里的`--adapters`需要替换成训练生成的last checkpoint文件夹。由于adapters文件夹中包含了训练的参数文件`args.json`，因此不需要额外指定`--model`，`--system`，swift会自动读取这些参数。如果要关闭此行为，可以设置`--load_args false`。
+```shell
+# 使用交互式命令行进行推理
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --stream true \
+    --temperature 0 \
+    --max_new_tokens 2048
+# merge-lora并使用vLLM进行推理加速
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --stream true \
+    --merge_lora true \
+    --infer_backend vllm \
+    --max_model_len 8192 \
+    --temperature 0 \
+    --max_new_tokens 2048
+```
+最后，使用以下命令将模型推送到ModelScope：
+```shell
+CUDA_VISIBLE_DEVICES=0 \
+swift export \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --push_to_hub true \
+    --hub_model_id '<your-model-id>' \
+    --hub_token '<your-sdk-token>' \
+    --use_hf false
+```
+### Web-UI
+Web-UI是基于gradio界面技术的**零门槛**训练、部署界面方案，具体可以查看[这里](https://swift.readthedocs.io/zh-cn/latest/GetStarted/Web-UI.html)。
+```shell
+swift web-ui
+```
+![image.png](./docs/resources/web-ui.jpg)
+### 使用Python
+ms-swift也支持使用python的方式进行训练和推理。下面给出训练和推理的**伪代码**，具体可以查看[这里](https://github.com/modelscope/ms-swift/blob/main/examples/notebook/qwen2_5-self-cognition/self-cognition-sft.ipynb)。
+训练：
+```python
+# 获取模型和template，并加入可训练的LoRA模块
+model, tokenizer = get_model_tokenizer(model_id_or_path, ...)
+template = get_template(model.model_meta.template, tokenizer, ...)
+model = Swift.prepare_model(model, lora_config)
+# 下载并载入数据集，并将文本encode成tokens
+train_dataset, val_dataset = load_dataset(dataset_id_or_path, ...)
+train_dataset = EncodePreprocessor(template=template)(train_dataset, num_proc=num_proc)
+val_dataset = EncodePreprocessor(template=template)(val_dataset, num_proc=num_proc)
+# 进行训练
+trainer = Seq2SeqTrainer(
+    model=model,
+    args=training_args,
+    data_collator=template.data_collator,
+    train_dataset=train_dataset,
+    eval_dataset=val_dataset,
+    template=template,
+)
+trainer.train()
+```
+推理：
+```python
+# 使用原生pytorch引擎进行推理
+engine = PtEngine(model_id_or_path, adapters=[lora_checkpoint])
+infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
+request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature)
+resp_list = engine.infer([infer_request], request_config)
+print(f'response: {resp_list[0].choices[0].message.content}')
+```
+## ✨ 如何使用
+这里给出使用ms-swift进行训练到部署的最简示例，具体可以查看[examples](https://github.com/modelscope/ms-swift/tree/main/examples)。
+- 若想使用其他模型或者数据集（含多模态模型和数据集），你只需要修改`--model`指定对应模型的id或者path，修改`--dataset`指定对应数据集的id或者path即可。
+- 默认使用ModelScope进行模型和数据集的下载。如果要使用HuggingFace，指定`--use_hf true`即可。
+|   常用链接 |
+| ------ |
+|   [🔥命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)   |
+|   [支持的模型和数据集](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.html)   |
+|   [自定义模型](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%A8%A1%E5%9E%8B.html), [🔥自定义数据集](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html)   |
+|   [大模型教程](https://github.com/modelscope/modelscope-classroom/tree/main/LLM-tutorial)   |
+### 训练
+支持的训练方法：
+| 方法   | 全参数 | LoRA                                                                                        | QLoRA | Deepspeed | 多机 | 多模态                                                                                          |
+| ------ | ------ |---------------------------------------------------------------------------------------------| ----- | ------ | ------ |----------------------------------------------------------------------------------------------|
+| 预训练 | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/pretrain/train.sh) | ✅                                                                                           | ✅ | ✅ | ✅ | ✅                                                                                            |
+| 指令监督微调 | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/full/train.sh) | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/lora_sft.sh)            | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/qlora) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-gpu/deepspeed) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal)              |
+| DPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/dpo.sh)            | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/dpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/rlhf/dpo.sh)  |
+| GRPO训练 | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/grpo_zero2.sh) | ✅                                                                                           | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/multi_node) | ✅                                                                                            |
+| 奖励模型训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/rm.sh)             | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/rm.sh) | ✅ | ✅                                                                                            |
+| PPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/ppo.sh)            | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/ppo.sh) | ✅ | ❌                                                                                            |
+| KTO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/kto.sh)            | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/kto.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/rlhf/kto.sh)  |
+| CPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/cpo.sh)            | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/cpo.sh) | ✅ | ✅                                                                                            |
+| SimPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/simpo.sh)          | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/simpo.sh) | ✅ | ✅                                                                                            |
+| ORPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/orpo.sh)           | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/orpo.sh) | ✅ | ✅                                                                                            |
+| 分类模型训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/seq_cls/qwen2_5/sft.sh) | ✅ | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/seq_cls/qwen2_vl/sft.sh) |
+| Embedding模型训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train_gte.sh) | ✅ | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train_gme.sh)  |
+预训练：
+```shell
+# 8*A100
+NPROC_PER_NODE=8 \
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+swift pt \
+    --model Qwen/Qwen2.5-7B \
+    --dataset swift/chinese-c4 \
+    --streaming true \
+    --train_type full \
+    --deepspeed zero2 \
+    --output_dir output \
+    --max_steps 10000 \
+    ...
+```
+微调：
+```shell
+CUDA_VISIBLE_DEVICES=0 swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --dataset AI-ModelScope/alpaca-gpt4-data-zh \
+    --train_type lora \
+    --output_dir output \
+    ...
+```
+RLHF：
+```shell
+CUDA_VISIBLE_DEVICES=0 swift rlhf \
+    --rlhf_type dpo \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
+    --train_type lora \
+    --output_dir output \
+    ...
+```
+### 推理
+```shell
+CUDA_VISIBLE_DEVICES=0 swift infer \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --stream true \
+    --infer_backend pt \
+    --max_new_tokens 2048
+# LoRA
+CUDA_VISIBLE_DEVICES=0 swift infer \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --adapters swift/test_lora \
+    --stream true \
+    --infer_backend pt \
+    --temperature 0 \
+    --max_new_tokens 2048
+```
+### 界面推理
+```shell
+CUDA_VISIBLE_DEVICES=0 swift app \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --stream true \
+    --infer_backend pt \
+    --max_new_tokens 2048 \
+    --lang zh
+```
+### 部署
+```shell
+CUDA_VISIBLE_DEVICES=0 swift deploy \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --infer_backend vllm
+```
+### 采样
+```shell
+CUDA_VISIBLE_DEVICES=0 swift sample \
+    --model LLM-Research/Meta-Llama-3.1-8B-Instruct \
+    --sampler_engine pt \
+    --num_return_sequences 5 \
+    --dataset AI-ModelScope/alpaca-gpt4-data-zh#5
+```
+### 评测
+```shell
+CUDA_VISIBLE_DEVICES=0 swift eval \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --infer_backend lmdeploy \
+    --eval_backend OpenCompass \
+    --eval_dataset ARC_c
+```
+### 量化
+```shell
+CUDA_VISIBLE_DEVICES=0 swift export \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --quant_bits 4 --quant_method awq \
+    --dataset AI-ModelScope/alpaca-gpt4-data-zh \
+    --output_dir Qwen2.5-7B-Instruct-AWQ
+```
+### 推送模型
+```shell
+swift export \
+    --model <model-path> \
+    --push_to_hub true \
+    --hub_model_id '<model-id>' \
+    --hub_token '<sdk-token>'
+```
+## 🏛 License
+本框架使用[Apache License (Version 2.0)](https://github.com/modelscope/modelscope/blob/master/LICENSE)进行许可。模型和数据集请查看原资源页面并遵守对应License。
+## 📎 引用
+```bibtex
+@misc{zhao2024swiftascalablelightweightinfrastructure,
+      title={SWIFT:A Scalable lightWeight Infrastructure for Fine-Tuning},
+      author={Yuze Zhao and Jintao Huang and Jinghan Hu and Xingjun Wang and Yunlin Mao and Daoze Zhang and Zeyinzi Jiang and Zhikai Wu and Baole Ai and Ang Wang and Wenmeng Zhou and Yingda Chen},
+      year={2024},
+      eprint={2408.05517},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2408.05517},
+}
+```
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=modelscope/swift&type=Date)](https://star-history.com/#modelscope/ms-swift&Date)

ms-swift/add_errorType.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import json
+# 读取原始的allcorrect.json文件
+with open('ms-swift/matched_scores_2_1.json', 'r', encoding='utf-8') as f:
+    allcorrect_data = json.load(f)
+# 读取merged_shuffled_train.json文件
+with open('/root/autodl-tmp/600_train/merged_shuffled_train.json', 'r', encoding='utf-8') as f:
+    merged_data = json.load(f)
+# 遍历allcorrect.json中的每个条目
+for entry in allcorrect_data:
+    # 获取key
+    key = entry.get('key')
+    if key:
+        # 在merged_data中查找对应的条目
+        if key in merged_data:
+            # 获取error_type并添加到entry中
+            error_type = merged_data[key].get('error_type')
+            entry['error_type'] = error_type
+# 将更新后的数据写回文件
+output_file = 'ms-swift/allcorrect_with_error_type.json'
+with open(output_file, 'w', encoding='utf-8') as f:
+    json.dump(allcorrect_data, f, ensure_ascii=False, indent=2)
+print(f"处理完成，结果已保存到 {output_file}")
+# 统计error_type的分布
+error_type_stats = {}
+for entry in allcorrect_data:
+    error_type = entry.get('error_type')
+    if error_type:
+        error_type_stats[error_type] = error_type_stats.get(error_type, 0) + 1
+    else:
+        error_type_stats['no_error_type'] = error_type_stats.get('no_error_type', 0) + 1
+print("\nError Type 统计:")
+for error_type, count in error_type_stats.items():
+    print(f"{error_type}: {count}")

ms-swift/allcorrect_with_error_type.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ms-swift/asset/discord_qr.jpg ADDED Viewed

ms-swift/asset/wechat.png ADDED Viewed

ms-swift/clean_transcripts.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import json
+import re
+from typing import Dict, List, Optional, Tuple
+def parse_timestamp(timestamp: str) -> Tuple[int, int]:
+    """Convert timestamp string like '00:15' to seconds."""
+    minutes, seconds = map(int, timestamp.split(':'))
+    return minutes * 60 + seconds
+def extract_time_and_speaker(line: str) -> Tuple[Tuple[int, int], str]:
+    """Extract time range and speaker from a line."""
+    # Extract time range
+    time_match = re.match(r'\[(\d{2}:\d{2}) - (\d{2}:\d{2})\] (Speaker [A-Z]):', line)
+    if not time_match:
+        return None, None
+    start_time = parse_timestamp(time_match.group(1))
+    end_time = parse_timestamp(time_match.group(2))
+    speaker = time_match.group(3)
+    return (start_time, end_time), speaker
+def has_overlap(range1: Tuple[int, int], range2: Tuple[int, int]) -> bool:
+    """Check if two time ranges overlap."""
+    start1, end1 = range1
+    start2, end2 = range2
+    return not (end1 <= start2 or end2 <= start1)
+def has_same_speaker_overlap(transcript: str) -> bool:
+    """Check if a transcript contains overlapping timestamps for the same speaker."""
+    lines = transcript.split('\n')
+    # Dictionary to store time ranges for each speaker
+    speaker_ranges = {}
+    for line in lines:
+        if not line.strip():
+            continue
+        time_range, speaker = extract_time_and_speaker(line)
+        if time_range is None or speaker is None:
+            continue
+        # Check for overlaps with existing ranges of the same speaker
+        if speaker in speaker_ranges:
+            for existing_range in speaker_ranges[speaker]:
+                if has_overlap(time_range, existing_range):
+                    return True
+            speaker_ranges[speaker].append(time_range)
+        else:
+            speaker_ranges[speaker] = [time_range]
+    return False
+def process_file(input_file: str, output_file: str, delete_file: str):
+    """Process the JSON file and separate entries with same-speaker overlapping timestamps."""
+    with open(input_file, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    if isinstance(data, dict):
+        data = [data]
+    cleaned_data = []
+    deleted_data = []
+    removed_count = 0
+    for entry in data:
+        if 'model_output' in entry:
+            if not has_same_speaker_overlap(entry['model_output']):
+                cleaned_data.append(entry)
+            else:
+                deleted_data.append(entry)
+                removed_count += 1
+                print(f"Removing entry with key: {entry.get('key', 'unknown')}")
+    # Save cleaned data
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(cleaned_data, f, ensure_ascii=False, indent=2)
+    # Save deleted data
+    with open(delete_file, 'w', encoding='utf-8') as f:
+        json.dump(deleted_data, f, ensure_ascii=False, indent=2)
+    print(f"\nProcessing Summary:")
+    print(f"Processed {len(data)} entries")
+    print(f"Removed {removed_count} entries with same-speaker overlapping timestamps")
+    print(f"Remaining entries: {len(cleaned_data)}")
+if __name__ == '__main__':
+    input_file = 'silence_overlaps/transcriptions.json'
+    output_file = 'silence_overlaps/cleaned_transcriptions2.json'
+    delete_file = 'silence_overlaps/delete_transcript2.json'
+    process_file(input_file, output_file, delete_file)
+    print(f"\nCleaned transcriptions have been saved to {output_file}")
+    print(f"Deleted entries have been saved to {delete_file}")

ms-swift/dataset.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
+{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}

ms-swift/dataset_OSST2.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ms-swift/dataset_Overlap2.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ms-swift/dataset_new.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ms-swift/dataset_new_2.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ms-swift/dataset_new_4_overlap.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ms-swift/dataset_overlap5s716_gemini.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ms-swift/docs/Makefile ADDED Viewed

	@@ -0,0 +1,20 @@

+# Minimal makefile for Sphinx documentation
+#
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+.PHONY: help Makefile
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

ms-swift/docs/README.md ADDED Viewed

	@@ -0,0 +1,37 @@

+## maintain docs
+1. build docs
+    ```shell
+    # in root directory:
+    make docs
+    ```
+2. doc string format
+    We adopt the google style docstring format as the standard, please refer to the following documents.
+    1. Google Python style guide docstring [link](http://google.github.io/styleguide/pyguide.html#381-docstrings)
+    2. Google docstring example [link](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html)
+    3. sample：torch.nn.modules.conv [link](https://pytorch.org/docs/stable/_modules/torch/nn/modules/conv.html#Conv1d)
+    4. load function as an example：
+    ```python
+    def load(file, file_format=None, **kwargs):
+        """Load data from json/yaml/pickle files.
+        This method provides a unified api for loading data from serialized files.
+        Args:
+            file (str or :obj:`Path` or file-like object): Filename or a file-like
+                object.
+            file_format (str, optional): If not specified, the file format will be
+                inferred from the file extension, otherwise use the specified one.
+                Currently supported formats include "json", "yaml/yml".
+        Examples:
+            >>> load('/path/of/your/file')  # file is stored in disk
+            >>> load('https://path/of/your/file')  # file is stored on internet
+            >>> load('oss://path/of/your/file')  # file is stored in petrel
+        Returns:
+            The content from the file.
+        """
+    ```

ms-swift/docs/make.bat ADDED Viewed

	@@ -0,0 +1,35 @@

+@ECHO OFF
+pushd %~dp0
+REM Command file for Sphinx documentation
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+if "%1" == "" goto help
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+:end
+popd

ms-swift/docs/resources/grpo.png ADDED Viewed

ms-swift/docs/source/.readthedocs.yaml ADDED Viewed

	@@ -0,0 +1,29 @@

+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+# Required
+version: 2
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.10"
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/source/conf.py
+# Optionally build your docs in additional formats such as PDF and ePub
+# formats:
+#    - pdf
+#    - epub
+# Optional but recommended, declare the Python requirements required
+# to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+   install:
+      - requirements: requirements/docs.txt
+      - requirements: requirements/framework.txt

ms-swift/docs/source/BestPractices/GRPO代码训练.md ADDED Viewed

	@@ -0,0 +1,134 @@

+# GRPO代码训练
+本文档介绍如何使用GRPO对模型进行代码训练
+模型：[Qwen/Qwen2.5-7B-Instruct](https://www.modelscope.cn/models/Qwen/Qwen2.5-7B-Instruct)
+数据集：[open-r1/verifiable-coding-problems-python-10k](https://www.modelscope.cn/datasets/open-r1/verifiable-coding-problems-python-10k/dataPeview)
+数据集样例
+```json
+{
+  "problem": "Solve the following coding problem using the programming language python: Polycarp has $n$ different binary words. A word called binary if it contains only characters '0' and '1'. For example, these words are binary: \"0001\", \"11\", \"0\" and \"0011100\". Polycarp wants to offer his set of $n$ binary words to play a game \"words\". In this game, players name words and each next word (starting from the second) must start with the last character of the previous word. The first word can be any. For example, these sequence of words can be named during the game: \"0101\", \"1\", \"10\", \"00\", \"00001\". Word reversal is the operation of reversing the order of the characters. For example, the word \"0111\" after the reversal becomes \"1110\", the word \"11010\" after the reversal becomes \"01011\". Probably, Polycarp has such a set of words that there is no way to put them in the order correspondent to the game rules. In this situation, he wants to reverse some words from his set so that: the final set of $n$ words still contains different words (i.e. all words are unique); there is a way to put all words of the final set of words in the order so that the final sequence of $n$ words is consistent with the game rules. Polycarp wants to reverse minimal number of words. Please, help him. -----Input----- The first line of the input contains one integer $t$ ($1 \\le t \\le 10^4$) — the number of test cases in the input. Then $t$ test cases follow. The first line of a test case contains one integer $n$ ($1 \\le n \\le 2\\cdot10^5$) — the number of words in the Polycarp's set. Next $n$ lines contain these words. All of $n$ words aren't empty and contains only characters '0' and '1'. The sum of word lengths doesn't exceed $4\\cdot10^6$. All words are different. Guaranteed, that the sum of $n$ for all test cases in the input doesn't exceed $2\\cdot10^5$. Also, guaranteed that the sum of word lengths for all test cases in the input doesn't exceed $4\\cdot10^6$. 
-----Output----- Print answer for all of $t$ test cases in the order they appear. If there is no answer for the test case, print -1. Otherwise, the first line of the output should contain $k$ ($0 \\le k \\le n$) — the minimal number of words in the set which should be reversed. The second line of the output should contain $k$ distinct integers — the indexes of the words in the set which should be reversed. Words are numerated from $1$ to $n$ in the order they appear. If $k=0$ you can skip this line (or you can print an empty line). If there are many answers you can print any of them. -----Example----- Input 4 4 0001 1000 0011 0111 3 010 101 0 2 00000 00001 4 01 001 0001 00001 Output 1 3 -1 0 2 1 2 The input will be stdin and you should print your solution to stdout Now solve the problem and return the code.",
+  "verification_info": {
+    "language": "python",
+    "test_cases": [
+      {
+        "input": "4\n4\n0001\n1000\n0011\n0111\n3\n010\n101\n0\n2\n00000\n00001\n4\n01\n001\n0001\n00001\n",
+        "output": "1\n3 \n-1\n0\n\n2\n1 2 \n",
+        "type": "stdin_stdout"
+      }
+    ]
+  }
+}
+```
+`verification_info` 提供了程序语言以及测试用例，其中包含输入和预期的输出。
+## 奖励函数
+使用`code_reward`和`code_format`奖励进行训练，实现细节见[代码](../../../examples/train/grpo/plugin/plugin.py)
+- `code_reward`通过[e2b](https://e2b.dev/)或[judge0](https://judge0.com/)执行生成的代码，根据数据集中的测试用例对代码进行验证给出奖励值。
+- `code_format`要求模型输出包含代码块的格式化回答。
+注：当前通过e2b执行代码仅支持python语言，如需执行其他语言，可以使用judge0执行（[judge0支持语言列表](https://github.com/judge0/judge0?tab=readme-ov-file#supported-languages)）。
+## 训练脚本
+### e2b
+- 在[e2b](https://e2b.dev/dashboard)注册获取E2B_API_KEY，并设置为环境变量。
+- `--reward_funcs`添加`external_code_reward`作为奖励函数。
+- `--external_plugins`设置为plugin.py的路径。
+```bash
+E2B_API_KEY=xxx \
+WANDB_API_KEY=xxx \
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+NPROC_PER_NODE=7 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --external_plugins examples/train/grpo/plugin/plugin.py \
+    --reward_funcs external_code_reward external_code_format \
+    --reward_weights 1.0 0.1 \
+    --use_vllm true \
+    --vllm_device auto \
+    --vllm_gpu_memory_utilization 0.7 \
+    --vllm_max_model_len 8192 \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --dataset 'open-r1/verifiable-coding-problems-python-10k' \
+    --max_completion_length 2048 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
+    --learning_rate 1e-6 \
+    --gradient_accumulation_steps 1 \
+    --eval_steps 200 \
+    --save_steps 200 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 4096 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_generations 14 \
+    --temperature 0.9 \
+    --num_infer_workers 1 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --deepspeed zero2 \
+    --log_completions true \
+    --report_to wandb
+```
+### judge0
+- 设置环境变量：
+    - （必需）JUDGE0_ENDPOINT: judge0访问地址。
+    - （可选）JUDGE0_X_AUTH_TOKEN: judge0访问Token。
+- `--reward_funcs`添加`external_code_reward_by_judge0`作为奖励函数。
+- `--external_plugins`设置为plugin.py的路径。
+```bash
+JUDGE0_ENDPOINT=xxx \
+JUDGE0_X_AUTH_TOKEN=xxx \
+WANDB_API_KEY=xxx \
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+NPROC_PER_NODE=7 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --external_plugins examples/train/grpo/plugin/plugin.py \
+    --reward_funcs external_code_reward_by_judge0 external_code_format \
+    --reward_weights 1.0 0.1 \
+    --use_vllm true \
+    --vllm_device auto \
+    --vllm_gpu_memory_utilization 0.7 \
+    --vllm_max_model_len 8192 \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --dataset 'open-r1/verifiable-coding-problems-python-10k' \
+    --max_completion_length 2048 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
+    --learning_rate 1e-6 \
+    --gradient_accumulation_steps 1 \
+    --eval_steps 200 \
+    --save_steps 200 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 4096 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_generations 14 \
+    --temperature 0.9 \
+    --num_infer_workers 1 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --deepspeed zero2 \
+    --log_completions true \
+    --report_to wandb
+```
+训练奖励曲线图
+![训练奖励曲线图](../../resources/grpo_code.png)

ms-swift/docs/source/BestPractices/GRPO多模态训练.md ADDED Viewed

	@@ -0,0 +1,306 @@

+# 多模态GRPO完整实验流程
+本文介绍如何使用SWIFT GRPO进行多模态模型和任务的训练。目标是对多个多模态任务进行训练，提升任务精度，任务定义和训练参数等参考了 [R1-V](https://github.com/Deep-Agent/R1-V.git) 和 [open-r1-multimodal](https://github.com/EvolvingLMMs-Lab/open-r1-multimodal.git)
+## ClevrCount 任务
+### 任务与数据集定义
+本任务从clevr_cogen_a_train数据集出发，模型的目标是输出图像中包含的物体数量，因此，我们定义数据集如下：
+```python
+class ClevrPreprocessor(ResponsePreprocessor):
+    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
+        query = row.get('query', '')
+        query = f"""{query} Output the thinking process in <think> </think> and
+ final answer (number) in <answer> </answer> tags."""
+        row.update({'query': query})
+        return super().preprocess(row)
+register_dataset(
+    DatasetMeta(
+        ms_dataset_id='okwinds/clevr_cogen_a_train',
+        subsets=[
+            SubsetDataset(
+                name='default',
+                subset='default',
+                split=['train'],
+            ),
+        ],
+        preprocess_func=ClevrPreprocessor(),
+        tags=['qa', 'math']))
+```
+这里重新定义dataset preprocessor的目的是修改query。数据集示例样本如下，包含messages,images和solution字段，solution会送入后续的奖励函数中，而messages和images则会作为模型输入。
+- 注意：`{'role': 'assistant', 'content': '<answer> 3 </answer>'}`将会在GRPOTrainer中被移除，可以忽略。'solution'字段将会透传入ORM中。在自定义数据集时，'images'字段组织成`["image_path1", "image_path2"]`即可。
+```json
+{
+    "images": ["image_path1", "image_path2"],
+    "messages": [
+        {
+            "role": "user",
+            "content": "How many items are there in the image? Output the thinking process in <think> </think> and \n final answer (number) in <answer> </answer> tags."
+        }
+    ],
+    "solution": "<answer> 3 </answer>"
+}
+```
+### 奖励函数定义：
+本任务使用的奖励函数有两个，一个是 Deepseek-R1 中提到的格式奖励函数，另一是 ClevrCount 的准确性奖励函数。前者已经在swift中内置，通过 `--reward_funcs format` 可以直接使用，而后者需要我们自己定义，在这里我们使用 external_plugin 的方式定义准确性奖励函数，将代码放在`swift/examples/train/grpo/plugin/plugin.py`中。
+在这里，奖励函数的输入包括completions和solution两个字段，分别表示模型生成的文本和真值。每个都是list，支持多个completion同时计算。注意，在这里，solution字段是数据集中定义的字段透传而来，如果有任务上的变动，可以分别对数据集和奖励函数做对应的改变即可。
+```python
+class MultiModalAccuracyORM(ORM):
+    def __call__(self, completions, solution, **kwargs) -> List[float]:
+        """
+        Reward function that checks if the completion is correct.
+        Args:
+            completions (list[str]): Generated outputs
+            solution (list[str]): Ground Truths.
+        Returns:
+            list[float]: Reward scores
+        """
+        rewards = []
+        from math_verify import parse, verify
+        for content, sol in zip(completions, solution):
+            reward = 0.0
+            # Try symbolic verification first
+            try:
+                answer = parse(content)
+                if float(verify(answer, parse(sol))) > 0:
+                    reward = 1.0
+            except Exception:
+                pass  # Continue to next verification method if this fails
+            # If symbolic verification failed, try string matching
+            if reward == 0.0:
+                try:
+                    # Extract answer from solution if it has think/answer tags
+                    sol_match = re.search(r'<answer>(.*?)</answer>', sol)
+                    ground_truth = sol_match.group(1).strip() if sol_match else sol.strip()
+                    # Extract answer from content if it has think/answer tags
+                    content_match = re.search(r'<answer>(.*?)</answer>', content)
+                    student_answer = content_match.group(1).strip() if content_match else content.strip()
+                    # Compare the extracted answers
+                    if student_answer == ground_truth:
+                        reward = 1.0
+                except Exception:
+                    pass  # Keep reward as 0.0 if both methods fail
+            rewards.append(reward)
+        return rewards
+orms['external_r1v_acc'] = MultiModalAccuracyORM
+```
+### GRPO训练实验记录
+#### 训练参数：
+我们选取 Qwen2.5-VL-3B-Instruct 作为基础模型进行训练，选取 Instruct 而不是基模的主要原因是可以更快地获取 format reward。我们在八卡 GPU 上进行实验。SWIFT GRPO训练已支持多卡部署模型以加速rollout，因此我们设置num_infer_workers为2，进程数为6，即2卡部署，6卡训练。如果遇到vllm部署qwen2.5-vl报错，可以参考[issue](https://github.com/vllm-project/vllm/issues/13285)
+由于任务简单，我们设置max_completion_length为1024，奖励函数选择external_r1v_acc和format，学习率和beta分别设置为1e-6和0.001。其他设置如下所示，batch_size和num_generations的设置原则可以参考[GRPO完整流程](./GRPO完整流程.md)。
+```shell
+WANDB_API_KEY=your_wandb_api_key \
+NPROC_PER_NODE=6 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-VL-3B-Instruct \
+    --external_plugins examples/train/grpo/plugin/plugin.py \
+    --reward_funcs external_r1v_acc format \
+    --use_vllm true \
+    --vllm_device auto \
+    --vllm_gpu_memory_utilization 0.6 \
+    --train_type full \
+    --torch_dtype bfloat16 \
+    --dataset 'okwinds/clevr_cogen_a_train' \
+    --vllm_max_model_len 8192 \
+    --max_completion_length 1024 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 8 \
+    --per_device_eval_batch_size 8 \
+    --learning_rate 1e-6 \
+    --gradient_accumulation_steps 2 \
+    --save_strategy 'steps' \
+    --eval_strategy 'steps' \
+    --eval_steps 1000 \
+    --save_steps 1000 \
+    --save_total_limit 10 \
+    --logging_steps 1 \
+    --output_dir output/GRPO_CLEVR_COUNTDOWN \
+    --warmup_ratio 0.01 \
+    --dataloader_num_workers 4 \
+    --num_generations 24 \
+    --temperature 1.0 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --deepspeed zero3 \
+    --log_completions true \
+    --report_to wandb \
+    --num_iterations 1 \
+    --num_infer_workers 2 \
+    --async_generate false \
+    --beta 0.001
+```
+#### 实验现象
+![image.png](../../resources/grpo_clevr_count.png)
+由于数据集和任务比较简单，模型训练了500个step已经基本收敛，有以下观察。
+1. 自定义的ClevrORM在不断增加，证明模型学习到了如何完成这一任务，最终任务成功率从初始的0.4攀升到1左右。
+2. 由于是Instruct模型，Format Reward基本稳定在1，这可能也与数据集所有样本都是一样的query有一定关系。
+3. reward_std 稳定到0.1以下。
+4. completion length最终稳定在60-80，模型学习到的固定输出范式是逐个物体列举数数。
+step 1:
+```
+"<think> The image shows 5 distinct items: a cyan metallic cylinder, a red metallic cylinder, a small purple metallic cylinder, a large gray metallic sphere, and a small yellow metallic cube. To count the number of items, I add the counts of each type of object: 1 cyan cylinder, 1 red cylinder, 1 purple cylinder, 1 gray sphere, and 1 yellow cube. Therefore, the total number of items is 5. </think>\n<answer>5</answer>"
+"Let's count the number of items in the image:\n1. A large cyan metallic cylinder.\n2. A small purple metallic cylinder.\n3. A large red metallic cylinder.\n4. A large gray metallic sphere.\n5. A small yellow metallic cube.\n\n<answer>5</answer>"
+"<think> The image shows four distinct objects: a large cyan metallic cylinder, a smaller purple metallic cylinder, a large red metallic cylinder, and a small yellow metallic cube. Counting each of these items individually, there are five items in total. </think>\n<answer>5</answer>"
+```
+step 400:
+```
+"<think> Counting the number of items in the image:\n1. Green matte sphere\n2. Large metallic yellow sphere\n3. Small metallic brown cube\n\nThere are three distinct objects in total.\n</think>\n<answer>3</answer>"
+"<think> Counting the objects in the image:\n1. Large yellow metallic sphere\n2. Small green rubber sphere\n3. Small brown metallic cube\n\nThere are three distinct items in the image.\n</think>\n<answer>3</answer>"
+```
+总体来讲，这一任务比较简单，reward的收敛也比较典型。
+## Geometric QA任务
+### 任务与数据集定义
+本任务为Geometric QA任务，任务描述为：给定一个几何图形，回答有关几何图形的数学问题。原始数据来自于[论文](https://arxiv.org/pdf/2312.11370)，[R1-V](https://github.com/Deep-Agent/R1-V.git)对数据进行了预处理，将所有数据全部处理成了problem-solution的格式，而图像则保留在image字段中，因此，我们不需要额外定义数据集，直接使用`--dataset AI-ModelScope/GEOQA_R1V_Train_8K`即可。
+### 奖励函数
+由于也是数学题，同时，答案也处理成了最终结果，因此，我们直接使用以上定义过的`MultiModalAccuracyORM`奖励函数。
+### GRPO训练实验记录
+#### 训练参数：
+选取的模型和大部分超参数与上一个实验相似，主要有两点不同：
+1. SWIFT 已支持`--num_iterations`参数，单次rollout可以进行多次更新，这里设置为2。
+2. 在实验时发现，在数学问题中，训练可能会出现不稳定现象，导致模型训崩，具体表现为所有reward迅速降低，loss、grad_norm和kl都迅速增大，后续也难以恢复正常状态。因此，这里设置`--max_grad_norm 0.5`，保证稳定训练，当然，这种现象的出现也有一定的随机性。
+```shell
+WANDB_API_KEY=your_wandb_api_key \
+MAX_PIXELS=401408 \
+NPROC_PER_NODE=6 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-VL-3B-Instruct \
+    --external_plugins examples/train/grpo/plugin/plugin.py \
+    --reward_funcs external_r1v_acc format \
+    --use_vllm true \
+    --vllm_device auto \
+    --vllm_gpu_memory_utilization 0.6 \
+    --train_type full \
+    --torch_dtype bfloat16 \
+    --dataset 'AI-ModelScope/GEOQA_R1V_Train_8K' \
+    --vllm_max_model_len 8192 \
+    --max_completion_length 1024 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 8 \
+    --per_device_eval_batch_size 8 \
+    --learning_rate 1e-6 \
+    --gradient_accumulation_steps 2 \
+    --save_strategy 'steps' \
+    --eval_strategy 'steps' \
+    --eval_steps 400 \
+    --save_steps 400 \
+    --save_total_limit 10 \
+    --logging_steps 1 \
+    --output_dir output/GRPO_GEOQA \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --num_generations 8 \
+    --temperature 1.0 \
+    --repetition_penalty 1.1 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --deepspeed zero3 \
+    --log_completions true \
+    --report_to wandb \
+    --num_iterations 2 \
+    --num_infer_workers 2 \
+    --async_generate false \
+    --beta 0.001 \
+    --max_grad_norm 0.5
+```
+#### 实验现象
+![image.png](../../resources/grpo_geoqa.png)
+训练曲线如上图所示。
+1. 相比于Count任务，Geometric QA任务的难度明显更大，因此，Accuracy Reward收敛明显更慢，同时震荡幅度也明显更大。
+2. completion_length的变化趋势比较明显，最终收敛到250 token左右。
+3. reward基本收敛，标准差在一共1300个step时稳定在0.2左右。
+以下是训练后的模型completion示例：
+```
+User:
+"If AB is the tangent to circle O with the tangent point A, and BO intersects circle O at point C, and point D is located on circle O, what is the measure of angle ADC if the degree of angle ABO is 32.0?"
+Assistant:
+"<think>\nTo find the measure of angle ADC, we first need to understand the properties of the tangent and the circle. Since AB is a tangent to the circle at point A, the line segment OA (which is perpendicular to the tangent AB) is perpendicular to AB. This means that angle OAB is 90 degrees.\n\nGiven that angle ABO is 32 degrees, we can find angle BAO by subtracting angle ABO from 90 degrees:\n∠BAO = 90° - ∠ABO = 90° - 32° = 58°\n\nSince angle BAO is an angle in the minor arc AD, the angle ADC, which is an angle in the minor arc AD, is half the angle BAO. Therefore, angle ADC is:\n∠ADC = 1/2 × ∠BAO = 1/2 × 58° = 29°\n\nSo, the measure of angle ADC is 29 degrees.\n</think>\n<answer>\nThe measure of angle ADC is 29 degrees.\n</answer>"
+```
+## Multimodal Open R1 数据集实验
+### 任务与数据集定义
+本任务为参考[open-r1-multimodal](https://github.com/EvolvingLMMs-Lab/open-r1-multimodal.git)的实验，使用数据集：[lmms-lab/multimodal-open-r1-8k-verified](https://www.modelscope.cn/datasets/lmms-lab/multimodal-open-r1-8k-verified)，该数据集专注于多模态的数学推理任务，数据由GPT4o基于`Math360K`和`Geo170K`数据集生成，包含推理路径和可验证答案。数据集中已包含了image, problem和solution字段，我们也不需要针对prompt进行修改，因此无需额外定义数据集。
+### 奖励函数
+我们直接使用以上定义过的`MultiModalAccuracyORM`奖励函数。
+### GRPO训练实验记录
+#### 训练参数：
+选取的模型和大部分超参数与上一个实验相似，由于训练的时候出现了OOM，我们设置`MAX_PIXELS=262144`以降低显存占用。
+```shell
+WANDB_API_KEY=your_wandb_api_key \
+MAX_PIXELS=262144 \
+MASTER_PORT=29600 \
+NPROC_PER_NODE=6 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-VL-3B-Instruct \
+    --external_plugins examples/train/grpo/plugin/plugin.py \
+    --reward_funcs external_r1v_acc format \
+    --use_vllm true \
+    --vllm_device auto \
+    --vllm_gpu_memory_utilization 0.6 \
+    --train_type full \
+    --torch_dtype bfloat16 \
+    --dataset 'lmms-lab/multimodal-open-r1-8k-verified' \
+    --vllm_max_model_len 8192 \
+    --max_completion_length 1024 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 8 \
+    --per_device_eval_batch_size 8 \
+    --learning_rate 1e-6 \
+    --gradient_accumulation_steps 2 \
+    --save_strategy 'steps' \
+    --eval_strategy 'steps' \
+    --eval_steps 400 \
+    --save_steps 400 \
+    --save_total_limit 10 \
+    --logging_steps 1 \
+    --output_dir output/GRPO_GEOQA \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --num_generations 8 \
+    --temperature 1.0 \
+    --repetition_penalty 1.1 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --deepspeed zero3 \
+    --log_completions true \
+    --report_to wandb \
+    --num_iterations 2 \
+    --num_infer_workers 2 \
+    --async_generate false \
+    --beta 0.001 \
+    --max_grad_norm 0.5
+```
+#### 实验现象
+![image.png](../../resources/grpo_openr1_multimodal.png)
+训练曲线如上图所示。
+一共8k数据，训练了1268个step，Accuracy Reward收敛到0.5左右，completion_length基本收敛到200token左右，reward_std最终在0.2左右震荡。
+以下是训练后的模型completion示例：
+```
+User:
+"Based on the map, which state falls into the lowest percentage range of lots, and what is that percentage range?"
+Assistant:
+"<think>\nThe image provided shows a color-coded map of the USA indicating different ranges of lot percentages across various states. According to the legend at the bottom, the lowest percentage range (6.0% - 6.5%) is represented by white. In the image, Alabama (AL) is shaded in white, which corresponds to the 6.0% - 6.5% category. Therefore, based on the map, the state that falls into the lowest percentage range of lots is Alabama, with the percentage range of 6.0% - 6.5%.\nTherefore, the answer is 6.0% - 6.5%.\n</think>\n<answer>Alabama</answer>"
+```

ms-swift/docs/source/BestPractices/快速训练VL模型.md ADDED Viewed

	@@ -0,0 +1,227 @@

+# 快速训练视觉语言（Vision-Language, VL）模型的最佳实践
+本文档提供从零开始快速训练视觉语言(Vision-Language, VL)模型的最佳实践。
+涉及的模型链接：
+- [Qwen2.5-VL-7B-Instruct](https://www.modelscope.cn/models/Qwen/Qwen2.5-VL-7B-Instruct)
+- [Qwen3-8B](https://www.modelscope.cn/models/Qwen/Qwen3-8B)
+训练的模型链接：
+- [Simple-VL-8B](https://www.modelscope.cn/models/swift/Simple-VL-8B/summary)
+本训练流程基于 Qwen2.5-VL-7B-Instruct 模型架构，将其内部的语言模型（LLM）部分替换为 Qwen3-8B 的权重，训练模型的视觉理解能力。具体步骤如下：
+1. 修改原始模型的配置文件 config.json，使其适配 Qwen3-8B 的模型结构。
+2. 初始化并加载新的模型权重，保存为新模型。
+3. 对新模型进行两阶段微调：
+    1. 第一阶段：仅训练视觉到语言的对齐模块（aligner），冻结 ViT 和 LLM 部分。
+    2. 第二阶段：解冻所有模块，联合训练提升整体性能。
+## 模型修改
+### 修改配置文件 config.json
+因为 Qwen2.5-VL-7B-Instruct 模型的底模 Qwen2.5-7B-Instruct 与 Qwen3-8B 在模型结构上存在部分差异（比如层数，hidden_state_dims），我们首先需要基于Qwen2.5-VL-7B-Instruct的config.json文件，创建一个新的config.json文件，并修改以下参数对齐Qwen3-8B
+```
+修改
+1. hidden_size 3584->4096
+2. intermediate_size: 18944->12288
+3. num_attention_heads: 28->32
+4. num_key_value_heads: 4->8
+5. num_hidden_layers: 28->36
+6. vocab_size:152064->151936
+7. max_window_layers:28->36
+新增
+1. head_dim： 128
+```
+### 模型权重初始化与替换
+使用以下 Python 脚本完成模型权重的初始化、替换与保存：
+```python
+import torch
+from modelscope import Qwen2_5_VLForConditionalGeneration, AutoModelForCausalLM, AutoConfig
+from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLPatchMerger, Qwen2_5_VLModel
+from accelerate import Accelerator
+# 加载原始 VL 模型和 Qwen3-8B 模型
+qwen2_5_vl_7b_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    "Qwen/Qwen2.5-VL-7B-Instruct",
+    device_map="cuda",
+    torch_dtype=torch.bfloat16
+)
+device = qwen2_5_vl_7b_model.device
+qwen3_8b_model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen3-8B",
+    device_map=device,
+    torch_dtype=torch.bfloat16
+)
+# 加载配置
+old_config = AutoConfig.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
+new_config = AutoConfig.from_pretrained("/path/to/new_config_dir") # 新 config 的文件夹路径
+new_visual_config = new_config.vision_config
+# 1. 替换 ViT 到 LLM 的 merger(aligner) 层
+new_merger = Qwen2_5_VLPatchMerger(
+            dim=new_visual_config.out_hidden_size,
+            context_dim=new_visual_config.hidden_size,
+            spatial_merge_size=new_visual_config.spatial_merge_size,
+        ).to(device).to(torch.bfloat16)
+qwen2_5_vl_7b_model.visual.merger = new_merger
+# 2. 替换 VL 模型的 LLM 部分
+new_llm_model = Qwen2_5_VLModel(new_config).to(device).to(torch.bfloat16)
+for name, param in qwen3_8b_model.model.named_parameters():
+    if name in new_llm_model.state_dict():
+        new_llm_model.state_dict()[name].copy_(param)
+qwen2_5_vl_7b_model.model = new_llm_model
+qwen2_5_vl_7b_model.lm_head = qwen3_8b_model.lm_head
+# 3. 保存修改后的模型
+accelerator = Accelerator()
+accelerator.save_model(
+    model=qwen2_5_vl_7b_model,
+    save_directory="/path/to/save/Qwen3-VL-Model",
+    max_shard_size="4GB",
+    safe_serialization=True
+)
+```
+## 训练
+为简化流程，我们跳过预训练（pretrain），直接进入监督微调（SFT）。训练分为两个阶段：
+### stage1 训练 Aligner 层
+仅训练视觉到语言的对齐层（Aligner），冻结 ViT 和 LLM 部分：
+```bash
+NNODES=$WORLD_SIZE \
+NODE_RANK=$RANK \
+NPROC_PER_NODE=8 \
+MAX_PIXELS=1003520 \
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+swift sft \
+    --model /path/to/new_vl_model \
+    --model_type qwen2_5_vl \
+    --train_type full \
+    --dataset xxx  \
+    --torch_dtype bfloat16 \
+    --attn_impl flash_attn \
+    --freeze_vit true \
+    --freeze_llm true \
+    --freeze_aligner false \
+    --num_train_epochs 3 \
+    --per_device_train_batch_size 2 \
+    --learning_rate 5e-6 \
+    --gradient_accumulation_steps 8 \
+    --eval_steps -1 \
+    --save_steps 1000 \
+    --save_total_limit 10 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 8 \
+    --deepspeed zero2
+```
+### stage2 训练整个模型
+解冻所有模块，联合训练以增强模型的整体视觉理解能力：
+```bash
+NNODES=$WORLD_SIZE \
+NODE_RANK=$RANK \
+NPROC_PER_NODE=8 \
+MAX_PIXELS=1003520 \
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+swift sft \
+    --model /path/to/stage1_checkpoint \
+    --model_type qwen2_5_vl \
+    --train_type full \
+    --dataset xxx \
+    --torch_dtype bfloat16 \
+    --attn_impl flash_attn \
+    --freeze_vit false \
+    --freeze_llm false \
+    --freeze_aligner false \
+    --num_train_epochs 3 \
+    --per_device_train_batch_size 2 \
+    --learning_rate 5e-6 \
+    --gradient_accumulation_steps 8 \
+    --eval_steps -1 \
+    --save_steps 1000 \
+    --save_total_limit 10 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 8 \
+    --deepspeed zero2
+```
+## 推理/部署/评测
+### 推理
+通过`swift infer`来推理训练得到的模型
+```bash
+swift infer \
+    --model /path/to/stage2_checkpoint
+```
+### 部署
+使用 vLLM 加速模型服务部署：
+```
+CUDA_VISIBLE_DEVICES=0 \
+MAX_PIXELS=1003520 \
+VIDEO_MAX_PIXELS=50176 \
+FPS_MAX_FRAMES=12 \
+swift deploy \
+    --model /path/to/stage2_checkpoint \
+    --infer_backend vllm \
+    --gpu_memory_utilization 0.9 \
+    --max_model_len 8192 \
+    --max_new_tokens 2048 \
+    --limit_mm_per_prompt '{"image": 5, "video": 2}' \
+    --served_model_name Qwen3-VL
+```
+### 评测
+通过 [EvalScope](https://github.com/modelscope/evalscope/) 对训练得到的 VL 模型进行评测
+以下是以 MMMU benchmark 为例的评测代码：
+```python
+from evalscope import TaskConfig, run_task
+task_cfg_dict = TaskConfig(
+    work_dir='outputs',
+    eval_backend='VLMEvalKit',
+    eval_config={
+        'data': ['MMMU_DEV_VAL'],
+        'mode': 'all',
+        'model': [
+            {'api_base': 'http://localhost:8000/v1/chat/completions',
+            'key': 'EMPTY',
+            'name': 'CustomAPIModel',
+            'temperature': 0.6,
+            'type': 'Qwen3-VL',
+            'img_size': -1,
+            'video_llm': False,
+            'max_tokens': 512,}
+            ],
+        'reuse': False,
+        'nproc': 64,
+        'judge': 'exact_matching'},
+)
+run_task(task_cfg=task_cfg_dict)
+```

ms-swift/docs/source/BestPractices/更多最佳实践.md ADDED Viewed

	@@ -0,0 +1,7 @@

+# 更多最佳实践
+- [Qwen2.5自我认知微调](https://github.com/modelscope/ms-swift/tree/main/examples/notebook/qwen2_5-self-cognition)
+- [Qwen2-VL Latex-OCR微调](https://github.com/modelscope/ms-swift/tree/main/examples/notebook/qwen2vl-ocr)
+- [Qwen2.5-VL Grounding任务微调](https://github.com/modelscope/ms-swift/tree/main/examples/notebook/qwen2_5-vl-grounding)
+- [Qwen3全流程最佳实践](https://github.com/modelscope/ms-swift/issues/4030)

ms-swift/docs/source/Customization/插件化.md ADDED Viewed

	@@ -0,0 +1,215 @@

+# 插件化
+插件化是SWIFT3.0中新增的重要能力。我们希望通过插件化的方式，让开发者对开发流程的定制更加自然。
+## callback回调
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/callback.py).
+`callback`机制是transformers Trainer中的一种训练定制化机制。开发者可以在callback中控制训练流程。通常来说，callback的定制化类似下面的样子：
+```python
+class CustomCallback(TrainerCallback):
+    def on_train_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
+        # Doing something when the training begins.
+        pass
+    def on_save(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
+        # Doing something when save checkpoint
+        pass
+```
+callback会在trainer构造前注册进trainer中，example中给出了一个简单版本的EarlyStop方案。注册你自己的callback的方式比较简单：
+```python
+extra_callbacks = [CustomCallback()]
+```
+开发者可以在plugin/callback.py中增加新的callback，并定制自己的训练流程。callback的具体参数可以查看[这里](https://huggingface.co/docs/transformers/main_classes/callback)。
+## 定制化loss
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/loss.py).
+SWIFT支持在plugin中定制loss。如果不使用这个能力，默认会使用交叉熵Loss（CE Loss）。开发者可以在这个文件中编写代码，注册后trainer会自动使用你定制的loss方法。
+例如在plugin/loss.py中添加下面的代码：
+```python
+@register_loss_func("custom_loss")
+def loss_scale_func(outputs, labels, loss_scale=None, num_items_in_batch=None) -> torch.Tensor:
+    # Write your own loss calculating here
+    return loss
+```
+需要注意的是，loss和trainer训练的任务是强相关的，目前的loss定制针对pt和sft任务，如果是人类对齐任务（例如DPO、PPO等）或分类任务（seq_cls），在插件中是无法定制的。
+## 定制化loss_scale
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/loss_scale/loss_scale.py).
+loss_scale机制在SWIFT中是非常重要的机制之一。在pt和sft任务中，可训练token的loss是均匀的，即每个token平等的进行bp。但在某些情况下，某些token的权重比较大，需要被额外关注，
+在这种情况下就需要更高的权重。loss_scale可以让开发者自由地定义自己的token权重。
+```python
+class LastRoundLossScale(LossScale):
+    def get_loss_scale(self, context: str, context_type: ContextType, is_last_round: bool, **kwargs):
+        if context_type == ContextType.RESPONSE:
+            return [context], [float(is_last_round)]
+        return super().get_loss_scale(context, context_type, is_last_round)
+```
+在上面的代码中，返回了一个Tuple，第一个返回是context（或拆解后的context），第二个参数是context对应的loss_scale，float值代表了权重。例如下面的权重设置：
+```text
+["学习", "好", "数学", "是", "重要", "的"]
+[1.0, 0.5, 2.0, 0.5, 2.0, 0.1]
+```
+我们更看重数学和重要两个词，因此我们把它们的权重提升到2.0。
+回到上面的代码，我们判断了传入的context是否是response，如果是response且如果是多轮对话的最后一轮才返回[1]，在其他情况下使用基类的实现（在本场景下loss_scale是[0]）。使用这种方案，
+我们做到了只有最后一轮的response参与训练，其他response不参与训练。使用这种方式，可以让所有token（prompt、response）参与训练，或针对agent某些特殊字符重点训练等。
+在pt和sft中，loss_scale是整体支持（是否参与训练，以及权重大小）的，而人类对齐中只能支持某些token是否参与训练，无法支持权重大小。
+## 定制化metric
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/metric.py).
+metric可以定制训练时使用的评测参数：
+```python
+METRIC_MAPPING = {
+    'acc': (compute_acc_metrics, preprocess_logits_for_acc),
+    'nlg': (compute_nlg_metrics, None),
+    'custom': (custom_metric, custom_preprocess),
+}
+def get_metric(metric: str):
+    return METRIC_MAPPING[metric]
+```
+在上面的定义中，我们添加了新的custom metric，它的value有两个值，第一个值是计算metric的过程，返回一个包含metric key-value对的dict，第二个值是针对logits做前处理，返回实际的predictions。
+## 定制化optimizer
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/optimizer.py).
+- 对模型不同部分采用不同的学习率，例如：ViT和LLM分别使用不同的学习率，参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/lora_llm_full_vit/custom_plugin.py)。
+用户可以在这里增加自己的optimizer和lr_scheduler实现：
+```python
+def create_custom_optimizers(args, model, dataset):
+    # 创建自己的optimizer
+    return CustomOptimizer(optimizer_grouped_parameters, **optimizer_kwargs), CustomScheduler(...)
+optimizers_map = {
+    'custom': create_custom_optimizers,
+    ...
+}
+```
+当开发者需要使用其他optimizer，例如某些新论文中定义的optimizer时，可以在这里定义其创建过程，并在参数中使用：
+```shell
+--optimizer custom
+```
+就可以实际调用了。
+## 定制化agent template
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/agent_template).
+## 定制化tuner
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/tuner.py).
+- 多模态模型对ViT部分使用全参数训练，LLM部分使用LoRA训练，参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal/lora_llm_full_vit)。
+- Phi4-multimodal，直接对其已有LoRA进行训练而不额外附加LoRA，参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/plugins/tuner_phi4_mm.sh)。
+tuner定制也是swift中有特色的能力之一，开发者可以无视复杂的tuner初始化流程和代码整合成本，将新的tuner注册在这里：
+```python
+class IA3(Tuner):
+    @staticmethod
+    def prepare_model(args: 'TrainArguments', model: torch.nn.Module) -> torch.nn.Module:
+        model_arch: ModelKeys = MODEL_ARCH_MAPPING[model.model_meta.model_arch]
+        ia3_config = IA3Config(
+            target_modules=find_all_linears(model), feedforward_modules='.*' + model_arch.mlp.split('{}.')[1] + '.*')
+        return get_peft_model(model, ia3_config)
+    @staticmethod
+    def save_pretrained(
+        model: torch.nn.Module,
+        save_directory: str,
+        state_dict: Optional[dict] = None,
+        safe_serialization: bool = True,
+        **kwargs,
+    ) -> None:
+        model: PeftModel
+        model.save_pretrained(save_directory, state_dict=state_dict, safe_serialization=safe_serialization, **kwargs)
+    @staticmethod
+    def from_pretrained(model: torch.nn.Module, model_id: str, **kwargs) -> torch.nn.Module:
+        return PeftModel.from_pretrained(model, model_id, **kwargs)
+```
+上面的例子中，我们将peft的IA3应用于模型训练中，在这个类中包含了三个方法：
+- prepare_model: 如何将原始模型使用tuner进行封装，并设置好可训练参数
+- save_pretrained: 如何在训练中保存模型
+- from_pretrained: 如何在后续训练和推理中将之前存下来的checkpoint重新拉起
+上面的三个方法会在swift训练流程中被调用，这样就做到了开发者可以不阅读复杂的训练代码而使用自己的tuner。
+## PRM
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/prm.py)。
+PRM是过程奖励模型，PRM会在`swift sample`命令中使用。PRM需要支持的接口比较简单：
+```python
+class PRM:
+    def __init__(self):
+        # init here
+        pass
+    def __call__(self, infer_requests: List[InferRequest], **kwargs) -> List[Union[float, List[float]]]:
+        raise NotImplementedError
+```
+其中的InferRequest来自于`swift.llm`，返回的`List[Union[float, List[float]]]`，列表中可能是reward也可能是若干reward。开发者可以在infer_requests中拿到queries和responses，并按照自己的方式进行切分，例如：
+```text
+Let's think step by step.
+Step1: xxx
+Step2: xxx
+So, the answer is ...
+```
+开发者可以在这里对过程进行切分，并按batch传入PRM中进行推理并返回rewards。更通用来说，开发者可以在这里调用一个远端URL，例如一个闭源PRM大模型并返回rewards。
+## ORM
+example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/orm.py)。
+ORM是结果奖励模型。ORM一般使用正则表达式来进行，ORM决定了response是否是正确的。例如：
+```python
+class MathORM(ORM):
+    @staticmethod
+    def extract_boxed_result(text):
+        pattern = r'\\boxed{([^}]*)}'
+        match = re.search(pattern, text)
+        if match:
+            return match.group(1).strip()
+        else:
+            return None
+    def __call__(self, infer_requests: List[InferRequest], ground_truths: List[str],
+                **kwargs) -> List[float]:
+        rewards = []
+        predictions = [request.messages[-1]['content'] for request in infer_requests]
+        for prediction, ground_truth in zip(predictions, ground_truths):
+            res1 = MathORM.extract_boxed_result(prediction) or ''
+            res2 = MathORM.extract_boxed_result(ground_truth) or ''
+            rewards.append(float(res1.strip() == res2.strip()))
+        return rewards
+orms = {
+    'math': MathORM,
+}
+```
+在上面的代码中，我们定义了一个对数学response进行解析的过程，如果结果相同则返回score为1.0，否则为0.0。和PRM不同，这个类的infer中有一个额外参数`ground_truths`，
+该参数是对应的infer_requests的实际label（数据集中定义的标准response）。

ms-swift/docs/source/Customization/自定义模型.md ADDED Viewed

	@@ -0,0 +1,34 @@

+# 自定义模型
+ms-swift内置的模型，你可以直接通过指定model_id或者model_path来使用：`--model <model_id_or_path>`。ms-swift会根据model_id/model_path的后缀和`config.json`文件来判断model_type。每种model_type都有唯一的模型结构、template和加载方式。当然，你也可以手动传入`--model_type`、`--template`来进行覆盖。ms-swift已支持的model_type和template可以查看[支持的模型与数据集](../Instruction/支持的模型和数据集.md)。
+## 模型注册
+自定义模型通常使用模型注册的方式进行，可以参考[内置模型](https://github.com/modelscope/ms-swift/blob/main/swift/llm/model/model/qwen.py)、[内置对话模板](https://github.com/modelscope/ms-swift/blob/main/swift/llm/template/template/qwen.py)或者[examples](https://github.com/modelscope/swift/blob/main/examples/custom)的示例代码。你可以通过指定`--custom_register_path xxx.py`解析外置注册的内容（方便pip install而非git clone的用户）。
+register_model会在`MODEL_MAPPING`中注册模型，调用函数`register_model(model_meta)`即可完成模型注册，其中model_meta将存储模型的元信息。ModelMeta的参数列表如下：
+- model_type: 必填项。模型类型，也是唯一ID。
+- model_groups: 必填项。罗列ModelScope/HuggingFace的模型id和模型本地路径。运行[run_model_info.py](https://github.com/modelscope/ms-swift/blob/main/scripts/utils/run_model_info.py)文件将自动产生[支持的模型文档](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.html)以及自动根据`--model`后缀匹配model_type。
+- template: 必填项。不指定`--template`时的默认template类型。
+- get_function: 必填项。模型和tokenizer/processor（多模态模型）的加载函数。LLM通常设置为`get_model_tokenizer_with_flash_attn`即可。
+- model_arch: 模型架构。默认为None。多模态模型训练需要设置该参数来确定llm/vit/aligner的前缀。
+- architectures: config.json中的architectures项，用于自动匹配模型对应的model_type。默认为`[]`。
+- additional_saved_files: 全参数训练和merge-lora时需要额外保存的文件。默认为`[]`。
+- torch_dtype: 模型加载时未传入`torch_dtype`时的默认dtype。默认为None，从config.json中读取。
+- is_multimodal: 是否是多模态模型，默认为False。
+- ignore_patterns: 从hub端下载文件需要忽略的文件patterns，默认为`[]`。
+register_template会在`TEMPLATE_MAPPING`中注册对话模板，调用函数`register_template(template_meta)`即可完成对话模板注册，其中template_meta将存储template的元信息。TemplateMeta的参数列表如下：
+- template_type: 必填项。对话模板类型，也是唯一ID。
+- prefix: 必填项。对话模板的前缀，通常包含system、bos_token等部分，独立于多轮对话而产生的对话模板循环。例如qwen的prefix为`[]`。
+- prompt: 必填项。表示对话模板中的`{{RESPONSE}}`之前的对话部分。我们使用`{{QUERY}}`代表user询问部分的填充符。例如qwen的prompt为`['<|im_start|>user\n{{QUERY}}<|im_end|>\n<|im_start|>assistant\n']`。
+- chat_sep: 必填项。多轮对话中每轮的分隔符。若设置为None，则该template不支持多轮对话。例如qwen的chat_sep为`['<|im_end|>\n']`。
+- suffix: 默认为`[['eos_token_id']]`。对话模板的后缀部分，独立于多轮对话而产生的对话模板循环，通常为eos_token。例如qwen的suffix为`['<|im_end|>']`。
+- template_cls: 默认为`Template`。通常在定义多模态模型的template时需要进行自定义，自定义`_encode`、`_post_encode`、`_data_collator`函数。
+- system_prefix: 默认为None。含system的对话模板前缀。我们使用`{{SYSTEM}}`作为system的填充符。例如qwen的system_prefix为`['<|im_start|>system\n{{SYSTEM}}<|im_end|>\n']`。
+  - 注意：若system为空时，`prefix`可以被`system_prefix`替代，则可以将`prefix`写为含system的前缀，而无需设置`system_prefix`。
+  - 若prefix不含`{{SYSTEM}}`且未设置system_prefix，则该template不支持system。
+- default_system: 默认为None。不传入`--system`时使用的默认system。例如qwen的default_system为`'You are a helpful assistant.'`。
+- stop_words: 默认为`[]`。除了eos_token和`suffix[-1]`的额外停止符。例如qwen的stop_words为`['<|endoftext|>']`。
+  - 注意：推理时，输出的response将会过滤eos_token和`suffix[-1]`，但是会保留额外的stop_words。

ms-swift/docs/source/GetStarted/Web-UI.md ADDED Viewed

	@@ -0,0 +1,36 @@

+# Web-UI
+目前SWIFT已经支持了界面化的训练和推理，参数支持和脚本训练相同。在安装SWIFT后，使用如下命令：
+```shell
+swift web-ui --lang zh
+# or en
+swift web-ui --lang en
+```
+开启界面训练和推理。
+SWIFT web-ui是命令行的高级封装，即，在界面上启动的训练、部署等任务，会在系统中以命令行启动一个独立的进程，伪代码类似：
+```python
+import os
+os.system('swift sft --model xxx --dataset xxx')
+```
+这给web-ui带来了几个特性：
+1. web-ui的每个超参数描述都带有`--xxx`的标记，这与[命令行参数](../Instruction/命令行参数.md)的内容是一致的
+2. web-ui可以在一台多卡机器上并行启动多个训练/部署任务
+3. web-ui服务关闭后，后台服务是仍旧运行的，这防止了web-ui被关掉后影响训练进程，如果需要关闭后台服务，只需要**选择对应的任务**后在界面上的`运行时`tab点击杀死服务
+4. 重新启动web-ui后，如果需要显示正在运行的服务，在`运行时`tab点击`找回运行时任务`即可
+5. 训练界面支持显示运行日志，请在选择某个任务后手动点击`展示运行状态`，在训练时运行状态支持展示训练图表，图表包括训练loss、训练acc、学习率等基本指标，在人类对齐任务中界面图表为margin、logps等关键指标
+6. web-ui的训练不支持PPO，该过程比较复杂，建议使用examples的[shell脚本](../../../examples/train/rlhf/ppo.sh)直接运行
+如果需要使用share模式，请添加`--share true`参数。注意：请不要在dsw、notebook等环境中使用该参数。
+目前ms-swift额外支持了界面推理模式（即Space部署）：
+```shell
+swift app --model '<model>' --studio_title My-Awesome-Space --stream true
+# 或者
+swift app --model '<model>' --adapters '<adapter>' --stream true
+```
+即可启动一个只有推理页面的应用，该应用会在启动时对模型进行部署并提供后续使用。

ms-swift/docs/source/Instruction/Agent支持.md ADDED Viewed

	@@ -0,0 +1,207 @@

+# Agent支持
+## 数据集格式
+纯文本Agent和多模态Agent的示例数据样本如下：
+```jsonl
+{"tools": ["{\"type\": \"function\", \"function\": {\"name\": \"realtime_aqi\", \"description\": \"天气预报。获取实时空气质量。当前空气质量，PM2.5，PM10信息\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"string\", \"description\": \"城市名，例如：上海\"}}, \"required\": [\"city\"]}}}"], "messages": [{"role": "user", "content": "北京和上海今天的天气情况"}, {"role": "tool_call", "content": "{\"name\": \"realtime_aqi\", \"arguments\": {\"city\": \"北京\"}}"}, {"role": "tool_call", "content": "{\"name\": \"realtime_aqi\", \"arguments\": {\"city\": \"上海\"}}"}, {"role": "tool_response", "content": "{\"city\": \"北京\", \"aqi\": \"10\", \"unit\": \"celsius\"}"}, {"role": "tool_response", "content": "{\"city\": \"上海\", \"aqi\": \"72\", \"unit\": \"fahrenheit\"}"}, {"role": "assistant", "content": "根据天气预报工具，北京今天的空气质量指数为10，属于良好水平；上海今天的空气质量指数为72，属于轻度污染水平。"}]}
+{"tools": ["{\"type\": \"function\", \"function\": {\"name\": \"click\", \"description\": \"点击屏幕中的某个位置\", \"parameters\": {\"type\": \"object\", \"properties\": {\"x\": {\"type\": \"integer\", \"description\": \"横坐标，表示屏幕上的水平位置\"}, \"y\": {\"type\": \"integer\", \"description\": \"纵坐标，表示屏幕上的垂直位置\"}}, \"required\": [\"x\", \"y\"]}}}"], "messages": [{"role": "user", "content": "<image>现在几点了？"}, {"role": "assistant", "content": "<think>\n我可以通过打开日历App来获取当前时间。\n</think>\n"}, {"role": "tool_call", "content": "{\"name\": \"click\", \"arguments\": {\"x\": 105, \"y\": 132}}"}, {"role": "tool_response", "content": "{\"images\": \"<image>\", \"status\": \"success\"}"}, {"role": "assistant", "content": "成功打开日历App，现在的时间为中午11点"}], "images": ["desktop.png", "calendar.png"]}
+```
+- agent_template为"react_en", "hermes"等情况下，该格式适配所有模型Agent训练，可以轻松在不同模型间切换。
+- 其中tools是一个`List[str]`，其中每一个tool需要是json字符串，messages中role为'tool_call'和'tool_response/tool'的content部分都需要是json字符串。
+- tools字段将在训练/推理时和`{"role": "system", ...}`部分组合，根据agent_template组成完整的system部分。
+- `{"role": "tool_call", ...}`部分将根据agent_template自动转成对应格式的`{"role": "assistant", ...}`，多条连续的`{"role": "assistant", ...}`将拼接在一起组成完整的assistant_content。
+- `{"role": "tool_response", ...}`也可以写成`{"role": "tool", ...}`，这两种写法是等价的。该部分也将根据`agent_template`自动转换格式。该部分在训练时将不进行损失的计算，角色类似于`{"role": "user", ...}`。
+- 该格式支持并行调用工具，例子参考第一条数据样本。多模态Agent数据样本中`<image>`标签数量应与"images"长度相同，其标签位置代表图像特征的插入位置。当然也支持其他模态，例如audios, videos。
+以下为上述两条数据样本由qwen2_5和qwen2_5_vl的template进行encode后的input_ids和labels，选择的agent_template为**hermes**：
+样本一（并行工具调用）：
+```text
+[INPUT_IDS] <|im_start|>system
+You are Qwen, created by Alibaba Cloud. You are a helpful assistant.
+# Tools
+You may call one or more functions to assist with the user query.
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{"type": "function", "function": {"name": "realtime_aqi", "description": "天气预报。获取实时空气质量。当前空气质量，PM2.5，PM10信息", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "城市名，例如：上海"}}, "required": ["city"]}}}
+</tools>
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call><|im_end|>
+<|im_start|>user
+北京和上海今天的天气情况<|im_end|>
+<|im_start|>assistant
+<tool_call>
+{"name": "realtime_aqi", "arguments": {"city": "北京"}}
+</tool_call>
+<tool_call>
+{"name": "realtime_aqi", "arguments": {"city": "上海"}}
+</tool_call><|im_end|>
+<|im_start|>user
+<tool_response>
+{"city": "北京", "aqi": "10", "unit": "celsius"}
+</tool_response>
+<tool_response>
+{"city": "上海", "aqi": "72", "unit": "fahrenheit"}
+</tool_response><|im_end|>
+<|im_start|>assistant
+根据天气预报工具，北京今天的空气质量指数为10，属于良好水平；上海今天的空气质量指数为72，属于轻度污染水平。<|im_end|>
+[LABELS] [-100 * 195]<tool_call>
+{"name": "realtime_aqi", "arguments": {"city": "北京"}}
+</tool_call>
+<tool_call>
+{"name": "realtime_aqi", "arguments": {"city": "上海"}}
+</tool_call><|im_end|>[-100 * 67]根据天气预报工具，北京今天的空气质量指数为10，属于良好水平；上海今天的空气质量指数为72，属于轻度污染水平。<|im_end|>
+```
+样本二（多模态，混合assistant和tool_call）：
+```text
+[INPUT_IDS] <|im_start|>system
+You are a helpful assistant.
+# Tools
+You may call one or more functions to assist with the user query.
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{"type": "function", "function": {"name": "click", "description": "点击屏幕中的某个位置", "parameters": {"type": "object", "properties": {"x": {"type": "integer", "description": "横坐标，表示屏幕上的水平位置"}, "y": {"type": "integer", "description": "纵坐标，表示屏幕上的垂直位置"}}, "required": ["x", "y"]}}}
+</tools>
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call><|im_end|>
+<|im_start|>user
+<|vision_start|>[151655 * 729]<|vision_end|>现在几点了？<|im_end|>
+<|im_start|>assistant
+<think>
+我可以通过打开日历App来获取当前时间。
+</think>
+<tool_call>
+{"name": "click", "arguments": {"x": 105, "y": 132}}
+</tool_call><|im_end|>
+<|im_start|>user
+<tool_response>
+{"images": "<|vision_start|>[151655 * 729]<|vision_end|>", "status": "success"}
+</tool_response><|im_end|>
+<|im_start|>assistant
+成功打开日历App，现在的时间为中午11点<|im_end|>
+[LABELS] [-100 * 924]<think>
+我可以通过打开日历App来获取当前时间。
+</think>
+<tool_call>
+{"name": "click", "arguments": {"x": 105, "y": 132}}
+</tool_call><|im_end|>[-100 * 759]成功打开日历App，现在的时间为中午11点<|im_end|>
+```
+**react_en**是常用的agent template格式之一，以下为样本一由qwen2_5使用`agent_template='react_en'`进行encode后的input_ids和labels：
+```text
+[INPUT_IDS] <|im_start|>system
+Answer the following questions as best you can. You have access to the following tools:
+realtime_aqi: Call this tool to interact with the realtime_aqi API. What is the realtime_aqi API useful for? 天气预报。获取实时空气质量。当前空气质量，PM2.5，PM10信息 Parameters: {"type": "object", "properties": {"city": {"type": "string", "description": "城市名，例如：上海"}}, "required": ["city"]} Format the arguments as a JSON object.
+Use the following format:
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [realtime_aqi]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+Begin!
+<|im_end|>
+<|im_start|>user
+北京和上海今天的天气情况<|im_end|>
+<|im_start|>assistant
+Action: realtime_aqi
+Action Input: {'city': '北京'}
+Action: realtime_aqi
+Action Input: {'city': '上海'}
+Observation:{"city": "北京", "aqi": "10", "unit": "celsius"}
+Observation:{"city": "上海", "aqi": "72", "unit": "fahrenheit"}
+根据天气预报工具，北京今天的空气质量指数为10，属于良好水平；上海今天的空气质量指数为72，属于轻度污染水平。<|im_end|>
+[LABELS] [-100 * 233]Action: realtime_aqi
+Action Input: {'city': '北京'}
+Action: realtime_aqi
+Action Input: {'city': '上海'}
+Observation:[-100 * 45]根据天气预报工具，北京今天的空气质量指数为10，属于良好水平；上海今天的空气质量指数为72，属于轻度污染水平。<|im_end|>
+```
+更多模型和agent_template的尝试可以使用以下代码，更多的agent template可选值参考[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/agent_template/__init__.py)。
+```python
+from swift.llm import get_model_tokenizer, get_template
+_, tokenizer = get_model_tokenizer('ZhipuAI/GLM-4-9B-0414', load_model=False)
+template = get_template(tokenizer.model_meta.template, tokenizer, agent_template='hermes')
+data = {...}
+template.set_mode('train')
+encoded = template.encode(data)
+print(f'[INPUT_IDS] {template.safe_decode(encoded["input_ids"])}\n')
+print(f'[LABELS] {template.safe_decode(encoded["labels"])}')
+```
+## tools格式
+tools字段提供了模型可以调用的API信息。你需要提供tools的名字，描述和参数，示例如下：
+```python
+tools = [{
+    'type': 'function',
+    'function': {
+        'name': 'get_current_weather',
+        'description': 'Get the current weather in a given location',
+        'parameters': {
+            'type': 'object',
+            'properties': {
+                'location': {
+                    'type': 'string',
+                    'description': 'The city and state, e.g. San Francisco, CA'
+                },
+                'unit': {
+                    'type': 'string',
+                    'enum': ['celsius', 'fahrenheit']
+                }
+            },
+            'required': ['location']
+        }
+    }
+}]
+```
+## loss_scale的使用
+loss_scale可以对模型输出部分的训练损失权重进行调节。例如在ReACT格式中，可以设置`--loss_scale react`（loss_scale配置文件书写在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/loss_scale/config/react.json)），该参数起到的作用是：
+'Thought:'和'Final Answer:'部分权重为1，'Action:'和'Action Input:'部分权重为2，'Observation:'字段本身权重为2，'Observation:'后面的工具调用结果权重为0。
+具体的loss_scale插件设计，请参考[插件化](../Customization/插件化.md)文档.
+## 训练
+- 训练Base模型的Agent能力，通过修改`--model`切换不同模型，参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/agent/qwen2_5.sh)。
+- 训练GLM4的agent_template为hermes，参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/agent/glm4.sh)。
+- 使用`--loss_scale`对模型输出部分的损失权重进行调整，参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/agent/loss_scale)。
+## 推理
+- 🚀原始模型或者全参数训练后模型的推理，参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo_agent.py)。
+- LoRA训练后推理，参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/agent/loss_scale/infer.md)。
+## 部署
+服务端和客户端代码，参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/deploy/agent)。

ms-swift/docs/source/Instruction/GRPO.md ADDED Viewed

	@@ -0,0 +1,461 @@

+# GRPO
+论文地址
+[DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://arxiv.org/abs/2402.03300)
+环境安装
+```bash
+pip install math_verify==0.5.2 # reward function
+pip install -U trl
+```
+**更新日志**
+- **2025-05-11** — 支持生成式奖励模型，通过 reward_model_plugin 自定义奖励模型逻辑。有关更多详细信息，请参阅[自定义奖励模型](#自定义奖励模型)部分。
+- **2025-04-30** — external vllm server 的启动命令改为 `swift rollout`
+**FAQ**
+1. 训练过程中 loss 接近0 是正常情况， 参考[issue](https://github.com/huggingface/open-r1/issues/239#issuecomment-2646297851)
+2. 训练的steps怎么计算? 参考[issue](https://github.com/modelscope/ms-swift/issues/3912)
+3. clip_ratio为什么总是1? 参考[issue](https://github.com/huggingface/open-r1/issues/239#issuecomment-2646297851)
+## 集群支持
+![](../../resources/grpo.png)
+GRPO 训练框架支持集成高性能推理引擎（如 vLLM）来加速采样过程，提供以下两种部署模式：
+### 1. 内部集成模式 (Internal)
+- 在Trainer内部直接启动推理服务
+- 提供两种资源分配策略：
+  - **协同模式 (Colocate)**: 训练与推理共享GPU资源
+  - **异步模式 (Async)**: 训练与推理使用独立GPU资源
+### GRPO训练资源配置方案
+| 配置场景                 | NPROC_PER_NODE | num_infer_workers | 资源分配说明             |
+|--------------------------|----------------|------------------|------------------------|
+| **Colocate**   | =总GPU数      | =总GPU数          | 训练和推理共享全部GPU资源              |
+| **Async**      | =训练卡数      | =推理卡数         | 必须满足：训练卡数 + 推理卡数 = 总GPU数 |
+**注：**
+1. 在Colocate模式下推荐设置`sleep_level=1`, 在模型训练时释放vLLM占用显存
+2. 总GPU数指可见的GPU设备总数
+### 2. 外部服务模式 (External)
+连接外部的 vLLM 推理服务器
+使用时，使用以下参数配置外部 vLLM 服务器
+```bash
+--vllm_server_host <服务器IP> \
+--vllm_server_port <服务端口> \
+--vllm_server_timeout <超时时间> \
+```
+使用`swift rollout`命令部署vLLM 服务器, 现仅支持vLLM backend
+```bash
+CUDA_VISIBLE_DEVICES=2 \
+swift rollout \
+  --model Qwen/Qwen2.5-VL-7B-Instruct \
+  --tensor_parallel_size 2 \
+```
+完整脚本可以参考[这里](../../../examples/train/grpo/multi_node/Qwen2_5_32B_full.sh)
+## 奖励函数
+### 自定义奖励函数
+奖励函数接受模型生成的文本 completions 以及其他数据集中的列作为参数(kwargs)，并对模型生成的文本进行打分。以下是一个示例，展示了如何实现一个简单的长度奖励函数。该函数会在模型生成的文本长度超过 1024 时，给予 1.0 的奖励信号；否则，奖励信号为 0.0。
+```python
+from swift.plugin import ORM, orms
+class DummyLengthRewardFunction(ORM):
+    def __call__(self, completions, **kwargs):
+        return [1.0 if len(completion) > 1024 else 0.0 for completion in completions]
+orms['dummy']= DummyLengthRewardFunction
+```
+可以在`swift/examples/train/grpo/plugin/plugin.py`中加入该奖励函数，使用参数`--external_plugins examples/train/grpo/plugin/plugin.py`进行注册，并通过 reward_funcs 参数进行指定
+执行脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/grpo/plugin/run_external_rm.sh)
+### 内置奖励函数
+swift内置了五种基于规则的奖励函数(代码见swift/plugin/orm.py)
+| 奖励函数       | 论文                                                                 |
+|----------------|----------------------------------------------------------------------------|
+| accuracy       | [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via RL](https://arxiv.org/abs/2501.12948) |
+| format         | 同上                                                                        |
+| cosine         | [Demystifying Long Chain-of-Thought Reasoning in LLMs](https://arxiv.org/abs/2502.03373) |
+| repetition     | 同上                                                                        |
+| soft_overlong  | [Decoupled Clip and Dynamic sAmpling Policy Optimization (DAPO)](https://arxiv.org/abs/2503.14476)    |
+#### 1. **accuracy**
+该函数将模型的生成结果与数据集中的 solution 列进行比较，计算准确率分数。如果生成结果与标准答案一致，则得分为 1.0；否则为 0.0。
+注意：该奖励函数使用`math_verify`库解析生成结果和solution中的答案，可能只适用于特定的数学数据集。
+#### 2. **format**
+论文中使用以下system prompt要求模型按照固定格式进行返回
+```
+A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>
+```
+该函数检查模型是否按照 `<think>think content</think><answer>answer content</answer>` 的格式进行生成。如果生成文本符合格式要求，则得分为 1.0；否则为 0.0。
+#### 3. **cosine**
+论文发现，仅使用 accuracy 奖励函数进行训练会导致模型的生成长度趋于超长，从而影响训练效果。cosine 奖励函数通过控制模型的生成长度来优化训练过程：
+- 对于生成正确答案的文本，奖励值随长度增加而递减，鼓励模型生成简洁的回答。
+- 对于生成错误答案的文本，奖励值随长度增加而递增，鼓励模型进行更深入的思考。
+使用余弦函数平滑地调整奖励值，确保奖励变化在合理范围内。余弦函数的参数包括生成文本的长度、最大长度限制以及奖励的最小值和最大值。
+参数
+- cosine_min_len_value_wrong（默认值：-0.5）：生成错误答案时，最小长度对应的奖励值。
+- cosine_max_len_value_wrong（默认值：0.0）：生成错误答案时，最大长度对应的奖励值。
+- cosine_min_len_value_correct（默认值：1.0）：生成正确答案时，最小长度对应的奖励值。
+- cosine_max_len_value_correct（默认值：0.5）：生成正确答案时，最大长度对应的奖励值。
+- cosine_max_len（默认值等于模型生成的最大长度）：生成文本的最大长度限制。
+#### 4. **repetition**
+惩罚模型生成文本中的重复内容，通过检测生成文本中的重复 n-gram 模式来评估重复程度，并给予相应的惩罚。
+函数将生成文本分割为单词，并提取指定大小的 n-gram（默认为 3-gram）。通过统计不同 n-gram 的数量与总 n-gram 数量的比例，计算重复比例。如果生成文本中重复的 n-gram 比例较高，则给予较大的负奖励（惩罚）。惩罚值通过重复比例和最大惩罚值（默认为 -1.0）计算得出。
+参数
+- repetition_n_grams（默认值：3）：用于检测重复的 n-gram 大小。
+- repetition_max_penalty（默认值：-1.0）：最大惩罚值，用于控制惩罚的强度。
+#### 5. **soft overlong punishment**
+定义长度惩罚区间。在这个区间内，给予[-1,0]的线性惩罚。
+参数
+- soft_max_length: 论文中的L_max，模型的最大生成长度，默认等于max_completion_length
+- soft_cache_length: 论文中的L_cache，控制长度惩罚区间，区间为[soft_max_length-soft_cache_length, soft_max_length]
+论文原文
+> a length-aware penalty mechanism designed to shape the reward for truncated samples. Specifically, when the response length exceeds the predefined maximum value, we define a punishment interval. Within this interval, the longer the response, the greater the punishment it receives. This penalty is added to the original rule-based correctness reward, thereby signaling to the model to avoid excessively long responses.
+#### 6. **奖励模型**
+除了基于规则的奖励函数外，本框架还支持使用奖励模型作为奖励函数。在使用奖励模型时，需要指定 reward_model 参数，该参数与 model 参数类似，用于指定奖励模型的路径或名称。需要注意的是，reward_model 和 reward_funcs 至少需要指定一个。
+## 参数与运行脚本
+参数
+- per_device_train_batch_size: 每个设备训练批量大小，在GRPO中，指 completion 的批次大小。
+- per_device_eval_batch_size: 每个设备评估批量大小，在GRPO中，指 completion 的批次大小。
+- num_generations: 每个prompt采样的数量，论文中的G值，per_device_batch_size * gradient_accumulation_steps * nproc_per_node 需要能被 num_generations 整除，默认为8
+- max_completion_length: 采样生成的最大长度，默认为512
+- ds3_gather_for_generation: 该参数适用于DeepSpeed ZeRO-3。如果启用，策略模型权重将被收集用于生成，从而提高生成速度。然而，禁用此选项允许训练超出单个GPU VRAM的模型，尽管生成速度会变慢。禁用此选项与vLLM生成不兼容。默认为True
+- reward_funcs: 奖励函数，根据模型生成结果进行打分，内置accuracy、format、cosine、repetition和soft_overlong五个rule-based函数，详细见 swift/plugin/orm.py 文件
+- reward_weights: 每个奖励函数的权重。必须与奖励函数和奖励模型的总数量匹配。如果为 None，则所有奖励的权重都相等，为`1.0`
+  - 提示：如果GRPO训练中包含`--reward_model`，则其加在奖励函数的最后位置
+- reward_model: 同model, 使用奖励模型作为奖励函数，与reward_funcs至少需要指定一个。
+- reward_model_plugin: 奖励模型逻辑，默认为orm逻辑, 详细见[自定义奖励模型](#自定义奖励模型)。
+- dataset_shuffle: 是否对dataset进行随机操作，默认为True
+- loss_type: loss 归一化的类型，可选项为['grpo', 'bnpo', 'dr_grpo'], 默认为'grpo', 具体查看该[pr](https://github.com/huggingface/trl/pull/3256#discussion_r2033213348)
+- log_completions: 是否记录训练中的模型生成内容，搭配 `--report_to wandb` 使用。默认为False
+  - 提示：若没有设置`--report_to wandb`，则会在checkpoint中创建`completions.jsonl`来存储生成内容
+- use_vllm: 是否使用vLLM作为采样的生成后端，默认为False，建议使用加快训练速度
+- vllm_device: 设置vLLM部署的设备，默认为`auto`, 即未被使用的第一张显卡，使用`cuda:x`来设置特定的卡。
+- vllm_gpu_memory_utilization: vllm透传参数，默认为0.9
+- vllm_max_model_len: vllm透传参数，默认为None
+- vllm_max_num_seqs: vllm透传参数，默认为256
+- vllm_enforce_eager: vllm透传参数，默认为False
+- vllm_limit_mm_per_prompt: vllm透传参数，默认为None
+- vllm_enable_prefix_caching: vllm透传参数，默认为True
+- vllm_server_host：vLLM server host地址，默认为None，使用外部vLLM server时使用
+- vllm_server_port: vLLM server 服务端口，默认为8000
+- vllm_server_timeout: 连接vLLM server的超时时间，默认为120s
+- num_iterations: 每个批次的迭代更新次数，默认为1.
+- epsilon: clip 系数，默认为0.2.
+- epsilon_high: upper clip 系数，默认为None，设置后与epsilon共同构成[epsilon, epsilon_high]裁剪范围.
+- async_generate: 异步rollout以提高训练速度，默认`false`.
+- sleep_level: vllm特有参数，在训练和rollout复用卡的时候，可以选择vllm进行offload.
+- move_model_batches: 在模型向vLLM/LMDeploy等快速推理框架移动参数时，将layers分为多少个batch. 默认为None, 代表整个模型不进行拆分，否则拆分为move_model_batches+1(非layer参数)+1(多模态部分参数)个
+- offload_optimizer: 是否在vLLM/LMDeploy推理时offload optimizer参数，默认为False
+- offload_model: 是否在vLLM/LMDeploy推理时offload 模型本身，默认为False
+  - 注意：若该参数设置为True，训练时grad_norm一直为0，请安装`vllm==0.7.3`
+- gc_collect_after_offload: 是否在offload结束时进行gc（python gc和GPU gc），默认为False
+- multi_turn_func: 多轮GRPO参数, 传入对应的plugin名称, 同时在plugin/multi_turn.py中添加好对应的实现
+- dynamic_sample：筛除group内奖励标准差为0的数据，额外采样新数据，默认为False。
+- max_resample_times：dynamic_sample设置下限制重采样次数，默认3次。
+- overlong_filter：跳过超长截断的样本，不参与loss计算，默认为False。
+- vllm_server_host：vLLM server host地址，默认为None，使用外部vLLM server时使用
+- vllm_server_port: vLLM server 服务端口，默认为8000
+- vllm_server_timeout: 连接vLLM server的超时时间，默认为120s
+奖励函数参数，见[内置奖励函数](#内置奖励函数)
+可以使用vLLM、LMDeploy作为采样后端加速训练
+多卡vLLM
+```bash
+# async mode
+# 要求 num_infer_workers(部署) + NPROC_PER_NODE(训练) = device_count
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+NPROC_PER_NODE=7 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-7B \
+    --reward_funcs accuracy format \
+    --use_vllm true \
+    --vllm_device auto \
+    --vllm_gpu_memory_utilization 0.7 \
+    --vllm_max_model_len 8192 \
+    --num_infer_workers 1 \
+    --train_type full \
+    --torch_dtype bfloat16 \
+    --dataset 'AI-MO/NuminaMath-TIR#5000' \
+    --max_completion_length 2048 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-6 \
+    --gradient_accumulation_steps 2 \
+    --eval_steps 200 \
+    --save_steps 200 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 4096 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_generations 7 \
+    --temperature 0.9 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --deepspeed zero2 \
+    --log_completions true
+# colocate mode
+# 要求 num_infer_workers(部署) = NPROC_PER_NODE(训练) = device_count
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+NPROC_PER_NODE=8 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-1.5B \
+    --reward_funcs accuracy format \
+    --use_vllm true \
+    --vllm_device auto \
+    --vllm_gpu_memory_utilization 0.5 \
+    --vllm_max_model_len 8192 \
+    --num_infer_workers 8 \
+    --train_type full \
+    --torch_dtype bfloat16 \
+    --dataset 'AI-MO/NuminaMath-TIR#5000' \
+    --max_completion_length 2048 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-6 \
+    --gradient_accumulation_steps 2 \
+    --eval_steps 200 \
+    --save_steps 200 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 4096 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_generations 8 \
+    --temperature 0.9 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --deepspeed zero2 \
+    --log_completions true \
+    --sleep_level 1 \
+    --offload_model true \
+    --offload_optimizer true \
+    --gc_collect_after_offload true
+```
+单卡
+```bash
+# PT backend
+CUDA_VISIBLE_DEVICES=0 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-7B \
+    --reward_funcs accuracy format \
+    --train_type lora \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --torch_dtype bfloat16 \
+    --dataset 'AI-MO/NuminaMath-TIR#1000' \
+    --max_completion_length 1024 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 4 \
+    --per_device_eval_batch_size 4 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 1 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_generations 4 \
+    --temperature 0.9 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --log_completions true
+# vLLM backend
+CUDA_VISIBLE_DEVICES=0 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-7B \
+    --vllm_gpu_memory_utilization 0.5 \
+    --use_vllm true \
+    --sleep_level 1 \
+    --offload_model true \
+    --offload_optimizer true \
+    --gc_collect_after_offload true \
+    --reward_funcs accuracy format \
+    --train_type lora \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --torch_dtype bfloat16 \
+    --dataset 'AI-MO/NuminaMath-TIR#1000' \
+    --max_completion_length 1024 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 4 \
+    --per_device_eval_batch_size 4 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 1 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_generations 4 \
+    --temperature 0.9 \
+    --system 'examples/train/grpo/prompt.txt' \
+    --log_completions true
+```
+多机训练参考[这里](../../../examples/train/grpo/multi_node/)
+注：内部集成模式下，需要不同节点的GPU配置以及训练参数相同
+## 自定义奖励模型
+默认情况下，奖励模型指的是包含数值头的分类模型（通常称为输出奖励模型（ORM））。这些模型对其他模型的输出进行评分，产生一个标量值，表示模型响应的质量。
+目前，我们可以利用reward_model_plugin灵活地自定义奖励模型的处理逻辑。这使得实现诸如生成式奖励模型等技术成为可能，包括：
+- 自定义模型的系统提示：定义特定的指令和上下文以指导评估过程。
+- 处理模型交互历史：管理对话上下文，以提供有意义且具有上下文感知的评估。
+- 定义自定义评估标准：设置独特的标准和度量，用于评估模型的响应，超越默认的准确性和相关性衡量标准。
+通过reward_model_plugin，开发者可以针对其应用的特定需求定制奖励评估过程。这种灵活性允许更细致和有效的基于奖励的训练策略。
+我们在 [rm_plugin.py](../../../swift/plugin/rm_plugin.py) 中提供了一个简单的生成式奖励模型示例（GenRMPlugin）。
+您还可以在 [plugin.py](../../../examples/train/grpo/plugin/plugin.py) 中自定义您的奖励模型插件，并使用 `external_plugins` 参数进行注册。
+以下是一个训练脚本示例，用于使用两个奖励模型，包括一个 ORM 和一个 Gen-RM（此处使用 qwen2.5-3B-Instruct）进行 GRPO 训练：
+```
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+NPROC_PER_NODE=8 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-7B \
+    --dataset AI-MO/NuminaMath-TIR#5000 \
+    --external_plugins examples/train/grpo/plugin/plugin.py \
+    --reward_funcs format \
+    --reward_model Qwen/Qwen2.5-3B-Instruct Shanghai_AI_Laboratory/internlm2-7b-reward \
+    --reward_model_plugin genrm my_rmplugin \
+    --reward_weights 0.1 1 1 \
+    --num_infer_workers 8 \
+    --vllm_gpu_memory_utilization 0.5 \
+    --sleep_level 1 \
+    --offload_model true \
+    --offload_optimizer true \
+    --gc_collect_after_offload true \
+    --log_completions true \
+    --deepspeed zero2
+```
+注意：
+1. 在 GRPOTrainer 中，reward_model 会依次append到 reward_funcs 中。因此，reward_weights 的顺序对应 [reward_funcs, reward_model]。
+2. reward_model_plugin 默认为 default，即使用 ORM 处理逻辑。
+## DAPO
+[Decoupled Clip and Dynamic sAmpling Policy Optimization (DAPO)](https://arxiv.org/abs/2503.14476)在GRPO的基础上设置了几种trick，分别是
+- Clip Higher
+- Dynamic Sampling
+- Overlong Filtering
+- Token level Loss
+- Soft Overlong Punishment
+其中Token level Loss是默认实现，不用额外设置。对于其余trick，我们可以基于GRPOTrainer，设置以下参数实现。
+| 参数                 | 类型      | 值      |
+|----------------------|-----------|-------------|
+| `--epsilon_high`     | `float`   | `0.28`      |
+| `--dynamic_sample`   | `bool`    | `true`      |
+| `--overlong_filter`  | `bool`    | `true`      |
+| `--reward_funcs`     | `str`     | `soft_overlong`|
+| `--max_resample_times` | `int`    | `3`        |
+参考训练脚本(八卡colocate mode)
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+NPROC_PER_NODE=8 \
+WANDB_API_KEY=xxx \
+swift rlhf \
+    --rlhf_type grpo \
+    --model Qwen/Qwen2.5-1.5B \
+    --reward_funcs accuracy soft_overlong \
+    --max_completion_length 4096 \
+    --soft_cache_length 819 \
+    --epsilon 0.2 \
+    --epsilon_high 0.28 \
+    --dynamic_sample true \
+    --overlong_filter true \
+    --max_resample_times 3 \
+    --use_vllm true \
+    --vllm_gpu_memory_utilization 0.6 \
+    --num_infer_workers 8 \
+    --train_type full \
+    --torch_dtype bfloat16 \
+    --dataset AI-MO/NuminaMath-TIR#5000 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 4 \
+    --per_device_eval_batch_size 4 \
+    --learning_rate 1e-6 \
+    --eval_steps 1000 \
+    --save_steps 1000 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_generations 8 \
+    --temperature 1.0 \
+    --top_p 1.0 \
+    --deepspeed zero2 \
+    --log_completions true \
+    --num_iterations 1 \
+    --report_to tensorboard wandb \
+    --beta 0.0
+```

ms-swift/docs/source/Instruction/Megatron-SWIFT训练.md ADDED Viewed

	@@ -0,0 +1,293 @@

+# Megatron-SWIFT训练
+SWIFT引入了Megatron的并行技术来加速大模型的训练，包括数据并行、张量并行、流水线并行、序列并行，上下文并行，专家并行。支持Qwen3、[Qwen3-MoE](https://github.com/modelscope/ms-swift/blob/main/examples/train/megatron/qwen3_moe.sh)、Qwen2.5、Llama3、Deepseek-R1蒸馏系等模型的预训练和微调。完整支持的模型可以参考[支持的模型与数据集文档](./支持的模型和数据集.md)。
+## 环境准备
+使用Megatron-SWIFT，除了安装swift依赖外，还需要安装以下内容：
+```shell
+# 推荐torch版本：2.5 / 2.6
+pip install pybind11
+# transformer_engine
+# 若出现安装错误，可以参考该issue解决: https://github.com/modelscope/ms-swift/issues/3793
+pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
+# apex
+git clone https://github.com/NVIDIA/apex
+cd apex
+pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
+# megatron-core
+pip install git+https://github.com/NVIDIA/Megatron-LM.git@core_r0.12.0
+```
+或者你也可以使用镜像：
+```
+modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
+modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
+```
+依赖库Megatron-LM中的训练模块将由swift进行git clone并安装。你也可以通过环境变量`MEGATRON_LM_PATH`指向已经下载好的repo路径（断网环境，[core_r0.12.0分支](https://github.com/NVIDIA/Megatron-LM/tree/core_r0.12.0)）。
+## 快速入门案例
+这里介绍使用2卡80GiB A100对Qwen2.5-7B-Instruct模型进行自我认知微调的快速入门案例，以下最佳实践可以在10分钟内完成。
+首先，我们需要将HF格式的权重转为Megatron格式：
+```shell
+CUDA_VISIBLE_DEVICES=0 \
+swift export \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --to_mcore true \
+    --torch_dtype bfloat16 \
+    --output_dir Qwen2.5-7B-Instruct-mcore
+```
+然后，使用以下脚本进行训练，训练所需显存资源为2*80GiB：
+```shell
+NPROC_PER_NODE=2 \
+CUDA_VISIBLE_DEVICES=0,1 \
+megatron sft \
+    --load Qwen2.5-7B-Instruct-mcore \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --tensor_model_parallel_size 2 \
+    --micro_batch_size 4 \
+    --global_batch_size 16 \
+    --recompute_granularity selective \
+    --train_iters 100 \
+    --eval_iters 5 \
+    --finetune true \
+    --cross_entropy_loss_fusion true \
+    --lr 1e-5 \
+    --lr_warmup_iters 10 \
+    --min_lr 1e-6 \
+    --save megatron_output/Qwen2.5-7B-Instruct \
+    --save_interval 100 \
+    --max_length 2048 \
+    --system 'You are a helpful assistant.' \
+    --num_workers 4 \
+    --no_save_optim true \
+    --no_save_rng true \
+    --dataset_num_proc 4 \
+    --model_author swift \
+    --model_name swift-robot
+```
+最后，将Megatron格式权重转为HF格式：
+```shell
+CUDA_VISIBLE_DEVICES=0 \
+swift export \
+    --mcore_model megatron_output/Qwen2.5-7B-Instruct/vx-xxx \
+    --to_hf true \
+    --torch_dtype bfloat16 \
+    --output_dir megatron_output/Qwen2.5-7B-Instruct/vx-xxx-hf
+```
+我们对生成的HF格式权重进行推理：
+```shell
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --model megatron_output/Qwen2.5-7B-Instruct/vx-xxx-hf \
+    --stream true \
+    --temperature 0 \
+    --max_new_tokens 2048
+```
+推理结果如下：
+```
+<<< who are you?
+I am a language model developed by swift, you can call me swift-robot. How can I assist you?
+```
+- 若要进行预训练，你可以使用`megatron pt`替代`megatron sft`，这将会使用生成式的template进行训练。
+- **更多案例**：包括packing、多机、32K上下文、MoE模型、预训练，可以查看[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/megatron)。
+## Benchmark
+使用`megatron sft`和`swift sft`在单机八卡A800环境下进行Dense/MoE模型全参数训练的速度对比如下，对应脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/megatron/benchmark)。
+**Dense** Qwen2.5-14B:
+|          | Megatron-LM | Deepspeed-ZeRO2 | Deepspeed-ZeRO3 |
+| -------- | ----------- | ---------- | ---------- |
+| 训练速度 |      9.04s/it       |  10.32s/it   | 10.56s/it |
+| 显存占用 | 8\*64GB     |  8\*80GB   | 8\*58GB |
+**MoE** Qwen1.5-MoE-A2.7B:
+|          | Megatron-LM | Deepspeed-ZeRO2 | Deepspeed-ZeRO3 |
+| -------- | ----------- | ---------- | ---------- |
+| 训练速度 |      2.93s/it       |  6.02s/it   | 24.30s/it |
+| 显存占用 | 8\*66GB     |  8\*72GB   | 8\*50GB |
+## 命令行参数
+### Megatron参数
+**训练参数**:
+- 🔥micro_batch_size: 每个device的批次大小，默认为1。
+- 🔥global_batch_size: 总批次大小，等价于`micro_batch_size*数据并行大小*梯度累加步数`。默认为16。
+- 🔥recompute_granularity: 重新计算激活的粒度，可选项为'full', 'selective'。其中full代表重新计算整个transformer layer，selective代表只计算transformer layer中的核心注意力部分。通常'selective'是推荐的。默认为'selective'。
+- 🔥recompute_method: 该参数需将recompute_granularity设置为'full'才生效，可选项为'uniform', 'block'。默认为None。
+- 🔥recompute_num_layers: 该参数需将recompute_granularity设置为'full'才生效，默认为None。若`recompute_method`设置为uniform，该参数含义为每个均匀划分的重新计算单元的transformer layers数量。例如你可以指定为`--recompute_granularity full --recompute_method uniform --recompute_num_layers 4`。recompute_num_layers越大，显存占用越小，计算成本越大。默认为None。
+- recompute_modules: 选项包括"core_attn", "moe_act", "layernorm", "mla_up_proj", "mlp", "moe"，默认值为["core_attn"]。例如在MoE训练时，你可以通过指定`--recompute_granularity selective --recompute_modules core_attn moe`降低显存。其中"core_attn"、"mlp" 和 "moe" 使用常规检查点，"moe_act"、"layernorm" 和 "mla_up_proj" 使用输出丢弃检查点。
+  - "core_attn"：重新计算 Transformer 层中的核心注意力部分。
+  - "mlp"：重新计算密集的 MLP 层。
+  - "moe"：重新计算 MoE 层。
+  - "moe_act"：重新计算 MoE 中的 MLP 激活函数部分。
+  - "layernorm"：重新计算 input_layernorm 和 pre_mlp_layernorm。
+  - "mla_up_proj"：重新计算 MLA 上投影和 RoPE 应用部分。
+- deterministic_mode: 确定性模式，这会导致训练速度下降，默认为False。
+- 🔥train_iters: 训练的总迭代次数，默认为None。
+- 🔥log_interval: log的时间间隔（单位：iters），默认为5。
+- tensorboard_dir: tensorboard日志写入的目录。默认None，即存储在`f'{save}/runs'`目录下。
+- no_masked_softmax_fusion: 默认为False。用于禁用query_key_value的scaling, masking, and softmax融合。
+- no_bias_dropout_fusion: 默认为False。用于禁用bias和dropout的融合。
+- no_bias_swiglu_fusion: 默认为False。指定`--no_bias_swiglu_fusion true`，用于禁止bias和swiglu融合。
+- no_rope_fusion: 默认为False。指定`--no_rope_fusion true`用于禁止rope融合。
+- no_gradient_accumulation_fusion: 默认为False。指定`--no_gradient_accumulation_fusion true`用于禁用梯度累加融合。
+- 🔥cross_entropy_loss_fusion: 启动交叉熵损失计算融合。默认为False。
+- calculate_per_token_loss: 根据全局批次中的非填充token数量来对交叉熵损失进行缩放。默认为True。
+- 🔥attention_backend: 使用的注意力后端 (flash、fused、unfused、local、auto)。默认为 auto。
+- optimizer: 优化器类型，可选为'adam'、'sgd'。默认为adam。
+- dataloader_type: 默认为'cyclic'，可选为'single', 'cyclic', 'external'。若开启`--streaming`，则设置为`external`。
+- manual_gc: 禁用默认垃圾回收器，手动触发垃圾回收。默认为False。
+- manual_gc_interval: 触发垃圾回收的间隔。默认为0。
+- seed: python、numpy、pytorch和cuda的随机种子，默认为42。
+- 🔥num_workers: dataloader的workers数量，默认为4。
+  - 注意：若设置`--streaming true`，则设置为1。
+- seq_length: 默认为None，即设置为`max_length`。对数据集长度进行限制请使用基本参数中的`--max_length`控制，无需设置此参数。
+- use_cpu_initialization: 在cpu上初始化权重，默认为False。在进行HF和MCore权重转换时会被使用。
+- no_create_attention_mask_in_dataloader: 在dataloader中不创建attention mask，默认为True。
+**学习率参数**:
+- 🔥lr: 初始学习率，最终会根据学习率预热策略和衰减策略决定每个迭代的学习率，默认为1e-5。
+- lr_decay_style: 学习率衰减策略，默认为'cosine'。通常设置为'cosine', 'linear', 'constant'。
+- 🔥lr_decay_iters: 学习率衰减的迭代次数。默认为None，则设置为`--train_iters`。
+- 🔥lr_warmup_iters: 线性学习率预热的迭代次数，默认为0。
+- 🔥min_lr: 学习率的最小值，将低于该阈值的学习率裁剪为该值，默认为0。
+**正则化参数**:
+- 🔥weight_decay: 默认为0.1。
+- 🔥clip_grad: l2梯度裁剪，默认为1.0。
+- adam_beta1: 默认0.9。
+- adam_beta2: 默认0.95。
+- adam_eps: 默认1e-8。
+- sgd_momentum: 默认为0.9。
+**checkpoint参数**:
+- 🔥save: checkpoint的输出目录，默认None。在训练中，若未设置该参数，则默认为`f'megatron_output/{model_suffix}'`，例如`'megatron_output/Qwen2.5-7B-Instruct'`。
+  - 注意：若在多机训练时，请确保每个节点的保存路径指向相同位置。否则你需要在训练后手动集中这些权重。
+- 🔥save_interval: checkpoint保存的间隔（steps），默认为500。
+  - 注意：训练结束时一定会保存权重。
+- 🔥no_save_optim: 不保存optimizer，默认为False。
+- 🔥no_save_rng: 不保存rng，默认为False。
+- 🔥load: 加载的checkpoint目录，默认None。
+- 🔥no_load_optim: 不载入optimizer，默认为False。
+- 🔥no_load_rng: 不载入rng，默认为False。
+- 🔥finetune: 将模型加载并微调。不加载检查点的优化器和随机种子状态，并将迭代数设置为0。默认为False。
+- ckpt_format: checkpoint的格式。可选为'torch', 'torch_dist', 'zarr'。默认为'torch_dist'。
+- no_initialization: 不对权重进行初始化，默认为True。
+- auto_detect_ckpt_format: 自动检测ckpt format为legacy还是distributed格式。默认为True。
+- exit_on_missing_checkpoint: 如果设置了`--load`，但找不到检查点，则直接退出，而不是初始化。默认为True。
+**分布式参数**:
+- distributed_backend: 分布式后端，可选为'nccl', 'gloo'。默认为nccl。
+- 🔥use_distributed_optimizer: 使用分布式优化器。默认为True。
+- 🔥tensor_model_parallel_size: tp数，默认为1。
+- 🔥pipeline_model_parallel_size: pp数，默认为1。
+- decoder_first_pipeline_num_layers: decoder第一个流水线阶段所包含的Transformer层数。默认为 None，表示将Transformer层数平均分配到所有流水线阶段。
+- decoder_last_pipeline_num_layers: decoder最后一个流水线阶段所包含的Transformer层数。默认为 None，表示将Transformer层数平均分配到所有流水线阶段。
+- 🔥sequence_parallel: 启用序列并行优化。默认为False。
+- 🔥context_parallel_size: cp数，默认为1。
+- tp_comm_overlap: 启用张量并行通信与GEMM（通用矩阵乘法）内核的重叠（降低通信耗时）。默认为False。
+- overlap_grad_reduce: 启用DDP中grad reduce操作的重叠（降低DP通信耗时）。默认为False。
+- overlap_param_gather: 启用分布式优化器中参数all-gather的重叠（降低DP通信耗时）。默认为False。
+- distributed_timeout_minutes: torch.distributed的timeout时间（单位为分钟），默认为60分钟。
+**日志参数**:
+- log_params_norm: 记录参数的norm。默认为False。
+- log_throughput: 记录每个GPU的吞吐量。默认为True。
+  - 注意：在非packing情况下，log_throughput并不准确，因为`seq_length`并不等于真实序列长度。
+- tensorboard_log_interval: 记录到tensorboard的间隔（steps），默认为1。
+- tensorboard_queue_size: 队列长度（与磁盘IO相关），类似于写入的间隔。默认为50。
+- log_timers_to_tensorboard: 记录timers到tensorboard。默认为True。
+- no_log_learning_rate_to_tensorboard: 不记录学习率到tensorboard。默认为False。
+- log_validation_ppl_to_tensorboard: 将验证困惑度写入tensorboard。默认为True。
+- log_memory_to_tensorboard: 将内存日志写入tensorboard。默认为True。
+- logging_level: 日志级别。默认为None。
+- wandb_project: wandb 项目名称。默认为''，即忽略wandb。
+- wandb_exp_name: wandb 实验名称。默认为''。
+- wandb_save_dir: 本地保存 wandb 结果的路径。默认为''。
+**评估参数**:
+- 🔥eval_iters: 评估的迭代次数，默认为100。
+- 🔥eval_interval: 评估的间隔（steps），默认为None，即设置为save_interval。
+**混合精度参数**:
+- fp16: fp16模式。默认为None，会根据模型的torch_dtype进行设置。torch_dtype默认读取config.json。
+- bf16: bf16模式。默认为None，会根据模型的torch_dtype进行设置。
+- apply_query_key_layer_scaling: 将`Q * K^T` 缩放为 `1 / 层数`（例如：第layer_num层则除以layer_num）。这对fp16训练很有帮助。默认为None，即若使用`--fp16`，则设置为True。
+- attention_softmax_in_fp32: 在attention_mask和softmax中使用fp32进行计算。默认为True。
+**模型参数**: （以下参数通常不需要进行设置，会根据HF模型的config.json进行配置，用户无需关心）
+- num_layers: transformer layers的层数，默认为None。
+- hidden_size: transformer hidden size，默认为None。
+- ffn_hidden_size: transformer FFN层的hidden size。默认为None，设置为`4*hidden_size`。
+- num_attention_heads: transformer attention heads的个数，默认为None。
+- group_query_attention: 默认为None。若`num_query_groups>1`，group_query_attention设置为True，否则为False。
+- num_query_groups: 默认为1。
+- max_position_embeddings: 位置编码的最大长度，默认为None。
+- position_embedding_type: 位置编码的类型，可选为'learned_absolute'、'rope'、'relative'和'none'，默认为'rope'。
+- rotary_base: 默认为10000。
+- rotary_percent: 默认为1.。
+- normalization: 可选为'LayerNorm', 'RMSNorm'，默认为RMSNorm。
+- norm_epsilon: 默认为1e-5。
+- swiglu: 使用swiglu替代默认的gelu。默认为True。
+- untie_embeddings_and_output_weights: 解开embedding和输出权重的绑定，默认为True。
+- disable_bias_linear: 禁用linear层的bias。默认为True。
+- add_qkv_bias: 仅在QKV的linear中增加bias，默认为True。
+- attention_dropout: 默认为0.。
+- hidden_dropout: 默认为0.。
+- kv_channels: 默认为None，设置为`args.hidden_size // args.num_attention_heads`。
+- qk_layernorm: 是否对Q和K进行层归一化。
+- transformer_impl: 使用哪种transformer实现，可选项为'local'和'transformer_engine'。默认为transformer_engine。
+- padded_vocab_size: 完整词表大小，默认为None。
+- rope_scaling: rope_scaling相关参数，默认为None。格式参考[llama3.1 config.json](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct/file/view/master?fileName=config.json&status=1)，传入json字符串。
+- model_type: Huggingface模型权重中config.json中的model_type。
+**MoE参数**:
+- num_experts: MoE的专家数，默认为None。自动从config.json读取。
+- moe_ffn_hidden_size: 每个专家的前馈网络（ffn）的隐藏层大小。默认为None，设置为ffn_hidden_size。自动从config.json读取。
+- moe_shared_expert_intermediate_size: 共享专家的总FFN隐藏层大小。如果有多个共享专家，它应等于 `num_shared_experts * ffn_size_of_each_shared_expert`。 默认为None。自动从config.json读取。
+- moe_router_topk: 每个token路由到的专家数量。默认为None。自动从config.json读取。
+- moe_router_pre_softmax: 为MoE启用预softmax路由，这意味着softmax会在top-k选择之前进行。默认为None。自动从config.json读取。
+- 🔥moe_aux_loss_coeff: 辅助损失的缩放系数：建议的初始值为 1e-2。默认为None。自动从config.json读取。
+- 🔥expert_model_parallel_size: 专家并行数，默认为1。
+- moe_token_dispatcher_type: 要使用的token分发器类型。可选选项包括 'allgather'、'alltoall' 和 'alltoall_seq'。默认值为 'alltoall'。
+- moe_grouped_gemm: 当每个rank包含多个专家时，通过在多个流中启动多个本地 GEMM 内核，利用 TransformerEngine中的GroupedLinear提高利用率和性能。默认为False。
+- moe_router_load_balancing_type: 确定路由器的负载均衡策略。可选项为"aux_loss"、"seq_aux_loss"、"sinkhorn"、"none"。默认值为 "aux_loss"。
+- moe_z_loss_coeff: z-loss 的缩放系数。默认为None。
+- moe_expert_capacity_factor: 每个专家的容量因子，None表示不会丢弃任何token。默认为None。
+- moe_shared_expert_overlap: 启用共享专家计算与调度器通信之间的重叠。如果不启用此选项，共享专家将在路由专家之后执行。仅在设置了`moe_shared_expert_intermediate_size`时有效。默认为False。
+### Megatron训练参数
+Megatron训练参数继承自Megatron参数和基本参数。基本参数的内容可以参考[这里](./命令行参数.md#基本参数)。此外还包括以下参数：
+- add_version: 在`save`上额外增加目录`'<版本号>-<时间戳>'`防止权重覆盖，默认为True。
+- 🔥packing: 是否使用序列packing，默认为False。
+- 🔥streaming: 流式读取并处理数据集，默认False。通常在处理大型数据集时，设置为True。更多流式的参数查看命令行参数文档。
+- lazy_tokenize: 默认为False。若该参数设置为False，则在训练之前对所有的数据集样本进行tokenize（这可以避免在训练中出现报错）；设置为True，则在训练中对数据集进行tokenize（这可以节约内存）。
+- dataloader_persistent_workers: 透传入dataloader的参数，默认为True。
+- dataloader_prefetch_factor: 透传入dataloader的参数，默认为10。
+- max_epochs: 训练到`max_epochs`时强制退出训练，并对权重进行验证和保存。该参数在使用流式数据集时很有用。默认为None。

ms-swift/docs/source/Instruction/ReleaseNote3.0.md ADDED Viewed

	@@ -0,0 +1,80 @@

+# ReleaseNote 3.0
+> 如果您在3.x版本使用上遇到任何问题，请提交issue给我们。如存在2.x可用而3.x不可用的情况请暂时使用2.x版本等待我们修复完成。
+## 新功能
+1. 数据集模块重构。数据集加载速度提升2-20倍，encode速度提升2-4倍，支持streaming模式
+    - 移除了dataset_name机制，采用dataset_id、dataset_dir、dataset_path方式指定数据集
+    - 使用`--dataset_num_proc`支持多进程加速处理
+    - 使用`--streaming`支持流式加载hub端和本地数据集
+    - 支持`--packing`命令以获得更稳定的训练效率
+    - 指定`--dataset <dataset_dir>`支持本地加载开源数据集
+2. 对模型进行了重构：
+    - 移除了model_type机制，使用`--model <model_id>/<model_path>`来训练和推理
+    - 若是新模型，直接使用`--model <model_id>/<model_path> --template xxx --model_type xxx`，无需书写python脚本进行模型注册
+3. template模块重构：
+    - 使用`--template_backend jinja`采用jinja模式推理
+    - 采用messages格式作为入参接口
+4. 支持了plugin机制，用于定制训练过程，目前支持的plugin有：
+    - callback 定制训练回调方法
+    - loss 定制loss方法
+    - loss_scale 定制每个token的权重
+    - metric 定制交叉验证的指标
+    - optimizer 定制训练使用的optimizer和lr_scheduler
+    - tools 定制agent训练的system格式
+    - tuner 定制新的tuner
+5. 训练模块重构：
+    - 支持了一行命令启动多机训练，详情查看[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node/deepspeed/README.md)
+    - 支持所有多模态LLM的PreTrain
+    - 训练中的predict_with_generate采用infer模块，支持多模态LLM和多卡
+    - 人类对齐KTO算法支持多模态LLM
+6. 推理与部署模块重构：
+    - 支持pt backend下的batch推理，支持多卡推理
+    - 推理和部署模块统一采用openai格式接口
+    - 支持了异步推理接口
+7. app-ui合并入web-ui，app-ui支持多模态推理
+8. 支持All-to-All模型，即Emu3-Gen或Janus等文生图或全模态模型的训练和部署等
+9. 对examples进行了功能提升，目前examples可以全面反映SWIFT的能力，易用性更强
+10. 使用`--use_hf true/false`来切换HuggingFace社区和ModelScope社区的数据集模型的下载上传
+11. 更好地支持了以代码形式进行训练、推理，代码结构更清晰，并补充了大量的代码注释
+## BreakChange
+本文档列举3.x版本和2.x版本的BreakChange。开发者在使用时应当注意这些不同。
+### 参数差异
+- model_type的含义发生了变化。3.0版本只需要指定--model，model_type仅当模型为SWIFT不支持模型时才需要额外指定
+- sft_type更名为train_type
+- model_id_or_path更名为model
+- template_type更名为template
+- quantization_bit更名为quant_bits
+- check_model_is_latest更名为check_model
+- batch_size更名为per_device_train_batch_size，沿用了transformers的命名规则
+- eval_batch_size更名为per_device_eval_batch_size，沿用了transformers的命名规则
+- tuner_backend移除了swift选项
+- use_flash_attn更名为attn_impl
+- bnb_4bit_comp_dtype更名为bnb_4bit_compute_dtype
+- 移除了train_dataset_sample和val_dataset_sample
+- dtype更名为torch_dtype，同时选项名称从bf16变更为标准的bfloat16，fp16变更为float16，fp32变更为float32
+- 移除了eval_human选项
+- dataset选项移除了HF::使用方式，使用新增的--use_hf控制下载和上传
+- 移除了do_sample选项，使用temperature进行控制
+- add_output_dir_suffix更名为add_version
+- 移除了eval_token，使用api_key支持
+- target_modules(lora_target_modules)的ALL改为了all-linear，含义相同
+- deepspeed的配置更改为`default-zero2`->`zero2`, `default-zero3`->`zero3`
+- infer/deploy/export移除了--ckpt_dir参数，使用--model, --adapters进行控制
+2.0标记为compatible参数的部分整体移除了。
+### 功能
+1. 预训练请使用swift pt命令。该命令会默认使用generation template，而swift sft命令默认使用model_type预置的template
+2. 整体移除了2.x版本的examples目录，并添加了按功能类型划分的新examples
+3. 数据集格式完全向messages格式兼容，不再支持query/response/history格式
+4. merge_lora的存储目录可以通过`--output_dir`指定了，且merge_lora和量化不能在一个命令中执行，需要最少两个命令
+5. 使用`swift app --model xxx`开启app-ui界面，支持了多模态界面推理
+6. 移除了AIGC的依赖以及对应的examples和训练代码

ms-swift/docs/source/Instruction/人类对齐.md ADDED Viewed

	@@ -0,0 +1,115 @@

+# 人类对齐
+本文档提供了各种人类偏好对齐算法的训练脚本。若您希望深入了解更详尽的算法信息及其选择方法，请参考[文档](https://github.com/modelscope/modelscope-classroom/blob/main/LLM-tutorial/M.%E4%BA%BA%E7%B1%BB%E5%81%8F%E5%A5%BD%E5%AF%B9%E9%BD%90%E8%AE%AD%E7%BB%83.md)
+## 数据集
+PPO与GRPO算法所需的数据仅为模型输入，也就是system prompt（可选）加上query。其中GRPO中的奖励函数可能需要额外的数据列，比如计算准确率需要`solution`列作为参考答案。
+RM和DPO类算法如ORPO，CPO，SimPO，则需要 $(x,y_w,y_l)$ 格式的数据，其中 $x$ 表示模型输入，$y_w,y_l$ 分别表示符合人类偏好的偏好回答和不符合人类偏好的拒绝回答,比如![dpo_data](../../resources/dpo_data.png)
+而KTO算法的数据比较特殊，只需要 $(x,y,\text{label})$ 格式的数据，其中 $x$ 表示模型输入，$y$ 表示模型输出，label表示回答是否符合人类偏好
+比如![kto_data](../../resources/kto_data.png)
+使用自定义数据集对文本模型或者多模态大模型进行RLHF训练可以参考[自定义数据集文档](../Customization/自定义数据集.md#rlhf)。
+## GRPO
+[论文arxiv](https://arxiv.org/abs/2402.03300)
+训练脚本参考[这里](./GRPO.md).
+## DPO
+[论文arxiv](https://arxiv.org/abs/2305.18290)
+超参
+- beta：KL正则系数，值越大表示对偏离参考模型的惩罚越大。默认为0.1
+建议在开始DPO训练之前，使用偏好数据集中的偏好回答部分进行SFT训练，以确保数据符合DPO算法的分布要求。
+我们也在DPO loss中混合了sft loss来稳定训练，你可以通过设置超参`rpo_alpha`来调整sft loss的系数，默认为`1.`.
+训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/dpo.sh).
+## RM
+[论文arxiv](https://arxiv.org/abs/2203.02155)
+RLHF中的Reward Modeling阶段
+使用sft训练后的base model或者instruct model作为基底模型, 增加value head, 使用偏好数据集训练为reward model
+增加的value head权重会保存在`value_head.safetensors` 或 `value_head.bin`文件中
+训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/rm.sh).
+## PPO
+[论文arxiv](https://arxiv.org/abs/2203.02155)
+RLHF中的PPO(proximal policy optimization)阶段, 涉及到四个模型
+- model: 训练模型, sft训练后的base model或者instruct model
+- ref_model: 参考模型, 默认为 model
+- reward_model: 奖励模型, 由RM阶段训练得到
+- value_model: 价值模型, 由reward_model初始化, 在训练中同步更新
+超参
+- local_rollout_forward_batch_size: 每次数据采样的批量大小, 默认为64
+- whiten_rewards: 对奖励进行归一化处理, 默认为False
+- kl_coef: KL散度项的系数, 默认为0.05
+- cliprange: PPO策略损失函数中的clip范围, 默认为0.2
+- vf_coef: 价值损失函数系数, 默认为0.1
+- cliprange_value: PPO价值损失函数中的clip范围, 默认为0.2
+- gamma: 累计奖励的折扣因子, 默认为1.0
+- lam: [GAE](https://arxiv.org/abs/1506.02438)中的lambda系数, 默认为0.95
+- num_sample_generations: 训练过程中生成的调试样本数量, 默认为10
+注意: 训练base model时, 需要先sft后再进行rlhf, 指定chat template, sft_type建议使用full
+训练中的指标解释参考[文档](https://huggingface.co/docs/trl/ppov2_trainer#explanation-of-the-logged-metrics)
+## KTO
+[论文arxiv](https://arxiv.org/abs/2402.01306)
+超参
+- beta： KL正则系数，值越大表示对偏离参考模型的惩罚越大。默认为0.1
+- desirable_weight ：损失函数中的$\lambda_D$项，偏好回答样本的损失权重, 默认为1.0
+- undesirable_weight ：损失函数中的$\lambda_U$项，拒绝回答样本的损失权重，默认为1.0
+用 $n_D$ 和 $n_U$ 分别表示数据集中偏好回答和拒绝回答的样本数量，对于超参 $\lambda_D$ 和 $\lambda_U$ ，作者推荐设置 $\frac{\lambda_Dn_D}{\lambda_Un_U}\in[1,\frac{4}{3}]$
+训练脚本
+使用 $(x,y,\text{label})$ 格式数据训练
+训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/kto.sh).
+## CPO
+[论文arxiv](https://arxiv.org/abs/2401.08417)
+超参
+- beta：隐含奖励前的系数，默认为0.1
+- cpo_alpha: nll loss系数, 默认为1.0
+训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/cpo.sh).
+## ORPO
+[论文arxiv](https://arxiv.org/abs/2403.07691)
+超参
+- lambda: Odds Ratio loss系数
+注意：ORPO使用参数`--beta`传入超参`lambda`
+训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/orpo.sh).
+## SimPO
+[论文arxiv](https://arxiv.org/abs/2405.14734)
+超参
+- beta：隐含奖励前的系数，默认为2.0
+- simpo_gamma：reward margin项，默认为1.0
+- cpo_alpha: 混合CPO nll loss提高训练稳定性, 默认为1.0, 设置0.0使用原始SimPO算法
+训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/simpo.sh).

ms-swift/docs/source/Instruction/使用tuners.md ADDED Viewed

	@@ -0,0 +1,133 @@

+# 使用Tuners
+tuner是指附加在模型上的额外结构部分，用于减少训练参数量或者提高训练精度。目前SWIFT支持的tuners有：
+- LoRA: [LORA: LOW-RANK ADAPTATION OF LARGE LANGUAGE MODELS](https://arxiv.org/abs/2106.09685)
+- LoRA+: [LoRA+: Efficient Low Rank Adaptation of Large Models](https://arxiv.org/pdf/2402.12354.pdf)
+- LLaMA PRO: [LLAMA PRO: Progressive LLaMA with Block Expansion](https://arxiv.org/pdf/2401.02415.pdf)
+- GaLore/Q-GaLore: [GaLore: Memory-Efficient LLM Training by Gradient Low-Rank Projection](https://arxiv.org/abs/2403.03507)
+- Liger Kernel: [Liger Kernel: Efficient Triton Kernels for LLM Training](https://arxiv.org/abs/2410.10989)
+- LISA: [LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning](https://arxiv.org/abs/2403.17919)
+- UnSloth: https://github.com/unslothai/unsloth
+- SCEdit: [SCEdit: Efficient and Controllable Image Diffusion Generation via Skip Connection Editing](https://arxiv.org/abs/2312.11392)  < [arXiv](https://arxiv.org/abs/2312.11392)  |  [Project Page](https://scedit.github.io/) >
+- NEFTune: [Noisy Embeddings Improve Instruction Finetuning](https://arxiv.org/abs/2310.05914)
+- LongLoRA: [Efficient Fine-tuning of Long-Context Large Language Models](https://arxiv.org/abs/2309.12307)
+- Adapter: [Parameter-Efficient Transfer Learning for NLP](http://arxiv.org/abs/1902.00751)
+- Vision Prompt Tuning: [Visual Prompt Tuning](https://arxiv.org/abs/2203.12119)
+- Side: [Side-Tuning: A Baseline for Network Adaptation via Additive Side Networks](https://arxiv.org/abs/1912.13503)
+- Res-Tuning: [Res-Tuning: A Flexible and Efficient Tuning Paradigm via Unbinding Tuner from Backbone](https://arxiv.org/abs/2310.19859)  < [arXiv](https://arxiv.org/abs/2310.19859)  |  [Project Page](https://res-tuning.github.io/)  |  [Usage](ResTuning.md) >
+- [PEFT](https://github.com/huggingface/peft)提供的tuners, 如AdaLoRA、DoRA、Fourierft等
+## 接口列表
+### Swift类静态接口
+- `Swift.prepare_model(model, config, **kwargs)`
+  - 接口作用：加载某个tuner到模型上，如果是PeftConfig的子类，则使用Peft库的对应接口加载tuner。在使用SwiftConfig的情况下，本接口可以传入SwiftModel实例并重复调用，此时和config传入字典的效果相同。
+    - 本接口支持并行加载不同类型的多个tuners共同使用
+  - 参数：
+    - `model`: `torch.nn.Module`或`SwiftModel`的实例，被加载的模型
+    - `config`: `SwiftConfig`、`PeftConfig`的实例，或者一个自定义tuner名称对config的字典
+  - 返回值：`SwiftModel`或`PeftModel`的实例
+- `Swift.merge_and_unload(model)`
+  - 接口作用：将LoRA weights合并回原模型，并将LoRA部分完全卸载
+  - 参数：
+    - model: `SwiftModel`或`PeftModel`的实例，已加载LoRA的模型实例
+  - 返回值：None
+- `Swift.merge(model)`
+  - 接口作用：将LoRA weights合并回原模型，不卸载LoRA部分
+  - 参数：
+    - model: `SwiftModel`或`PeftModel`的实例，已加载LoRA的模型实例
+  - 返回值：None
+- `Swift.unmerge(model)`
+  - 接口作用：将LoRA weights从原模型weights中拆分回LoRA结构
+  - 参数：
+    - model: `SwiftModel`或`PeftModel`的实例，已加载LoRA的模型实例
+  - 返回值：None
+- `Swift.save_to_peft_format(ckpt_dir, output_dir)`
+  - 接口作用：将存储的LoRA checkpoint转换为Peft兼容的格式。主要改变有：
+    - `default`会从对应的`default`文件夹中拆分到output_dir根目录中
+    - weights中的`{tuner_name}.`字段会被移除，如`model.layer.0.self.in_proj.lora_A.default.weight`会变为`model.layer.0.self.in_proj.lora_A.weight`
+    - weights中的key会增加`base_model.model`前缀
+    - 注意：只有LoRA可以被转换，其他类型tuner由于Peft本身不支持，因此会报转换错误。此外，由于LoRAConfig中存在额外参数，如`dtype`，因此在这些参数有设定的情况下，不支持转换为Peft格式，此时可以手动删除adapter_config.json中的对应字段
+  - 参数：
+    - ckpt_dir：原weights目录
+    - output_dir：目标weights目录
+  - 返回值：None
+- `Swift.from_pretrained(model, model_id, adapter_name, revision, **kwargs)`
+  - 接口作用：从存储的weights目录中加载tuner到模型上，如果adapter_name不传，则会将model_id目录下所有的tuners都加载起来。与`prepare_model`相同，本接口可以重复调用
+  - 参数：
+    - model：`torch.nn.Module`或`SwiftModel`的实例，被加载的模型
+    - model_id：`str`类型，待加载的tuner checkpoint， 可以是魔搭hub的id，或者训练产出的本地目录
+    - adapter_name：`str`或`List[str]`或`Dict[str, str]`类型或`None`，待加载tuner目录中的tuner名称，如果为`None`则加载所有名称的tuners，如果是`str`或`List[str]`则只加载某些具体的tuner，如果是`Dict`，则将`key`指代的tuner加载起来后换成`value`的名字
+    - revision: 如果model_id是魔搭的id，则revision可以指定对应版本号
+### SwiftModel接口
+下面列出用户可能调用的接口列表，其他内部接口或不推荐使用的接口可以通过`make docs`命令查看API Doc文档。
+- `SwiftModel.create_optimizer_param_groups(self, **defaults)`
+  - 接口作用：根据加载的tuners创建parameter groups，目前仅对`LoRA+`算法有作用
+  - 参数：
+    - defaults：`optimizer_groups`的默认参数，如`lr`和`weight_decay`
+  - 返回值：
+    - 创建的`optimizer_groups`
+- `SwiftModel.add_weighted_adapter(self, ...)`
+  - 接口作用：将已有的LoRA tuners合并为一个
+  - 参数：
+    - 本接口是PeftModel.add_weighted_adapter的透传，参数可以参考：[add_weighted_adapter文档](https://huggingface.co/docs/peft/main/en/package_reference/lora#peft.LoraModel.add_weighted_adapter)
+- `SwiftModel.save_pretrained(self, save_directory, safe_serialization, adapter_name)`
+  - 接口作用：存储tuner weights
+  - 参数：
+    - save_directory：存储目录
+    - safe_serialization： 是否使用safe_tensors，默认为False
+    - adapter_name：存储的adapter tuner，如果不传则默认存储所有的tuners
+- `SwiftModel.set_active_adapters(self, adapter_names, offload=None)`
+  - 接口作用：设置当前激活的adapters，不在列表中的adapters会被失活
+    - 在`推理`时支持环境变量`USE_UNIQUE_THREAD=0/1`，默认值`1`，如果为`0`则set_active_adapters只对当前线程生效，此时默认使用本线程激活的tuners，不同线程tuners互不干扰
+  - 参数：
+    - adapter_names：激活的tuners
+    - offload：失活的adapters如何处理，默认为`None`代表留在显存中，同时支持`cpu`和`meta`，代表offload到cpu和meta设备中以减轻显存消耗，在`USE_UNIQUE_THREAD=0`时offload不要传值以免影响其他线程
+  - 返回值：None
+- `SwiftModel.activate_adapter(self, adapter_name)`
+  - 接口作用：激活一个tuner
+    - 在`推理`时支持环境变量`USE_UNIQUE_THREAD=0/1`，默认值`1`，如果为`0`则activate_adapter只对当前线程生效，此时默认使用本线程激活的tuners，不同线程tuners互不干扰
+  - 参数：
+    - adapter_name：待激活的tuner名字
+  - 返回值：None
+- `SwiftModel.deactivate_adapter(self, adapter_name, offload)`
+  - 接口作用：失活一个tuner
+    - 在`推理`时环境变量`USE_UNIQUE_THREAD=0`时不要调用本接口
+  - 参数：
+    - adapter_name：待失活的tuner名字
+    - offload：失活的adapters如何处理，默认为`None`代表留在显存中，同时支持`cpu`和`meta`，代表offload到cpu和meta设备中以减轻显存消耗
+  - 返回值：None
+- `SwiftModel.get_trainable_parameters(self)`
+  - 接口作用：返回训练参数信息
+  - 参数：无
+  - 返回值：训练参数信息，格式如下：
+    ```text
+    trainable params: 100M || all params: 1000M || trainable%: 10.00% || cuda memory: 10GiB.
+    ```

ms-swift/docs/source/Instruction/常见问题整理.md ADDED Viewed

	@@ -0,0 +1,713 @@

+# 常见问题整理
+下面是swift使用过程中遇到的一些常见问题。
+## 训练
+### Q1: Swift微调支持的模型和数据集有哪些？
+详见文档[支持的模型和数据集](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.html)。
+### Q2: 使用自定义数据集训练时支持的数据格式有哪些？
+自定义数据集格式见文档[自定义数据集](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html)。
+### Q3: 自定义数据集dataset_info.json格式，如何通过这种方式使用自定义数据集？
+dataset_info.json格式见文档[自定义数据集](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html)。命令行，`--custom_dataset_info xxx.json`，`--dataset <dataset_id_or_path>`。
+### Q4: 如何在界面训练使用自定义数据集？
+界面训练使用自定义数据集与命令行一致，参考文档[自定义数据集](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html)。
+### Q5: 数据集jsonl文件里的一行能不能写成这样？{"index": "00000", "query": "11111", "response": "22222", 'source':'qqq'}
+可以有额外字段的，这些字段不会被使用。
+### Q6: 命令行参数在哪个文档中查看？
+详见文档[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q7: 离线环境训练需要配置的参数有哪些？
+`--model 本地路径`，`--check_model false`，详见[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q8: model_type在哪儿查看？
+查看文档[支持的模型和数据集](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.html)。
+### Q9: 模型训练完能直接转gguf格式吗？
+目前只支持导出ModelFile，详见文档[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q10: swift支持预训练吗，我看只有sft？
+支持，命令行`swift pt`，[预训练例子](https://github.com/modelscope/ms-swift/tree/main/examples/train/pretrain)，数据集格式见[自定义数据集](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html)。
+### Q11: 想问一下用lora微调的模型，如果想断点续训的话，是应该先把它合成一整个模型吗，还是可以不合起来，直接通过路径来指定原模型和lora块
+不合并，`--resume_from_checkpoint output/xxx/vx-xxx/checkpoint-xxx`，详见[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q12: 我想控制一下从网上下载下来的原始模型权重的位置，怎么才能做到把原始的模型放在指定的文件夹里呢？
+可以配置环境变量`MODELSCOPE_CACHE=your_path`将原始的模型存到指定路径；如果用sdk下载，通过`cache_dir="本地地址"`；也可以使用`modelscope download`命令行工具或`git`下载，详见modelscope文档[模型下载](https://modelscope.cn/docs/models/download)。训练时`--model`配置本地路径即可。如果需要在离线环境训练，配置`--check_model false`，详见[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q13: 有人在用ms-swift遇到过这个问题？
+```text
+[rank6]: pydantic_core._pydantic_core.ValidationError: 1 validation error for DeepSpeedZeroConfig
+[rank6]: stage3_prefetch_bucket_size
+[rank6]: Input should be a valid integer, got a number with a fractional part [type=int_from_float, input_value=11560550.4, input_type=float]
+[rank6]: For further information visit https://errors.pydantic.dev/2.8/v/int_from_float
+```
+`deepspeed`版本降到`0.14.*`。
+### Q14: 有微调qwen-2-vl的完整的教程和命令行吗？
+参考多模态模型训练的[例子](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal)。
+### Q15: 多模态大模型微调有什么支持的trick吗，类似llm的neftune?
+`pissa/olora/dora`这些`lora`的变种或者`fourierft`都可以尝试。参考`sft`参数里面的各种trick，有一些不一定在多模态上适用。
+### Q16: 训练过程中eval得到的acc和对应保存的ckpt去重新推理一遍计算得到的acc不是一致的
+训练时候的eval_acc和推理时候的acc 计算方式不一样的。`acc_strategy`: 默认为`'token'`, 可选择的值包括: `'token'`, `'sentence'`.
+### Q17: 魔搭官方镜像与swift环境
+`docker run`命令启动容器即可，如：`docker run --gpus all -p 8000:8000 -it -d --name ms registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.1.0-py310-torch2.3.0-tf2.16.1-1.16.0 /bin/bash`，启动容器后拉最新代码安装swift。
+### Q18: 多机多卡训练命令行
+详见[多机多卡例子](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node)
+### Q19: 如何选择template?
+见[issue](https://github.com/modelscope/ms-swift/issues/1813)。
+### Q20: 多卡训练torchrun和swift sft如何使用？
+`swift sft`走的就是`torchrun`。
+### Q21: 有个问题，因为我的sft数据集太大了，然后每次tokenize都需要很久，有解决方案吗？
+使用`lazy_tokenize`，详见[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q22: 训练时，如果两个数据集直接追加一起放在训练集中，模型在训练的时候内部会有shuffle的流程吗？还是按顺序取数据去训练？
+trainer中会随机。
+### Q23: 如果模型两张卡，数据不开并行，deepspeed就会出现报错，怎么处理呢？
+`deepspeed` 和 `device_map`是不兼容的，两个只能选1个。
+### Q24: 在线训练时已经下载的数据集，离线重新训练为什么还要下载？
+数据文件中有url，不支持离线训练。
+### Q25: vlm模型训练如何减少显存使用？
+配置`--freeze_vit true`。
+### Q26: 为什么WEB-UI界面上支持的模型比文档中少？
+升级一下ms-swift。
+### Q27: 没有适配model_type的模型，sft时可以自定义special_tokens和chat_template吗？
+可以。参考接入模型的PR以及自定义模型数据集文档。
+### Q28: 可以在python脚本里面用DPO去训练qwen2-vl吗？
+可以。从`swift.llm`中导入`rlhf_main` 和`RLHFArguments`。
+### Q29: 请问训练MLLM时，可否先进行纯文本的预训练，然后接入VQA数据集进行微调呢？
+可以。也可以混着训练。
+### Q30: 基于qwen2的sft模型进行dpo训练，v100的机器，训练时都是Nan呢？
+V100机器要用fp32训练qwen2。
+### Q31: 想问一下，swift，能支持蒸馏吗？
+参考这个[例子](https://github.com/modelscope/ms-swift/blob/main/examples/sampler/distill/distill.sh)
+### Q32: cannot import name 'ftp_head' from 'datasets.utils.file_utils' ，有没有遇到这个问题的?
+`pip install datasets==2.*`。
+### Q33: 当前训练完默认最多保存两个checkpoint，如果想多保存几个应该怎么修改呢？
+`--save_total_limit`，详见[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q34: Grounding任务中通用数据格式支持一个类别有多个实例吗？
+目前均支持了一个物体对应多个bbox，参考文档[自定义数据集](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html)。
+### Q35: 这个错误为什么会出现在这，numpy.object找不到在哪？
+`numpy==1.26.3`，尝试一下。
+### Q36: swift框架能支持序列并行了吗？
+支持。现在是引入`xtuner`来实现。
+### Q37: 用v100微调qwen2-1.5B时，'loss': 0.0, 'acc': 0.0, 'grad_norm': nan，是什么问题呢?
+尝试用fp32。
+### Q38: gptq量化模型，能全参数微调吗？
+不能。gptq模型的int型参数无法参与求导，只能附着lora等额外结构参与更新。
+### Q39: 请问如果想用qlora的方式微调的话应该如何设置参数呢?glm4-chat
+参考qlora[例子](https://github.com/modelscope/ms-swift/tree/main/examples/train/qlora)。
+### Q40: 用qwen2-vl-7b训练自己的数据集，总是出现'AdamW' object has no attribute 'train'这个问题。
+`accelerate 0.34.0`试试。
+### Q41: 请教一个问题，我应该如何在swift框架下扩充我的词表呢？
+swift目前还不支持词表扩充。
+### Q42: 同名的模型是可以直接使用huggingface上的吗？
+设置环境变量`USE_HF=1`。
+### Q43: 请问Qwen2-VL-2B能进行增量预训练吗？有指导文件吗?有图文,也有纯文本的。
+支持了，如果继续预训练将所有的内容放在response中就可以了。
+### Q44: 请问下用视频做训练的时候，如何在参数中控制抽帧率，设了frame_rate设不起, minicpmv
+设置环境变量`MAX_NUM_FRAMES`。
+### Q45: swift在训练的时候，可以把验证集的推理结果保存下来吗？
+训练结束后，运行swift infer，会保存。
+### Q46: 我全量full参数dpo，为何保存的checkpoint 比原本模型文件要大呢?整整大了1倍
+用V100微调，存的是fp32类型。
+### Q47: 多机训练速度缓慢，在使用swift框架进行LLM训练时，发现采用deepspeed zero3训练会出现严重的速度下降问题
+详见[issue](https://github.com/modelscope/ms-swift/issues/1825)。
+### Q48: swift现在是支持qwen2-vl多阶段预训练的吗？我看官方的最佳实践里的sft好像都是vit+llm一起训的，不知道支不支持单独finetune
+详见[issue](https://github.com/modelscope/ms-swift/issues/2222)。
+### Q49: qwen2-vl是不是不支持混合纯文本数据?
+支持图文和纯文本。
+### Q50: 微调的时候可以绘制不同数据集的loss曲线吗？
+不支持的，数据集是混合训练的。
+### Q51: 模型训练后，回复重复了很多内容
+参考[预训练与微调](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E9%A2%84%E8%AE%AD%E7%BB%83%E4%B8%8E%E5%BE%AE%E8%B0%83.html)。如果训练过程中出现重复的情况，请多训练几个epoch, 清洗数据, 全参数训练, 采用RLHF的方式缓解。
+### Q52: 想问一下swift目前支持prompt tuning或者prefix tuning吗？
+不支持，这两个方法知识遗忘比较严重，目前不推荐使用。
+### Q53: 两张A10训练报错如下：
+```text
+[rank0]: torch.distributed.DistBackendError: NCCL error in:../torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:1970， unhandled system error (run with NCCL_DEBUG=INFO for details),NCCL version 2.20.5
+[rank0]:ncclSystemError: System call (e.g. socket,malloc) or external library call failed or device error.
+```
+请检查共享内存是否太小，nccl需要共享内存。
+### Q54: 请问在采用DDP微调训练的过程中，冻结某些层时导致的某些参数未参与梯度回传问题怎么解决？
+配置参数`--ddp_find_unused_parameters true`。
+### Q55: swift有没有数据集质检工具？
+[data-juicer](https://github.com/modelscope/data-juicer)。
+### Q56: web端在哪启动模型并行?只找到了数据并行的勾选项，没找到模型并行在哪。
+指定可见显卡就可以。
+### Q57: 如何不自动shuffle呢?想把它关掉。
+目前只能改transformers[代码](https://github.com/huggingface/transformers/blob/main/src/transformers/trainer.py)。
+### Q58: 'num_items_in_batch'是个什么参数？没找到在哪。
+升级`ms-swift==2.5.2`或者降低`transformers<4.46`。
+### Q59: 设置--dataset的话，怎么让数据集下载到固定位置，我在命令行参数没找到，下次如果再次读取的话如何可以从下载的地方读取
+`dataset_path`支持文件夹，一般是`git clone`下载下来的数据集文件夹。详见[自定义数据集文档](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html#dataset-info-json)。
+### Q60: --streaming true，我设置num_train_epochs会报错让我设置max_steps。不可以只设置num_train_epochs吗？
+流式加载数据集一定需要设置`max_steps`。
+### Q61: 好奇tools为啥是"[]"，不是直接支持[]呢，能否帮忙解答一下，这个tools为啥是"[]"这种格式呢，不是直接使用[]呢，有些不理解
+这是因为datasets的底层pyarrow对于类型管控比较严格。我们官方的grounding数据集的objects部分也是因为这个原因要用str，要不pyarrow就会报错：你每行的类型不一致。
+### Q62: 这个参数不能用吗？check_dataset_strategy==discard
+swift3.0没这个参数了，用`strict`参数。
+### Q63: 运行sft命令出现报错如下：
+```text
+RuntimeError: Expected to mark a variable ready only once. This error is caused by one of the following reasons: 1) Use of a module parameter outside the `forward` function. Please make sure model parameters are not shared across multiple concurrent forward-backward passes. or try to use _set_static_graph() as a workaround if this module graph does not change during training loop.2) Reused parameters in multiple reentrant backward passes. For example, if you use multiple `checkpoint` functions to wrap the same part of your model, it would result in the same set of parameters been used by different reentrant backward passes multiple times, and hence marking a variable ready multiple times. DDP does not support such use cases in default. You can try to use _set_static_graph() as a workaround if your module graph does not change over iterations.
+```
+加一下这个参数，`--gradient_checkpointing_kwargs '{"use_reentrant": false}'`。
+### Q64: 有遇到过这个问题嘛？AttributeError:’TrainerState’ object has no attribute ’last_model_checkpoint’
+数据集太少了，增加一些。数据数量不足一个step导致的报错。
+### Q65: 我看到custompreprocessor里面可以定义preprocess，这个是在训练开始前全部会处理好，还是一边训练一边加载的啊
+如果设置了参数`--streaming true`，就是一边训练一边加载。默认是全部处理完然后训练。
+### Q66: 全参数训练internvl2_5，为啥里面的 freeze parameters默认就有vision_model 和 mlp1？我看命令行参数的文档里面freeze parameters默认为[],命令中显示设置 freeze vit， freeze aligner， freeze llm都为False，又会打印出来trainable parameters：[‘mlp1’] 也不知道是指只有mlp1可以train 还是 所有的paras都可以train 只是mlp1打印一下
+先freeze parameters再active parameters。`freeze vit/freeze aligner/freeze llm`这三个参数会对freeze parameters 和trainable parameters进行调整.因为有些模型的`vit`中包含`aligner`，所以会将`aligner`单独加入trainable_parameters。
+### Q67: 请问swift中的llamapro对多模态做适配了吗？
+支持的。
+### Q68: 我发现2.x支持MAX_PIXELS，3.x文档里有个--max_pixels参数是一个意思吗，他的处理逻辑是啥样的？我用12000*9000的图片，2.x设置rescale_image训练internvl还是会崩
+环境变量的参数是对应模型的参数，`MAX_PIXELS`只支持qwen2vl的，internvl有自己的环境变量参数，详见[特定模型参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html#id18)。
+### Q69: 从qwen base模型微调成chat模型有没有实践文档，有什么要特别配置的吗?
+A: `swift sft`，没有其他需要特别配置的，参考[例子](https://github.com/modelscope/ms-swift/tree/main/examples/train/base_to_chat)。
+### Q70: sequence parallel例子在哪呀？
+A: 看这个例子[sequence_parallel](https://github.com/modelscope/ms-swift/tree/main/examples/train/sequence_parallel)。
+### Q71: swift能支持训练自己定义的模型结构吗？
+可以的，只需要自定义`get_model_tokenizer_xxx`函数就好了，返回`model`和`tokenizer`。
+### Q72: 我用"name_or_path": "/mnt/workspace/model/Qwen2.5-14B-Instruct"跑longlora 发现出现了报错，不会是只有个llama系列可以使用longlora吧
+只有llama系列能用`longlora`。
+### Q73: 想问下swift怎么加入自己的special token？
+在`get_model_tokenizer`函数中增加。
+### Q74: --freeze_parameters_ratio这个参数，如果设定为0.7，是不是说明训练的时候只更新llm的30%的参数？是随机更新30%吗，这个参数更新的机制是什么呀？
+从下往上freeze。
+### Q75: map过程为啥这么慢，这是正常的吗？
+```text
+Map: 4%|██ | 9000/203823 [02:18<50:34, 64.19 examples/s]
+```
+设置参数`--dataset_num_proc`可以开多进程。
+### Q76: 请问数据集如何能够删除重新下载，感觉数据集出了点问题
+设置参数`--download_mode`。
+### Q77: 请问这个问题如何解决？safetensors_rust.SafetensorError: Error while deserializing header: HeaderTooLarge
+磁盘空间不足了，模型没有保存完整。
+### Q78: swift3.0不支持get_default_template_type是吗？
+请查看`model.model_meta.template`，信息都存在`model.model_meta和model.model_info`。
+### Q79: 魔搭swift微调支持hermes格式的agent微调嘛，我看qwen2.5使用vllm原生支持hermes格式的工具调用，怎么swift上没有看到相关的
+目前没有支持`hermes`格式，主要还是`toolbench`和`react`两个格式,因为`react`应用比较广泛一些，目前swift的deploy也支持解析这两种格式并给出`openai tool calling`的。
+### Q80: 请问默认模型训练都是left padding是吧?
+训练可以选择使用左padding还是右padding。默认是右padding, `batch infer`都是左padding。
+### Q81: 请问下现在支持grounding任务了吗
+examples下有[例子](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/grounding.sh)。
+### Q82: 请问现在ms-swift支持对比学习，从而训练llm_emb吗?
+支持，[例子](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train.sh)。
+### Q83: 话说直接从peft和trl库，手搓微调和grpo代码和swift官方在同参数下进行训练，效果差异大吗？
+区别不大，额外支持了多模态。
+### Q84: swift 目前不支持 minicpmo2_6 使用音频模态输入的训练吗？会报错： assert media_type in {'image', 'video'}
+目前不支持音频。
+### Q85: swift可以微调deepseek R1 671B吗？
+可以，template是接入了的，不过过程会比较麻烦，要先fp8转bf16。
+### Q86: 最新的swift框架不是通过这个命令来指定模型的位置的么？这是我已经下载好的模型位置，不知道为什么还要下载，还下不下来，提示报错git clone
+```shell
+--model /mnt/workspace/.cache/modelscope/hub/deepseek-ai/deepseek-vl2/ \
+```
+有些需要clone repo，然后通过`local_repo_path`指定。
+### Q87: swift现在支持多模态的grpo吗？
+支持。
+### Q88: grpo的reward函数支持自己定义么?
+支持，参考[examples/train/grpo/plugin](https://github.com/modelscope/ms-swift/tree/main/examples/train/grpo/plugin)。
+### Q89: 请问为什么 --torch_dtype float16 （卡不能使用bf16）会出现报错：lib/python3.12/site-packages/torch/amp/grad_scaler.py", line 260, in _unscale_grads_ raise ValueError("Attempting to unscale FP16 gradients.") ValueError: Attempting to unscale FP16 gradients.
+全参数，不能fp16训练的。
+### Q90: 请教一个问题。我用swift训练了一个reward模型（基线是qwen2.5-7b），然后用在ppo或者grpo中加载会报错。reward模型是lora训练的。
+```shell
+--rlhf_type ppo \
+--model Qwen/Qwen2.5-14B-Instruct \
+--reward_model /mnt/workspace/output/rm/model --train_type lora \
+--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' --torch_dtype float32 --num_train_epochs 1 \
+--per_device_train_batch_size 1 --per_device_eval_batch_size 1 --learning_rate 1e-5 --lora_rank 8 --lora_alpha 32 \
+--target_modules all-linear \
+--gradient_accumulation_steps 16 --eval_steps 100 --save_steps 100 \
+```
+lora训练的reward model需要merge一下。
+### Q91: 各位大佬，请问要微调deepseek_vl2，transformers用什么什么版本？官方文档说<4.42，但是4.42及以下也报错。peft版本也要降低吗？
+`peft==0.11.*`。
+### Q92: 请问generate train split太慢了有没有什么好办法呀（大概有30多个数据集，总数据量百万左右）。之前swift 2.x好像没有这么慢。lazy tokenize 已经开了
+设置`--dataset_num_proc 16`。
+### Q93: 请问下微调qwen2.5vl的时候，我想使用全参数微调visual encoder同时使用LoRA微调LLM，怎么实现呢？
+参考这里[例子](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal/lora_llm_full_vit)。
+### Q94: 问一下，swift怎么使用自定义的损失函数？
+plugin中加就可以了。
+### Q95: 请问下MoE的参数有哪些，参数表里关键字搜索不到？专家数量，专家路由这些参数怎么设置？
+直接用config.json中的参数
+### Q96: grpo训练中使用lmdeploy会报相关函数不存在的问题，想请教下具体问题，在lmdeployengine类里面确实没找到load_weights这个函数
+只在turbomind引擎下支持。
+### Q97: Moonlight-16B-A3B-Instruct, 我在微调这个模型的时候报错怎么办?ms-swift好像不支持这个模型进行微调
+因为是模型文件中禁止了训练, 参考deepseek_vl2的解决方案，你搜搜issue。
+### Q98: 训练时出了这个错应该咋解决？RuntimeError: "triu_tril_cuda_template" not implemented for 'BFloat16'
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+swift sft \
+    --model Internlm3-8b \
+    --dataset train.json \
+    --train_type full \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 5 \
+    --per_device_train_batch_size 1 \
+    --deepspeed zero3 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 5 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4
+```
+升级torch。
+### Q99: 支持自定义reward吗？
+支持，看这里的[例子](https://github.com/modelscope/ms-swift/tree/main/examples/train/grpo/plugin)。
+### Q100: grpo训练，loss和grad_norm全是0，正常的吗？
+```text
+{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 9e-08, 'memory(GiB)': 88.1, 'train_speed(iter/s)': 0.009252, 'completion_length': 150.00000763, 'response_clip_ratio': 0.0, 'rewards/Format': 1.0, 'reward': 1.0, 'reward_std': 0.0, 'kl': 0.0, 'clip_ratio': 0.0, 'epoch': 0.0, 'global_step/max_steps': '1/1052', 'percentage': '0.10%', 'elapsed_time': '36s', 'remaining_time': '10h 43m 54s'}
+{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.8e-07, 'memory(GiB)': 94.15, 'train_speed(iter/s)': 0.014782, 'completion_length': 133.25000763, 'response_clip_ratio': 0.0, 'rewards/Format': 1.0, 'reward': 1.0, 'reward_std': 0.0, 'kl': 0.0, 'clip_ratio': 0.0, 'epoch': 0.0, 'global_step/max_steps': '2/1052', 'percentage': '0.19%', 'elapsed_time': '1m 3s', 'remaining_time': '9h 19m 49s'}
+{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 2.7e-07, 'memory(GiB)': 94.15, 'train_speed(iter/s)': 0.018695, 'completion_length': 123.08333969, 'response_clip_ratio': 0.0, 'rewards/Format': 1.0, 'reward': 1.0, 'reward_std': 0.0, 'kl': 0.0, 'clip_ratio': 0.0, 'epoch': 0.0, 'global_step/max_steps': '3/1052', 'percentage': '0.29%', 'elapsed_time': '1m 29s', 'remaining_time': '8h 39m 34s'}
+```
+训练过程中loss接近0是正常情况，参考[issue](https://github.com/huggingface/open-r1/issues/239#issuecomment-2646297851)。
+### Q101: 请教一下这个grpo的内置奖励函数，从哪里可以传入accuracy_orm
+目前是直接改代码。
+### Q102: 我看这奖励函数有solution参数，是要从数据集里面传过来吗？就是我数据集必须有solution这项？
+是的，针对math问题，不然不好算accuracy。
+### Q103: 训练为什么没有token_acc？
+有些模型`logits`和`labels`数量对不上，就不算的。
+### Q104: 微调Ovis2 使用lora参数不起作用？加不加--train_type lora \，好像都是全参数微调？显存没变化。
+`--max_length`限制一下，这个模型有点特殊，需要padding到max_length。
+### Q105: 请问下用qwen2.5跑一个分类任务，报下面的错误，是哪里配置的有问题呢？ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.
+数据集是这样的：{"messages": [{"role": "user", "content": "xxxxx"}, {"label": 1}]}
+`label`写在`messages`同级。
+### Q106: 有人知道这是哪错了吗?训练方法是vera
+```text
+KeyError("The 'metric_for_best_model' training argument is set to 'eval_loss', which is not found in the evaluation metrics. The available evaluation metrics are:['eval_runtime', 'eval_samples_per_second', 'eval_steps_per_second', 'epoch', 'global_step/max_steps', 'percentage', 'elapsed_time', 'remaining_time']. consider changing the 'metric_for_best_model' via the TrainingArguments.")
+Train: 45%|    100/220[09:47<11:44，5.87s/it]
+```
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --dataset '/mnt/workspace/data.json' \
+    --train_type vera \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 4 \
+    --per_device_train_batch_size 1 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 5 \
+    --logging_steps 5 \
+    --max_length 4096 \
+    --warmup_ratio 0.05 \
+    --output_dir output/Qwen2.5-7B-vera \
+```
+`--label_names labels`，加这个参数。
+### Q107: 启动了VllmEngine，要如何退出呀？就是调用了engine，模型就被载入显存准备工作。但是我推理完想要engine释放显存。下次调用时，再加载。而不是一直占用
+sleep mode啊，支持的。`engine.sleep(level=1)/engine.wake_up()`，构造的时候加一个`enable_sleep_mode=True`。
+### Q108: 求问，streaming模式下，trainer_sampler_random是不是就没有作用了呢？
+streaming是不随机的。
+### Q109: 请问grpo使用vllm进行推理，vllm可以设置trust_remote_code吗？
+默认就是true的。
+### Q110: 请教一下，pretrain阶段数据集比较大，用了streaming流式和packing打包数据，这时候需要设置 max_steps，有没有参数或者命令可以根据epochs、bs等参数计算打包后的总的steps吗？
+会按`epochs`和`max_steps`两者较小的结束训练。
+### Q111: unsloth训练，报错：assert(type(target_modules) in (list,tuple,))。配置的参数是--target_modules all-linear
+别用`all-linear`，改为具体的模块列表，比如`--target_modules q k v`。
+### Q112: Swift现在支持多标签分类么？
+支持的。自定义数据集文档有格式，然后在命令行参数文档中搜索一下`problem_type`，改一下，其他和回归是一样的。
+### Q113: 请问packing中flash_attn是分开处理的还是合并处理的？
+一定需要flash_attn，不然是有误差，attention_mask会出问题。
+## 推理
+### Q1: swift推理有文档吗？
+swift支持python脚本、命令行、ui界面推理，详见[推理和部署](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%8E%A8%E7%90%86%E5%92%8C%E9%83%A8%E7%BD%B2.html)。
+### Q2: 训练后的模型如何使用数据集推理？
+参数`--load_data_args true`或`--val_dataset <your-val-dataset>`，见文档[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q3: swift推理的时候可以指定下载好的模型吗？
+`--model`配置本地路径即可，详见[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q4: 我想在一个没有label的数据集上推理，怎么做呢？我看文档里面的数据集格式都是训练集
+配置参数`--val_dataset <your-val-dataset>`。
+### Q5: 遇到报错ValueError: Input length of input_ids is 35, but `max_length` is set to 20.如何解决？
+```text
+raise ValueError(
+ValueError: Input length of input_ids is 35, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.
+```
+设置model.generation_config.max_new_tokens。
+### Q6: qwen2-vl推理（训练）爆显存
+设置命令行参数`--max_pixels xxx`、环境变量`MAX_PIXELS=xxx`、或特定模型参数`--model_kwargs '{"max_pixels": xxx}'`，其中环境变量仅对文档中对应的模型生效，详见文档[特定模型参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html#id18)。
+### Q7: v100显卡，在python虚拟环境中，参考https://swift2x.readthedocs.io/zh-cn/latest/Multi-Modal/qwen2-vl%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5.html 完成环境准备，在测试推理命令：CUDA_VISIBLE_DEVICES=0,1,2,3 swift infer --model_type qwen2-vl-7b-instruct 时报错：RuntimeError: probability tensor contains either `inf`, `nan` or element < 0
+尝试用A10或者3090机器推理。
+### Q8: 运行下面命令，预测之后的结果在哪里？CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir output/glm4v-9b-chat/vx-xxx/checkpoint-xxx-merged --load_data_args true
+日志中会打印路径。
+### Q9: 现在最新的swift版本，infer命令能通过logprobs参数输出概率值吗？
+可以输出logprobs，命令行推理设置`--logprobs true`，python脚本推理设置`request_config = RequestConfig(..., logprobs=True, top_logprobs=2)`，参考[test_logprobs.py](https://github.com/modelscope/ms-swift/blob/main/tests/infer/test_logprobs.py)。
+### Q10: 最新版本swift，我在加载qwen2-32b-instruct-awq 量化模型及其lora的时候，使用vllm 提示我加上merge lora true,我加上就报错了，我去掉vllm 加速就能正常推理了，但是速度很慢
+qlora训练的模型不支持merge-lora的, 建议lora微调后 merge-lora再量化。
+### Q11: vllm会报错，assert factor in rope_scaling
+详见qwen2-vl [issue#96](https://github.com/QwenLM/Qwen2-VL/issues/96)。
+### Q12: vllm作为推理后端的话，模型必须合并以后才能调用吗？
+可以不合并，详见文档[命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html)。
+### Q13: 请问在使用python脚本推理时，如何使用cpu?
+设置环境变量，`os.environ['CUDA_VISIBLE_DEVICES'] = '-1'`。
+### Q14: 有人遇到过这个问题吗?RuntimeError: "triu_tril_cuda_template" not implemented for'BFloat16'
+升级torch,这个版本的torch没实现这个算子。
+### Q15: qwen2-audio支持流式推理吗？
+支持，详见[issue](https://github.com/modelscope/ms-swift/issues/1653)
+### Q16: inference client推理多模态，do_sample在哪里设置？
+设置temperature=0。
+### Q17: ms-swift支持大模型批处理不？
+支持的。详见[demo](https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo.py)。
+### Q18: ms-swift量化模型的时候，显示内存不足，可以在量化的时候少占用一些资源吗，慢一点没关系。
+尝试设置`--device_map cpu`。
+### Q19: swift支持对多模态模型量化吗？
+支持。
+### Q20: 使用GPTQ报错如下，请问是啥原因？
+```text
+if llm_config['architectures'][0] == 'LlamaForCausalLM':
+KeyError: 'architectures'
+```
+尝试transformers==4.44.*版本。
+### Q21: swift infer如何将评估的结果保存到指定文件呢 每次都不知道保存到哪里了
+设置`--result_path your_path`，详见[InferArguments](https://github.com/modelscope/ms-swift/blob/main/swift/llm/argument/infer_args.py)。
+### Q22: AWQ量化yi-vl-6b出错如下：
+```text
+TypeError: swift.llm.utils.model.get_model_tokenizer_with_flash_attn() got multiple values for keyword argument 'automodel_class'.
+```
+请使用gptq量化。
+### Q23: 想问一下用swift export对qwen2.5 72B模型进行gptq int4量化，max model length=32768用的是默认值，给的校准数据集有128个样本，但是量化的时候报错了，报错日志是：factorization could not be completed because the input is not positive-definite (the leading minor of order 18145 is not positive-definite)。是什么原因？
+海森矩阵不正定的问题，试试其他的数据集。
+### Q24: 请问批量推理是只能自己编写代码运行吗？不可以按照 sft 那样填脚本参数吗
+可以，`swift infer --val_dataset xxx --max_batch_size 16 ... `
+### Q25: 问一下，swift app推理时，temperature默认是多少的？
+默认从generation_config.json中读取。
+### Q26: 请问，导出和量化的时候可以多卡吗？
+加载模型可以多卡，量化是单卡。
+### Q27: swift export的时候传入自定义的template_type,是不是就可以永久改掉template_type了？如果swift export --template_type 自定义,是不是就可以把模型对应的template改掉
+不会被修改,swift中的template是定义在swift内部的,不是以jinja方式保存的。
+### Q28: awq量化Qwen2VL报错：TypeError: Qwen2VLForConditionalGeneration.__init__() got an unexpected keyword argument 'use_cache'
+用`gptq`量化。
+### Q29: ddp 推理，infer里面的这个max_batch_size，是指每张卡的batch_size还是总的batch_size
+每张卡。
+### Q30: 请问swift.inference现在支持messages格式的输入吗？现在看到好像只能用query格式，得到response。数据answer里面已经包含了部分prompt，希望补全answer，应该怎么修改inference
+```text
+{"messages": [{"role": "system", "content": "<system>"}, {"role": "user", "content": "<query1>"}, {"role": "assistant", "content": "answer1, "}]}
+```
+用swift3是可以的，参考[examples/infer/demo_agent](https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo_agent.py)。
+### Q31: 请问swift infer的时候，如何让结果实时写入result_path，而不是最后一次性写入呢？
+```shell
+swift infer \
+--ckpt_dir model_dir \
+--streaming true \
+--val_dataset dataset.jsonl \
+--result_path result.jsonl
+```
+使用`--stream true`（而不是`--streaming true`），这样可以一条条写，不过是非batch推理的。
+### Q32: 我在swift训练推理的时候是有效果的，但是用merge_lora后再通过ollama的api开接口的时候效果就没了
+试试transformers加载，swift的template是对齐transformers的。
+### Q33: 模型推理的时候如果需要在特定前缀下继续推理的话是设置哪个参数？
+参数`--response_prefix`。
+### Q34: 一直报这个错怎么改呀？
+```text
+File "/mnt/workspace/swift/swift/llm/dataset/preprocessor/core.py", line 69, in _check_messages
+    raise ValueError(f'assistant_message: {assistant_message}')
+ValueError: assistant_message: {'role': 'assistant', 'content': ''}
+```
+```shell
+CUDA_VISIBLE_DEVICES=0 NPROC_PER_NODE=1 MAX_PIXELS=1003520 swift sft --model Qwen/Qwen2.5-VL-7B-Instruct --train_type lora --dataset /mnt/workspace/data.json --deepspeed zero2 --max_length 16384
+```
+数据集assistant字段为空，如果是推理，把这个空字符串删掉，因为这个会导致训练时nan，会做检查。
+## 部署
+### Q1: 如何部署训练后的模型？
+`swift deploy --adapters xxx`，见文档[推理和部署](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%8E%A8%E7%90%86%E5%92%8C%E9%83%A8%E7%BD%B2.html)。
+### Q2: 如何使用vllm部署进行多卡部署？
+详见[例子](https://github.com/modelscope/ms-swift/tree/main/examples/deploy)。
+### Q3: 请问用vllm部署的时候，客户端怎么传入图片？
+详见[客户端例子](https://github.com/modelscope/ms-swift/tree/main/examples/deploy/client/mllm)。
+### Q4: 有个问题想问一下，qwen2-7b部署后使用客户端时，调用openai的api要使用client.completions.create，不能使用client.chat.completions.create，但是使用qwen2-7b-instruct-q5_k_m.gguf的时候可以使用client.chat.completions.create，这是为什么呀？
+base模型可以用client.chat.completions.create的，不过这个是兼容行为。
+### Q5: 使用两张卡用swift deploy启动服务端后，用Ctrl+C退出后，会一直有一个python进程，一直占用一张卡的显存，这是正常现象吗？
+需要kill 一下, 这是vllm的问题。
+### Q6: 在哪查看模型是否支持lmdeploy或vllm加速？
+vllm和lmdeploy分别有自己的模型支持范围，请查看各自官方文档来确定是否可用。
+### Q7: 通义千问2.5-数学-7B-Instruct，会偶尔这样一直返回乱码，是什么问题呢？用vllm部署，fp16。
+尝试bf16。
+### Q8: swift推理服务启动后，交互进行设置的温度之类的配置，如何设置呢？
+推理只能启动前设置。部署可以在启动时设置默认，之后在客户端继续设置，覆盖默认。
+### Q9: 在本地部署qwen2vl模型，推理后端使用vllm，本地视频怎么传入呢？可以使用 base64 传进去吗？curl调用如何加载视频呢？
+base64，详见[mllm客户端例子](https://github.com/modelscope/ms-swift/tree/main/examples/deploy/client/mllm)。
+### Q10: qwen2-vl部署时报错如下，是vllm的版本不对么？
+```text
+Unrecognized keys in `rope_scaling`for 'rope_type'='default': {'mrope_section'} Unrecognized keys in `rope_scaling`for 'rope_type'='default': {'mrope_section'}
+```
+详见[issue](https://github.com/QwenLM/Qwen2-VL/issues/209)。
+### Q11: 我用swift deploy做推理的时候，想让他输出token的概率，我加了logprobs True，但是它输出null，这个是什么原因呢？
+```shell
+RAY_memory_monitor_refresh_ms=0 CUDA_VISIBLE_DEVICES=1 nohup swift deploy --ckpt_dir /mnt/workspace/checkpoint_600 --infer_backend vllm --logprobs True --load_data_args false --host 0.0.0.0 --port 8000 &
+```
+需要客户端传参数，`request_config = RequestConfig(..., logprobs=True, top_logprobs=2)`。
+### Q12: swift3.0 部署推理，可以设置请求的超时时间么？如果图片url非法，会等在那里
+设置环境变量`TIMEOUT`,默认是300秒。或者`InferClient`中可以传参数。
+### Q13: swift部署的模型怎么没法流式生成啊？服务端的stream设为True了，客户端的stream也设为True了，但它就是没法流式生成
+客户端控制的，查看[examples/deploy/client](https://github.com/modelscope/ms-swift/tree/main/examples/deploy/client)。
+### Q14: swift部署好多模态模型之后，客户端传PIL.Image，有示例没?
+看这个[client例子](https://github.com/modelscope/ms-swift/blob/main/examples/deploy/client/mllm/openai_client.py)。
+### Q15: 请问 deploy部署时候，设置什么参数可以实现一次输出，输出多个结果呢？
+`RequestConfig`参数`n`。
+## 评测
+### Q1: swift支持的评测集有哪些？
+纯文本评测：
+```text
+'obqa', 'cmb', 'AX_b', 'siqa', 'nq', 'mbpp', 'winogrande', 'mmlu', 'BoolQ', 'cluewsc', 'ocnli', 'lambada',
+'CMRC', 'ceval', 'csl', 'cmnli', 'bbh', 'ReCoRD', 'math', 'humaneval', 'eprstmt', 'WSC', 'storycloze',
+'MultiRC', 'RTE', 'chid', 'gsm8k', 'AX_g', 'bustm', 'afqmc', 'piqa', 'lcsts', 'strategyqa', 'Xsum', 'agieval',
+'ocnli_fc', 'C3', 'tnews', 'race', 'triviaqa', 'CB', 'WiC', 'hellaswag', 'summedits', 'GaokaoBench',
+'ARC_e', 'COPA', 'ARC_c', 'DRCD'
+```
+多模态评测：
+```text
+'COCO_VAL', 'MME', 'HallusionBench', 'POPE', 'MMBench_DEV_EN', 'MMBench_TEST_EN', 'MMBench_DEV_CN', 'MMBench_TEST_CN',
+'MMBench', 'MMBench_CN', 'MMBench_DEV_EN_V11', 'MMBench_TEST_EN_V11', 'MMBench_DEV_CN_V11',
+'MMBench_TEST_CN_V11', 'MMBench_V11', 'MMBench_CN_V11', 'SEEDBench_IMG', 'SEEDBench2',
+'SEEDBench2_Plus', 'ScienceQA_VAL', 'ScienceQA_TEST', 'MMT-Bench_ALL_MI', 'MMT-Bench_ALL',
+'MMT-Bench_VAL_MI', 'MMT-Bench_VAL', 'AesBench_VAL', 'AesBench_TEST', 'CCBench', 'AI2D_TEST', 'MMStar',
+'RealWorldQA', 'MLLMGuard_DS', 'BLINK', 'OCRVQA_TEST', 'OCRVQA_TESTCORE', 'TextVQA_VAL', 'DocVQA_VAL',
+'DocVQA_TEST', 'InfoVQA_VAL', 'InfoVQA_TEST', 'ChartQA_TEST', 'MathVision', 'MathVision_MINI',
+'MMMU_DEV_VAL', 'MMMU_TEST', 'OCRBench', 'MathVista_MINI', 'LLaVABench', 'MMVet', 'MTVQA_TEST',
+'MMLongBench_DOC', 'VCR_EN_EASY_500', 'VCR_EN_EASY_100', 'VCR_EN_EASY_ALL', 'VCR_EN_HARD_500',
+'VCR_EN_HARD_100', 'VCR_EN_HARD_ALL', 'VCR_ZH_EASY_500', 'VCR_ZH_EASY_100', 'VCR_ZH_EASY_ALL',
+'VCR_ZH_HARD_500', 'VCR_ZH_HARD_100', 'VCR_ZH_HARD_ALL', 'MMDU', 'MMBench-Video', 'Video-MME'
+```
+详见文档[评测](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E8%AF%84%E6%B5%8B.html)。
+### Q2: 如何使用自定义评测集？
+纯文本、多模态自定义评测集必须和某个官方评测集数据格式（pattern）保持一致，见文档[评测](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E8%AF%84%E6%B5%8B.html)。
+### Q3: python3.11环境，评测时mmengine报错
+尝试python3.10环境。或先安装全量依赖： `pip3 install evalscope[all]`，再打patch： `pip3 install https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/package/evalscope-0.5.3.post1-py3-none-any.whl`。
+### Q4: 官方支持的评测数据集手动下载后，swift eval能配置本地路径评测吗？
+先下载评测数据集[eval.zip](https://modelscope.cn/datasets/swift/evalscope_resource/files)，解压后将里面的内容放到 `~/.cache/modelscope/media_resources/evalscope/data`文件夹下；再执行swift eval命令就可以使用本地数据。
+### Q5: 自定义评测是不是有bug，把标准例子改成英文，一直都跑不通？
+```shell
+swift eval --model_type 'qwen2_5-1_5b-instruct' --eval_dataset no --custom_eval_config '/mnt/workspace/test_data/config_eval.json'
+```
+这是依赖了nltk的包，然后nltk的tokenizer需要下载一个punkt_tab的zip文件，国内有些环境下载不太稳定或者直接失败。已尝试改了代码做兜底，规避这个问题；参考[issue](https://github.com/nltk/nltk/issues/3293)。
+### Q6: eval微调后的模型，总是会在固定的百分比停掉，但是vllm服务看着一直是有在正常运行的。模型越大，断开的越早。
+`TIMEOUT`环境变量设置为-1。
+### Q7: evalscope 支持多模型对比吗？
+详见[文档](https://evalscope.readthedocs.io/zh-cn/latest/user_guides/arena.html)。
+### Q8: 多模态数据集有没有自定义评估？
+多模态自定义评估可以参考[文档](https://evalscope.readthedocs.io/zh-cn/latest/advanced_guides/custom_dataset/index.html)。
+### Q9: ms-swift有方法测试qps，延迟，tokens/s吗？
+可以尝试使用evalscope的[模型推理性能压测](https://evalscope.readthedocs.io/zh-cn/latest/user_guides/stress_test/index.html)。
+### Q10: 评估的时候可不可以控制数据集条数？评估一个mmlu需要一个多小时，也太慢了。
+配置参数`--eval_limit`，这里的`--eval_limit`是控制了每个subset的条数，比如mmlu有50多个subset，每个limit10条，那就是500多条。
+### Q11: 想请问一下，评测时不是相当于让模型输出一次回答然后检查答案对不对吗，有没有办法可以记录或看到每次完整的回答呢？
+ceval这种多选题的评测是通过计算每个选项的logits来得到的，没有输出回答内容；想得到回答内容的话，可以部署模型服务指定api url来评测，这样是通过解析模型输出来评测的，详见[文档](https://evalscope.readthedocs.io/zh-cn/latest/get_started/basic_usage.html#api)，后面这两种可以做成可选项。
+### Q12: 我想用evalscope压测一下我的模型，想采用prompt.txt文件的形式，这个文件内容的格式应该是什么样子的呀？
+配置line_by_line，详见[文档](https://evalscope.readthedocs.io/zh-cn/latest/user_guides/stress_test/parameters.html#id5)。
+### Q13: 使用evalscope perf进行模型推理性能压测，parallel和number这两个参数怎样使用呢？
+number是请求的总数量，parallel是并发数量。
+### Q14: 问一下评估swift eval里，模型最多生成1024token就结束了，这个如何修改？设置--max_new_tokens 5000，看起来没起作用
+swift里面这个参数还没透出，可以使用evalscope来运行，model里面配置max_tokens参考[文档](https://evalscope.readthedocs.io/zh-cn/latest/user_guides/backend/vlmevalkit_backend.html#id6)。
+### Q15: 请问evalscope现在支持deepseek-r1 的相关benchmark吗？AIME、MATH-500这样
+支持的，这里有[最佳实践](https://evalscope.readthedocs.io/zh-cn/latest/best_practice/deepseek_r1_distill.html)。
+### Q16: 想问一下evalscope测评gpqa使用本地路径报错： ValueError: BuilderConfig 'gpqa_extended' not found. Available: ['default']
+参数配置如下：
+```shell
+ --datasets gpqa --dataset-args '{"gpqa": {"local_path": "/mnt/workspace/gpqa"} }'
+ ```
+数据集如果要下载到本地使用，建议从modelscope上克隆仓库再指定路径。
+### Q17: 用evalscope评测arc数据集的时候，报这个错误，这是什么原因呢，用的是加载本地数据路径方式
+```text
+KeyError: 'RequestId'
+```
+```shell
+--datasets arc --dataset-args '{"arc": {"local_path": "/mnt/workspace/arc"}}'
+```
+参考[文档](https://evalscope.readthedocs.io/zh-cn/latest/get_started/basic_usage.html#id10)，arc数据集本身需要通过py脚本来下载数据，直接clone仓库不行。
+### Q18: 请教一下，想使用opencompass的后端评测，如何从本地加载下载好的数据集？
+opencompass后端不支持设置`data_args`。
+### Q19: swift eval 来评估模型，--eval_backend OpenCompass不支持自定义数据集吗？
+```text
+ValueError: eval_dataset: /mnt/workspace/data.jsonl is not supported.
+eval_backend: OpenCompass supported datasets: ['C3', 'summedits', 'WiC', 'csl', 'lambada', 'mbpp', 'hellaswag', 'ARC_e', 'math', 'nq', 'race', 'MultiRC', 'cmb', 'ceval', 'GaokaoBench', 'mmlu', 'winogrande', 'tnews', 'triviaqa', 'CB', 'cluewsc', 'humaneval', 'AX_g', 'DRCD', 'RTE', 'ocnli_fc', 'gsm8k', 'obqa', 'ReCoRD', 'Xsum', 'ocnli', 'WSC', 'siqa', 'agieval', 'piqa', 'cmnli', 'cmmlu', 'eprstmt', 'storycloze', 'AX_b', 'afqmc', 'strategyqa', 'bustm', 'BoolQ', 'COPA', 'ARC_c', 'PMMEval', 'chid', 'CMRC', 'lcsts']
+```
+opencompass不支持自定义数据集，用native可以自定义模式。
+### Q20: 我在本地用单张A100运行模型来做evalscope官方文档里的RAGAS评测任务（https://evalscope.readthedocs.io/zh-cn/latest/user_guides/backend/rageval_backend/ragas.html）时，跑文档中的两个样例花费了10分钟的时间，请问这是正常的么？有没有什么办法可以优化运行速度。
+rag评测本身确实比较耗资源，使用本地critic llm确实会慢一些，处理不了batch请求，建议用vllm这样的框架来拉起任务。