diff --git a/.gitattributes b/.gitattributes index b43131c82cd21f9ed4be4cef3bbc6c7583623af6..f8d2f31cf3853000a83449a2a5c3ac5bdc2b219f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -51,3 +51,5 @@ seamless_interaction/assets/banner.gif filter=lfs diff=lfs merge=lfs -text docs/resources/grpo_countdown.png filter=lfs diff=lfs merge=lfs -text docs/resources/grpo_geoqa.png filter=lfs diff=lfs merge=lfs -text docs/resources/grpo_openr1_multimodal.png filter=lfs diff=lfs merge=lfs -text +docs/resources/web-ui-en.jpg filter=lfs diff=lfs merge=lfs -text +docs/resources/kto_data.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..3c7cc70072bfe0eb80858616b22eee880988cd48 --- /dev/null +++ b/.gitignore @@ -0,0 +1,151 @@ +# Byte-compiled / optimized / DLL files +tmp +*.ttf +__pycache__/ +*.py[cod] +*$py.class +test.py +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +/package +/temp +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +.vscode +.idea +.run + +# custom +*.pkl +*.pkl.json +*.log.json +*.whl +*.tar.gz +*.swp +*.log +*.tar.gz +source.sh +tensorboard.sh +.DS_Store +replace.sh +result.png +result.jpg +result.mp4 +output/ +outputs/ +wandb/ +*.out +benchmarks/ +eval_output/ +eval_outputs/ +transformers/ +vlmeval/ +my_model/ +/data +result/ +images +/custom/ +megatron_output/ + +# Pytorch +*.pth +*.pt + +# ast template +ast_index_file.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..558ddc5a8bafc5e7bdb5e64277315ea0bf87705f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,52 @@ +repos: + - repo: https://github.com/pycqa/flake8.git + rev: 4.0.0 + hooks: + - id: flake8 + exclude: | + (?x)^( + thirdparty/| + examples/| + tests/run.py + )$ + - repo: https://github.com/PyCQA/isort.git + rev: 4.3.21 + hooks: + - id: isort + exclude: | + (?x)^( + examples/| + tests/run.py| + swift/cli/sft.py + )$ + - repo: https://github.com/pre-commit/mirrors-yapf.git + rev: v0.30.0 + hooks: + - id: yapf + exclude: | + (?x)^( + thirdparty/| + examples/| + tests/run.py + )$ + - repo: 
https://github.com/pre-commit/pre-commit-hooks.git + rev: v3.1.0 + hooks: + - id: trailing-whitespace + exclude: thirdparty/|tests/run.py + - id: check-yaml + exclude: thirdparty/|tests/run.py + - id: end-of-file-fixer + exclude: thirdparty/|tests/run.py + - id: requirements-txt-fixer + exclude: thirdparty/|tests/run.py + - id: double-quote-string-fixer + exclude: thirdparty/|tests/run.py + - id: check-merge-conflict + exclude: thirdparty/|tests/run.py + - id: fix-encoding-pragma + exclude: thirdparty/|tests/run.py + args: ["--remove"] + - id: mixed-line-ending + exclude: thirdparty/|tests/run.py + args: ["--fix=lf"] diff --git a/.pre-commit-config_local.yaml b/.pre-commit-config_local.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6ef27d9a75117fccea0062835a4679e7f23dc77 --- /dev/null +++ b/.pre-commit-config_local.yaml @@ -0,0 +1,52 @@ +repos: + - repo: /home/admin/pre-commit/flake8 + rev: 4.0.0 + hooks: + - id: flake8 + exclude: | + (?x)^( + thirdparty/| + examples/| + tests/run.py + )$ + - repo: /home/admin/pre-commit/isort + rev: 4.3.21 + hooks: + - id: isort + exclude: | + (?x)^( + examples/| + tests/run.py| + swift/cli/sft.py + )$ + - repo: /home/admin/pre-commit/mirrors-yapf + rev: v0.30.0 + hooks: + - id: yapf + exclude: | + (?x)^( + thirdparty/| + examples/| + tests/run.py + )$ + - repo: /home/admin/pre-commit/pre-commit-hooks + rev: v3.1.0 + hooks: + - id: trailing-whitespace + exclude: thirdparty/|tests/run.py + - id: check-yaml + exclude: thirdparty/|tests/run.py + - id: end-of-file-fixer + exclude: thirdparty/ + - id: requirements-txt-fixer + exclude: thirdparty/|tests/run.py + - id: double-quote-string-fixer + exclude: thirdparty/|tests/run.py + - id: check-merge-conflict + exclude: thirdparty/|tests/run.py + - id: fix-encoding-pragma + exclude: thirdparty/|tests/run.py + args: ["--remove"] + - id: mixed-line-ending + exclude: thirdparty/|tests/run.py + args: ["--fix=lf"] diff --git a/4JOB_train.tar b/4JOB_train.tar new 
file mode 100644 index 0000000000000000000000000000000000000000..8026abe5aaec0765989e509c61e2f748463ddb61 --- /dev/null +++ b/4JOB_train.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69f7465b4776100721f926c3f4221d72752dfe6f124d6f45586e2c2eadc55b7e +size 6600263680 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000000000000000000000000000000..804f94936bd28449ec7a2eb93447b3adbc475e71 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +contact@modelscope.cn. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..c6d7f9b884869d9f0d1e63ef3a73a717dd2bcdec --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,63 @@ +# Contributor Guide + +_Welcome to offer PRs, bug reports, documentation supplements or other types of contributions to SWIFT!_ + +## Table of Contents +- [Code of Conduct](#-code-of-conduct) +- [Contribution Process](#-contribution-process) +- [Hardware support](#-Hardware-support) + +## 📖 Code of Conduct +Please refer to our [Code of Conduct documentation](./CODE_OF_CONDUCT.md). + +## 🔁 Contribution Process +### What We Need +- New Technologies and New Models: SWIFT needs to support more open-source models and datasets, or new technologies that we have not paid attention to. 
If you are interested, please submit a PR to us. +- Technical Propagation: If you are interested in technical propagation, you are welcome to help us write tutorials, documents or videos on any website, and send us the link. +- Community Contribution: You can write technical articles related to SWIFT, and submit them to us. After review and approval, we will publish them on the official ModelScope accounts (Zhihu, WeChat, etc.), with your name credited. + +### Incentives +- We will issue electronic certificates to contributors on behalf of the ModelScope community, to encourage your selfless contributions. +- We will offer small souvenirs related to the ModelScope Community. +- We will provide free A10 computing power during the development period. For more details, please refer to the [Hardware-support](#-Hardware-support) section. + +### Submitting PR (Pull Requests) + +Any feature development is carried out in the form of Fork and then PR on GitHub. +1. Fork: Go to the [SWIFT](https://github.com/modelscope/swift) page and click the **Fork button**. After completion, a SWIFT code repository will be cloned under your personal organization. +2. Clone: Clone the code repository generated in the first step to your local machine and **create a new branch** for development. During development, please click the **Sync Fork button** in time to synchronize with the `main` branch to prevent code expiration and conflicts. +3. Submit PR: After development and testing, push the code to the remote branch. On GitHub, go to the **Pull Requests page**, create a new PR, select your code branch as the source branch, and the `modelscope/swift:main` branch as the target branch. + +4. Write Description: It is necessary to provide a good feature description in the PR, so that the reviewers know the content of your modification. +5. Review: We hope that the code to be merged is concise and efficient, so we may raise some questions and discuss them. 
Please note that any issues raised in the review are aimed at the code itself, not at you personally. Once all issues are discussed and resolved, your code will be approved. + +### Code Standards and Development Approach +SWIFT has established variable naming conventions and development approaches. Please follow these approaches as much as possible during development. +1. Variable names are separated by underscores, and class names are named with the first letter of each word capitalized. +2. All Python indentation uses four spaces instead of a tab. +3. Choose well-known open-source libraries, avoid using closed-source libraries or unstable open-source libraries, and avoid repeating the existing code. + +After the PR is submitted, SWIFT will perform two types of tests: +- Code Lint Test: A static code compliance check test. Please make sure that you have performed code lint locally in advance. +```shell +pip install pre-commit # In the swift folder +pre-commit run --all-files # Fix the errors reported by pre-commit until all checks are successful +``` +- CI Tests: Smoke tests and unit tests; please refer to the next section. + +### Running CI Tests +Before submitting the PR, please ensure that your development code is protected by test cases, such as smoke tests for new features, or unit tests for various edge cases. Reviewers will also pay attention to this during code review. At the same time, there will be dedicated services running CI Tests, running all test cases, and the code can only be merged after the test cases pass. + +Additionally, since some important tests have been skipped due to long running time, to ensure that your logic is correct, you can run the test locally: +```shell +python tests/llm/test_run.py +``` +Please make sure this test can pass normally. + +## ✅ Hardware support + +SWIFT will provide hardware support for developers, including free GPUs. 
If needed, please email us ([contact@modelscope.cn](mailto:contact@modelscope.cn)) or join our WeChat group: + +

+ +

diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..9a202b87ef56210b956e21a5e988b79d348296a4 --- /dev/null +++ b/CONTRIBUTING_CN.md @@ -0,0 +1,81 @@ +# 贡献者指引 + +*欢迎帮SWIFT提供Feature PR、Bug反馈、文档补充或其他类型的贡献!* + +## 目录 + +- [代码规约](#-代码规约) +- [贡献流程](#-贡献流程) +- [资源支持](#-资源支持) + +## 📖 代码规约 + +请查看我们的[代码规约文档](./CODE_OF_CONDUCT.md)。 + +## 🔁 贡献流程 + +### 我们需要什么 +- 新技术和新模型:SWIFT需要支持更多的开源模型和数据集,或我们没有关注到的新技术,如果您对此有兴趣,可以提交PR给我们。 +- 技术布道:如果您对技术布道有兴趣,欢迎在任何网站上帮我们撰写教程文档或视频等,并将链接发给我们。 +- 社区供稿:您可以撰写和SWIFT有关的技术文章,并供稿给我们,我们审核通过后会在魔搭官方账号(知乎、公众号等)上进行发布,并署上您的名字。 + +### 激励 + +- 我们会以魔搭社区的身份给贡献者颁发电子证书,以鼓励您的无私贡献。 +- 我们会赠送魔搭社区相关周边小礼品。 +- 我们会赠送开发期间的免费A10算力,具体可以查看[资源支持](#-资源支持)章节。 + +### 提交PR(Pull Requests) + +任何feature开发都在github上以先Fork后PR的形式进行。 + +1. Fork:进入[SWIFT](https://github.com/modelscope/swift)页面后,点击**Fork按钮**执行。完成后会在您的个人组织下克隆出一个SWIFT代码库 + +2. Clone:将第一步产生的代码库clone到本地并**拉新分支**进行开发,开发中请及时点击**Sync Fork按钮**同步`main`分支,防止代码过期并冲突 + +3. 提交PR:开发、测试完成后将代码推送到远程分支。在github上点击**Pull Requests页面**,新建一个PR,源分支选择您提交的代码分支,目标分支选择`modelscope/swift:main`分支 + +4. 撰写描述:在PR中填写良好的feature描述是必要的,让Reviewers知道您的修改内容 + +5. Review:我们希望合入的代码简洁高效,因此可能会提出一些问题并讨论。请注意,任何review中提出的问题是针对代码本身,而非您个人。在所有问题讨论通过后,您的代码会被通过 + +### 代码规范和开发方式 + +SWIFT有约定俗成的变量命名方式和开发方式。在开发中请尽量遵循这些方式。 + +1. 变量命名以下划线分割,类名以所有单词首字母大写方式命名 +2. 所有的python缩进都是四个空格取代一个tab +3. 
选用知名的开源库,避免使用闭源库或不稳定的开源库,避免重复造轮子 + +SWIFT在PR提交后会进行两类测试: + +- Code Lint测试 对代码进行静态规范走查的测试,为保证该测试通过,请保证本地预先进行了Code lint。方法是: + + ```shell + pip install pre-commit + # 在swift文件夹内 + pre-commit run --all-files + # 对pre-commit报的错误进行修改,直到所有的检查都是成功状态 + ``` + +- CI Tests 冒烟测试和单元测试,请查看下一章节 + +### Running CI Tests + +在提交PR前,请保证您的开发代码已经受到了测试用例的保护。例如,对新功能的冒烟测试,或者各种边缘case的单元测试等。在代码review时Reviewers也会关注这一点。同时,也会有服务专门运行CI Tests,运行所有的测试用例,测试用例通过后代码才可以合并。 + +另外,由于运行时间过长,我们跳过了部分重要测试,为保证您的逻辑是正确的,可以在本地执行该测试: + +```shell +python tests/llm/test_run.py +``` + +请保证该测试可以正常通过。 + +## ✅ 资源支持 + +SWIFT会为开发者提供资源支持,包括免费的GPU算力。如果需要请邮件联系我们([contact@modelscope.cn](mailto:contact@modelscope.cn))或加入我们的微信群: + +

+ +

diff --git a/GRPO_TEST.jsonl b/GRPO_TEST.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..33ca8728fb9ae6ba9d66c4b1474c3b3d3d652fb5 --- /dev/null +++ b/GRPO_TEST.jsonl @@ -0,0 +1,274 @@ +{"messages": [{"role": "user", "content": "