munish0838 committed on
Commit
8af7fa8
·
verified ·
1 Parent(s): f17fc23

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +4 -3
README.md CHANGED
@@ -12,6 +12,7 @@ language:
12
  - en
13
  base_model:
14
  - deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
 
15
 
16
  ---
17
 
@@ -32,7 +33,7 @@ This is quantized version of [agentica-org/DeepScaleR-1.5B-Preview](https://hugg
32
  </div>
33
  <br>
34
  <div align="center" style="line-height: 1;">
35
- <a href="https://github.com/agentica-project/deepscaler" style="margin: 2px;">
36
  <img alt="Code" src="https://img.shields.io/badge/DeepScaleR-000000?style=for-the-badge&logo=github&logoColor=000&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
37
  </a>
38
  <a href="https://pretty-radio-b75.notion.site/DeepScaleR-Surpassing-O1-Preview-with-a-1-5B-Model-by-Scaling-RL-19681902c1468005bed8ca303013a4e2" target="_blank" style="margin: 2px;">
@@ -86,7 +87,7 @@ A more detailed description of the training recipe can be found in our [blog pos
86
  We report Pass@1 accuracy averaged over 16 samples for each problem.
87
  | Model | AIME 2024 | MATH 500 | AMC 2023 | Minerva Math | OlympiadBench | Avg. |
88
  |-------|-----------|-----------|-----------|--------------|---------------|------|
89
- | 2.5-7B-Instruct | 13.3 | 79.8 | 50.6 | 34.6 | 40.7 | 43.8 |
90
  | rStar-Math-7B | 26.7 | 78.4 | 47.5 | - | 47.1 | - |
91
  | Eurus-2-7B-PRIME | 26.7 | 79.2 | 57.8 | 38.6 | 42.1 | 48.9 |
92
  | Qwen2.5-7B-SimpleRL | 26.7 | 82.4 | 62.5 | <strong>39.7</strong> | 43.3 | 50.9 |
@@ -118,7 +119,7 @@ This permissive license ensures that researchers, developers, and enthusiasts wo
118
  ```bibtex
119
  @misc{deepscaler2025,
120
  title={DeepScaleR: Surpassing O1-Preview with a 1.5B Model by Scaling RL},
121
- author={Michael Luo and Sijun Tan and Justin Wong and Xiaoxiang Shi and William Tang and Manan Roongta and Colin Cai and Jeffrey Luo and Tianjun Zhang and Erran Li and Raluca Ada Popa and Ion Stoica},
122
  year={2025},
123
  howpublished={\url{https://pretty-radio-b75.notion.site/DeepScaleR-Surpassing-O1-Preview-with-a-1-5B-Model-by-Scaling-RL-19681902c1468005bed8ca303013a4e2}},
124
  note={Notion Blog}
 
12
  - en
13
  base_model:
14
  - deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
15
+ pipeline_tag: text-generation
16
 
17
  ---
18
 
 
33
  </div>
34
  <br>
35
  <div align="center" style="line-height: 1;">
36
+ <a href="https://github.com/agentica-project/rllm" style="margin: 2px;">
37
  <img alt="Code" src="https://img.shields.io/badge/DeepScaleR-000000?style=for-the-badge&logo=github&logoColor=000&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
38
  </a>
39
  <a href="https://pretty-radio-b75.notion.site/DeepScaleR-Surpassing-O1-Preview-with-a-1-5B-Model-by-Scaling-RL-19681902c1468005bed8ca303013a4e2" target="_blank" style="margin: 2px;">
 
87
  We report Pass@1 accuracy averaged over 16 samples for each problem.
88
  | Model | AIME 2024 | MATH 500 | AMC 2023 | Minerva Math | OlympiadBench | Avg. |
89
  |-------|-----------|-----------|-----------|--------------|---------------|------|
90
+ | Qwen-2.5-7B-Instruct | 13.3 | 79.8 | 50.6 | 34.6 | 40.7 | 43.8 |
91
  | rStar-Math-7B | 26.7 | 78.4 | 47.5 | - | 47.1 | - |
92
  | Eurus-2-7B-PRIME | 26.7 | 79.2 | 57.8 | 38.6 | 42.1 | 48.9 |
93
  | Qwen2.5-7B-SimpleRL | 26.7 | 82.4 | 62.5 | <strong>39.7</strong> | 43.3 | 50.9 |
 
119
  ```bibtex
120
  @misc{deepscaler2025,
121
  title={DeepScaleR: Surpassing O1-Preview with a 1.5B Model by Scaling RL},
122
+ author={Michael Luo and Sijun Tan and Justin Wong and Xiaoxiang Shi and William Y. Tang and Manan Roongta and Colin Cai and Jeffrey Luo and Li Erran Li and Raluca Ada Popa and Ion Stoica},
123
  year={2025},
124
  howpublished={\url{https://pretty-radio-b75.notion.site/DeepScaleR-Surpassing-O1-Preview-with-a-1-5B-Model-by-Scaling-RL-19681902c1468005bed8ca303013a4e2}},
125
  note={Notion Blog}