Update README.md
Browse files
README.md
CHANGED
|
@@ -119,13 +119,42 @@ YOUR_TEST_FILE="<test_data_path>"
|
|
| 119 |
## 🤝 Citation
|
| 120 |
If you find this work helpful, please cite our paper:
|
| 121 |
```bibtex
|
| 122 |
-
@misc{
|
| 123 |
-
title={
|
| 124 |
-
author={Zhenpeng Su and Leiyu Pan and
|
| 125 |
year={2025},
|
| 126 |
-
eprint={
|
| 127 |
archivePrefix={arXiv},
|
| 128 |
primaryClass={cs.LG},
|
| 129 |
-
url={https://arxiv.org/abs/
|
| 130 |
}
|
| 131 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
## 🤝 Citation
|
| 120 |
If you find this work helpful, please cite our papers:
|
| 121 |
```bibtex
|
| 122 |
+
@misc{su2025cegppocontrollingentropygradientpreserving,
|
| 123 |
+
title={CE-GPPO: Controlling Entropy via Gradient-Preserving Clipping Policy Optimization in Reinforcement Learning},
|
| 124 |
+
author={Zhenpeng Su and Leiyu Pan and Minxuan Lv and Yuntao Li and Wenping Hu and Fuzheng Zhang and Kun Gai and Guorui Zhou},
|
| 125 |
year={2025},
|
| 126 |
+
eprint={2509.20712},
|
| 127 |
archivePrefix={arXiv},
|
| 128 |
primaryClass={cs.LG},
|
| 129 |
+
url={https://arxiv.org/abs/2509.20712},
|
| 130 |
}
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
```bibtex
|
| 135 |
+
@article{DBLP:journals/corr/abs-2508-07629,
|
| 136 |
+
author = {Zhenpeng Su and
|
| 137 |
+
Leiyu Pan and
|
| 138 |
+
Xue Bai and
|
| 139 |
+
Dening Liu and
|
| 140 |
+
Guanting Dong and
|
| 141 |
+
Jiaming Huang and
|
| 142 |
+
Wenping Hu and
|
| 143 |
+
Fuzheng Zhang and
|
| 144 |
+
Kun Gai and
|
| 145 |
+
Guorui Zhou},
|
| 146 |
+
title = {Klear-Reasoner: Advancing Reasoning Capability via Gradient-Preserving
|
| 147 |
+
Clipping Policy Optimization},
|
| 148 |
+
journal = {CoRR},
|
| 149 |
+
volume = {abs/2508.07629},
|
| 150 |
+
year = {2025},
|
| 151 |
+
url = {https://doi.org/10.48550/arXiv.2508.07629},
|
| 152 |
+
doi = {10.48550/ARXIV.2508.07629},
|
| 153 |
+
eprinttype = {arXiv},
|
| 154 |
+
eprint = {2508.07629},
|
| 155 |
+
timestamp = {Sat, 13 Sep 2025 14:46:27 +0200},
|
| 156 |
+
biburl = {https://dblp.org/rec/journals/corr/abs-2508-07629.bib},
|
| 157 |
+
bibsource = {dblp computer science bibliography, https://dblp.org}
|
| 158 |
+
}
|
| 159 |
+
```
|
| 160 |
+
|