Improve: Add Tool-Star citation to model card (#2)
Browse files
- Improve: Add Tool-Star citation to model card (8a903f22cfe4f77d3b75c3e0faa137728f0e725b)
Co-authored-by: Niels Rogge <nielsr@users.noreply.huggingface.co>
README.md
CHANGED
|
@@ -1,18 +1,18 @@
|
|
| 1 |
---
|
| 2 |
-
license: mit
|
| 3 |
-
pipeline_tag: text-generation
|
| 4 |
-
library_name: transformers
|
| 5 |
-
datasets:
|
| 6 |
-
- dongguanting/ARPO-SFT-54K
|
| 7 |
-
- dongguanting/ARPO-RL-Reasoning-10K
|
| 8 |
-
- dongguanting/ARPO-RL-DeepSearch-1K
|
| 9 |
-
language: en
|
| 10 |
base_model:
|
| 11 |
- Qwen/Qwen2.5-3B-Instruct
|
| 12 |
- Qwen/Qwen2.5-7B-Instruct
|
| 13 |
- meta-llama/Llama-3.1-8B-Instruct
|
| 14 |
- Qwen/Qwen3-8B-Instruct
|
| 15 |
- Qwen/Qwen3-14B-Instruct
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
---
|
| 17 |
|
| 18 |
# Agentic Reinforced Policy Optimization (ARPO)
|
|
@@ -114,6 +114,30 @@ If you find this work helpful, please cite our paper:
|
|
| 114 |
primaryClass={cs.LG},
|
| 115 |
url={https://arxiv.org/abs/2507.19849},
|
| 116 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
```
|
| 118 |
|
| 119 |
## 🤝 Acknowledgements
|
|
|
|
| 1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
base_model:
|
| 3 |
- Qwen/Qwen2.5-3B-Instruct
|
| 4 |
- Qwen/Qwen2.5-7B-Instruct
|
| 5 |
- meta-llama/Llama-3.1-8B-Instruct
|
| 6 |
- Qwen/Qwen3-8B-Instruct
|
| 7 |
- Qwen/Qwen3-14B-Instruct
|
| 8 |
+
datasets:
|
| 9 |
+
- dongguanting/ARPO-SFT-54K
|
| 10 |
+
- dongguanting/ARPO-RL-Reasoning-10K
|
| 11 |
+
- dongguanting/ARPO-RL-DeepSearch-1K
|
| 12 |
+
language: en
|
| 13 |
+
library_name: transformers
|
| 14 |
+
license: mit
|
| 15 |
+
pipeline_tag: text-generation
|
| 16 |
---
|
| 17 |
|
| 18 |
# Agentic Reinforced Policy Optimization (ARPO)
|
|
|
|
| 114 |
primaryClass={cs.LG},
|
| 115 |
url={https://arxiv.org/abs/2507.19849},
|
| 116 |
}
|
| 117 |
+
@article{dong2025toolstar,
|
| 118 |
+
author = {Guanting Dong and
|
| 119 |
+
Yifei Chen and
|
| 120 |
+
Xiaoxi Li and
|
| 121 |
+
Jiajie Jin and
|
| 122 |
+
Hongjin Qian and
|
| 123 |
+
Yutao Zhu and
|
| 124 |
+
Hangyu Mao and
|
| 125 |
+
Guorui Zhou and
|
| 126 |
+
Zhicheng Dou and
|
| 127 |
+
Ji{-}Rong Wen},
|
| 128 |
+
title = {Tool-Star: Empowering LLM-Brained Multi-Tool Reasoner via Reinforcement
|
| 129 |
+
Learning},
|
| 130 |
+
journal = {CoRR},
|
| 131 |
+
volume = {abs/2505.16410},
|
| 132 |
+
year = {2025},
|
| 133 |
+
url = {https://doi.org/10.48550/arXiv.2505.16410},
|
| 134 |
+
doi = {10.48550/ARXIV.2505.16410},
|
| 135 |
+
eprinttype = {arXiv},
|
| 136 |
+
eprint = {2505.16410},
|
| 137 |
+
timestamp = {Thu, 26 Jun 2025 07:49:34 +0200},
|
| 138 |
+
biburl = {https://dblp.org/rec/journals/corr/abs-2505-16410.bib},
|
| 139 |
+
bibsource = {dblp computer science bibliography, https://dblp.org}
|
| 140 |
+
}
|
| 141 |
```
|
| 142 |
|
| 143 |
## 🤝 Acknowledgements
|