Add files using upload-large-folder tool
Browse files- .gitattributes +1 -0
- Convert.py +69 -0
- README.md +92 -0
- genai_config.json +50 -0
- model.onnx +3 -0
- model.onnx.data +3 -0
- special_tokens_map.json +6 -0
- tokenizer.json +0 -0
- tokenizer_config.json +19 -0
.gitattributes
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 17 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 14 |
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.onnx.data filter=lfs diff=lfs merge=lfs -text
|
| 18 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 19 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 20 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
Convert.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# /// script
|
| 2 |
+
# requires-python = ">=3.12"
|
| 3 |
+
# dependencies = [
|
| 4 |
+
# "flash-linear-attention>=0.4.2",
|
| 5 |
+
# "hf-xet>=1.4.3",
|
| 6 |
+
# "huggingface-hub>=1.8.0",
|
| 7 |
+
# "onnx>=1.21.0",
|
| 8 |
+
# "onnx-ir>=0.2.0",
|
| 9 |
+
# "onnxruntime>=1.24.4",
|
| 10 |
+
# "onnxruntime-genai>=0.13.1",
|
| 11 |
+
# "sentencepiece>=0.2.1",
|
| 12 |
+
# "tiktoken>=0.12.0",
|
| 13 |
+
# "torch>=2.11.0",
|
| 14 |
+
# "transformers>=5.4.0",
|
| 15 |
+
# ]
|
| 16 |
+
# ///
|
| 17 |
+
|
| 18 |
+
import argparse
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from huggingface_hub import snapshot_download
|
| 21 |
+
#from onnxruntime_genai.python.models.builder import create_model
|
| 22 |
+
from onnxruntime_genai.models.builder import create_model
|
| 23 |
+
def main():
    """Download a Hugging Face model snapshot and convert it to ONNX.

    Command-line arguments:
        --name:  Hub repository id of the model to convert (required). It is
                 used both as the ``repo_id`` for the download and as the
                 ``model_name`` handed to the builder.
        --token: Optional access token forwarded to ``snapshot_download``.

    Three directories are created under the current working directory if
    they do not exist yet: ``model`` (download target), ``onnx`` (conversion
    output) and ``cache`` (passed to the builder as its cache directory).
    """
    parser = argparse.ArgumentParser(
        description="Download a Hugging Face model and convert it to ONNX."
    )
    # The repo id feeds both the download and the conversion, so reject a
    # missing value here with a usage error instead of passing None on.
    parser.add_argument(
        "--name",
        required=True,
        help="Hub repository id of the model to convert",
    )
    parser.add_argument(
        "--token",
        default=None,
        help="access token forwarded to snapshot_download",
    )
    args = parser.parse_args()

    # Treat an empty --token value the same as an absent one.
    token = args.token or None

    pwd = Path.cwd()
    model_dir = pwd / "model"
    onnx_dir = pwd / "onnx"
    cache_dir = pwd / "cache"
    for directory in (model_dir, onnx_dir, cache_dir):
        directory.mkdir(exist_ok=True)

    # ===== STEP 1: DOWNLOAD (HF HUB + XET backend automatically used) =====
    print(">> Downloading model via huggingface_hub (Xet enabled if installed)...")

    local_path = snapshot_download(
        repo_id=args.name,
        local_dir=str(model_dir),
        token=token,
    )

    print(f"Model downloaded to: {local_path}")

    # ===== STEP 2: CONVERT USING ONNX GENAI BUILDER =====
    print(">> Converting to ONNX (GenAI format)...")

    # NOTE(review): `extra_options` is passed as a plain keyword exactly as
    # before; confirm against the installed builder that it is accepted in
    # this form rather than only as **kwargs.
    create_model(
        model_name=args.name,
        input_path=str(model_dir),    # HF model directory
        output_dir=str(onnx_dir),     # ONNX output
        precision="fp16",             # fp32 | fp16 | int8 | int4 (if supported)
        execution_provider="cpu",     # cpu | cuda | dml
        cache_dir=str(cache_dir),     # optional cache
        extra_options={},
    )

    print("\n✅ Done")
    print(f"ONNX model at: {onnx_dir}")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
if __name__ == "__main__":
|
| 69 |
+
main()
|
README.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: llama2
|
| 3 |
+
datasets:
|
| 4 |
+
- gsm8k
|
| 5 |
+
- competition_math
|
| 6 |
+
language:
|
| 7 |
+
- en
|
| 8 |
+
metrics:
|
| 9 |
+
- exact_match
|
| 10 |
+
library_name: onnxruntime-genai
|
| 11 |
+
pipeline_tag: text-generation
|
| 12 |
+
base_model: llm-agents/tora-code-7b-v1.0
|
| 13 |
+
tags:
|
| 14 |
+
- code
|
| 15 |
+
- math
|
| 16 |
+
- onnx
|
| 17 |
+
- onnxruntime-genai
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
<h1 align="center">
|
| 22 |
+
ToRA: A Tool-Integrated Reasoning Agent <br> for Mathematical Problem Solving
|
| 23 |
+
</h1>
|
| 24 |
+
|
| 25 |
+
<p align="center">
|
| 26 |
+
<a href="https://microsoft.github.io/ToRA/"><b>[🌐 Website]</b></a> •
|
| 27 |
+
<a href="https://arxiv.org/pdf/2309.17452.pdf"><b>[📜 Paper]</b></a> •
|
| 28 |
+
<a href="https://huggingface.co/llm-agents"><b>[🤗 HF Models]</b></a> •
|
| 29 |
+
<a href="https://github.com/microsoft/ToRA"><b>[🐱 GitHub]</b></a>
|
| 30 |
+
<br>
|
| 31 |
+
<a href="https://twitter.com/zhs05232838/status/1708860992631763092"><b>[🐦 Twitter]</b></a> •
|
| 32 |
+
<a href="https://www.reddit.com/r/LocalLLaMA/comments/1703k6d/tora_a_toolintegrated_reasoning_agent_for/"><b>[💬 Reddit]</b></a> •
|
| 33 |
+
<a href="https://notes.aimodels.fyi/researchers-announce-tora-training-language-models-to-better-understand-math-using-external-tools/">[🍀 Unofficial Blog]</a>
|
| 34 |
+
<!-- <a href="#-quick-start">Quick Start</a> • -->
|
| 35 |
+
<!-- <a href="#%EF%B8%8F-citation">Citation</a> -->
|
| 36 |
+
</p>
|
| 37 |
+
|
| 38 |
+
<p align="center">
|
| 39 |
+
Repo for "<a href="https://arxiv.org/pdf/2309.17452.pdf" target="_blank">ToRA: A Tool-Integrated Reasoning Agent for Mathematical Problem Solving</a>"
|
| 40 |
+
</p>
|
| 41 |
+
|
| 42 |
+
## 🔥 News
|
| 43 |
+
|
| 44 |
+
- [2023/10/08] 🔥🔥🔥 All ToRA models released at [HuggingFace](https://huggingface.co/llm-agents)!!!
|
| 45 |
+
- [2023/09/29] ToRA paper, repo, and website released.
|
| 46 |
+
|
| 47 |
+
## 💡 Introduction
|
| 48 |
+
|
| 49 |
+
ToRA is a series of Tool-integrated Reasoning Agents designed to solve challenging mathematical reasoning problems by interacting with tools, e.g., computation libraries and symbolic solvers. The ToRA series seamlessly integrates natural language reasoning with the utilization of external tools, thereby amalgamating the analytical prowess of language and the computational efficiency of external tools.
|
| 50 |
+
|
| 51 |
+
| Model | Size | GSM8k | MATH | AVG@10 math tasks<sup>†</sup> |
|
| 52 |
+
|---|---|---|---|---|
|
| 53 |
+
| GPT-4 | - | 92.0 | 42.5 | 78.3 |
|
| 54 |
+
| GPT-4 (PAL) | - | 94.2 | 51.8 | 86.4 |
|
| 55 |
+
| [ToRA-7B](https://huggingface.co/llm-agents/tora-7b-v1.0) | 7B | 68.8 | 40.1 | 62.4|
|
| 56 |
+
| [ToRA-Code-7B](https://huggingface.co/llm-agents/tora-code-7b-v1.0) | 7B | 72.6 | 44.6 | 66.5|
|
| 57 |
+
| [ToRA-13B](https://huggingface.co/llm-agents/tora-13b-v1.0) | 13B | 72.7 | 43.0 | 65.9|
|
| 58 |
+
| [ToRA-Code-13B](https://huggingface.co/llm-agents/tora-code-13b-v1.0) | 13B | 75.8 | 48.1 | 71.3 |
|
| 59 |
+
| [ToRA-Code-34B<sup>*</sup>](https://huggingface.co/llm-agents/tora-code-34b-v1.0) | 34B | 80.7 | **51.0** | 74.8 |
|
| 60 |
+
| [ToRA-70B](https://huggingface.co/llm-agents/tora-70b-v1.0) | 70B | **84.3** | 49.7 | **76.9** |
|
| 61 |
+
|
| 62 |
+
- <sup>*</sup>ToRA-Code-34B is currently the first and only open-source model to achieve over 50% accuracy (pass@1) on the MATH dataset, which significantly outperforms GPT-4’s CoT result (51.0 vs. 42.5), and is competitive with GPT-4 solving problems with programs. By open-sourcing our code and models, we hope more breakthroughs will come!
|
| 63 |
+
|
| 64 |
+
- <sup>†</sup>10 math tasks include GSM8k, MATH, GSM-Hard, SVAMP, TabMWP, ASDiv, SingleEQ, SingleOP, AddSub, and MultiArith.
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
## ⚡️ Training
|
| 68 |
+
|
| 69 |
+
The models are trained on ToRA-Corpus 16k, which contains tool-integrated reasoning trajectories of MATH and GSM8k from GPT-4.
|
| 70 |
+
|
| 71 |
+
We use imitation learning (i.e., SFT) to fine-tune the models, and then apply our proposed *output space shaping* to improve tool-integrated reasoning behaviors. Please refer to the [paper](https://arxiv.org/pdf/2309.17452.pdf) for more details.
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
## 🪁 Inference & Evaluation
|
| 75 |
+
|
| 76 |
+
Please refer to ToRA's [GitHub repo](https://github.com/microsoft/ToRA) for inference, evaluation, and training code.
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
## ☕️ Citation
|
| 80 |
+
|
| 81 |
+
If you find this repository helpful, please consider citing our paper:
|
| 82 |
+
|
| 83 |
+
```
|
| 84 |
+
@misc{gou2023tora,
|
| 85 |
+
title={ToRA: A Tool-Integrated Reasoning Agent for Mathematical Problem Solving},
|
| 86 |
+
author={Zhibin Gou and Zhihong Shao and Yeyun Gong and yelong shen and Yujiu Yang and Minlie Huang and Nan Duan and Weizhu Chen},
|
| 87 |
+
year={2023},
|
| 88 |
+
eprint={2309.17452},
|
| 89 |
+
archivePrefix={arXiv},
|
| 90 |
+
primaryClass={cs.CL}
|
| 91 |
+
}
|
| 92 |
+
```
|
genai_config.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": {
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"context_length": 16384,
|
| 5 |
+
"decoder": {
|
| 6 |
+
"session_options": {
|
| 7 |
+
"log_id": "onnxruntime-genai",
|
| 8 |
+
"provider_options": []
|
| 9 |
+
},
|
| 10 |
+
"filename": "model.onnx",
|
| 11 |
+
"head_size": 128,
|
| 12 |
+
"hidden_size": 4096,
|
| 13 |
+
"inputs": {
|
| 14 |
+
"input_ids": "input_ids",
|
| 15 |
+
"attention_mask": "attention_mask",
|
| 16 |
+
"position_ids": "position_ids",
|
| 17 |
+
"past_key_names": "past_key_values.%d.key",
|
| 18 |
+
"past_value_names": "past_key_values.%d.value"
|
| 19 |
+
},
|
| 20 |
+
"outputs": {
|
| 21 |
+
"logits": "logits",
|
| 22 |
+
"present_key_names": "present.%d.key",
|
| 23 |
+
"present_value_names": "present.%d.value"
|
| 24 |
+
},
|
| 25 |
+
"num_attention_heads": 32,
|
| 26 |
+
"num_hidden_layers": 32,
|
| 27 |
+
"num_key_value_heads": 32
|
| 28 |
+
},
|
| 29 |
+
"eos_token_id": 2,
|
| 30 |
+
"pad_token_id": 0,
|
| 31 |
+
"type": "llama",
|
| 32 |
+
"vocab_size": 32001
|
| 33 |
+
},
|
| 34 |
+
"search": {
|
| 35 |
+
"diversity_penalty": 0.0,
|
| 36 |
+
"do_sample": false,
|
| 37 |
+
"early_stopping": true,
|
| 38 |
+
"length_penalty": 1.0,
|
| 39 |
+
"max_length": 16384,
|
| 40 |
+
"min_length": 0,
|
| 41 |
+
"no_repeat_ngram_size": 0,
|
| 42 |
+
"num_beams": 1,
|
| 43 |
+
"num_return_sequences": 1,
|
| 44 |
+
"past_present_share_buffer": false,
|
| 45 |
+
"repetition_penalty": 1.0,
|
| 46 |
+
"temperature": 1.0,
|
| 47 |
+
"top_k": 50,
|
| 48 |
+
"top_p": 1.0
|
| 49 |
+
}
|
| 50 |
+
}
|
model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e935528cdf8ff6ef3472ede2c6878e0e8d999745af70ae2e0f338213fcdac144
|
| 3 |
+
size 250201
|
model.onnx.data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c604c4c49509e6173de3dcfa191fafce9600282fd7b609247c489d82bfe1e3e
|
| 3 |
+
size 13481156608
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"eos_token": "</s>",
|
| 4 |
+
"pad_token": "<pad>",
|
| 5 |
+
"unk_token": "<unk>"
|
| 6 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": null,
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"bos_token": "<s>",
|
| 5 |
+
"clean_up_tokenization_spaces": false,
|
| 6 |
+
"eos_token": "</s>",
|
| 7 |
+
"extra_special_tokens": [
|
| 8 |
+
"<s>",
|
| 9 |
+
"</s>"
|
| 10 |
+
],
|
| 11 |
+
"is_local": true,
|
| 12 |
+
"model_max_length": 16384,
|
| 13 |
+
"pad_token": "<pad>",
|
| 14 |
+
"sp_model_kwargs": {},
|
| 15 |
+
"spaces_between_special_tokens": false,
|
| 16 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 17 |
+
"unk_token": "<unk>",
|
| 18 |
+
"use_default_system_prompt": true
|
| 19 |
+
}
|