Text Generation
ONNX
English
onnxruntime-genai
code
math
Prince-1 committed on
Commit
db87bad
·
verified ·
1 Parent(s): 171effb

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -14,6 +14,7 @@
14
  *.npy filter=lfs diff=lfs merge=lfs -text
15
  *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
 
14
  *.npy filter=lfs diff=lfs merge=lfs -text
15
  *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
18
  *.ot filter=lfs diff=lfs merge=lfs -text
19
  *.parquet filter=lfs diff=lfs merge=lfs -text
20
  *.pb filter=lfs diff=lfs merge=lfs -text
Convert.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "flash-linear-attention>=0.4.2",
5
+ # "hf-xet>=1.4.3",
6
+ # "huggingface-hub>=1.8.0",
7
+ # "onnx>=1.21.0",
8
+ # "onnx-ir>=0.2.0",
9
+ # "onnxruntime>=1.24.4",
10
+ # "onnxruntime-genai>=0.13.1",
11
+ # "sentencepiece>=0.2.1",
12
+ # "tiktoken>=0.12.0",
13
+ # "torch>=2.11.0",
14
+ # "transformers>=5.4.0",
15
+ # ]
16
+ # ///
17
+
18
+ import argparse
19
+ from pathlib import Path
20
+ from huggingface_hub import snapshot_download
21
+ #from onnxruntime_genai.python.models.builder import create_model
22
+ from onnxruntime_genai.models.builder import create_model
23
def main():
    """Download a Hugging Face model snapshot and convert it to ONNX GenAI format.

    Command-line arguments:
        --name   Hugging Face repo id of the model to download and convert
                 (required).
        --token  Optional Hugging Face access token; ``None`` is passed to
                 ``snapshot_download`` when it is omitted or empty.

    The ``model``, ``onnx`` and ``cache`` directories are created under the
    current working directory. The snapshot is downloaded into ``model`` and
    the converted output is written to ``onnx`` using fp16 precision and the
    CPU execution provider.
    """
    parser = argparse.ArgumentParser(
        description="Download a Hugging Face model and convert it with the "
                    "onnxruntime-genai model builder."
    )
    # The repo id is used for both the download and the conversion, so fail
    # fast at argument parsing instead of passing None further down.
    parser.add_argument(
        "--name",
        required=True,
        help="Hugging Face repo id of the model to convert",
    )
    parser.add_argument(
        "--token",
        required=False,
        default=None,
        help="Hugging Face access token (optional)",
    )
    args = parser.parse_args()

    # Treat an empty --token value the same as an omitted one.
    token = args.token or None

    pwd = Path.cwd()
    model_dir = pwd / "model"
    onnx_dir = pwd / "onnx"
    cache_dir = pwd / "cache"
    for directory in (model_dir, onnx_dir, cache_dir):
        directory.mkdir(exist_ok=True)

    # ===== STEP 1: DOWNLOAD (HF HUB + XET backend automatically used) =====
    print(">> Downloading model via huggingface_hub (Xet enabled if installed)...")

    local_path = snapshot_download(
        repo_id=args.name,
        local_dir=str(model_dir),
        token=token,
    )

    print(f"Model downloaded to: {local_path}")

    # ===== STEP 2: CONVERT USING ONNX GENAI BUILDER =====
    print(">> Converting to ONNX (GenAI format)...")

    create_model(
        model_name=args.name,
        input_path=str(model_dir),    # HF model directory
        output_dir=str(onnx_dir),     # ONNX output
        precision="fp16",             # fp32 | fp16 | int8 | int4 (if supported)
        execution_provider="cpu",     # cpu | cuda | dml
        cache_dir=str(cache_dir),     # reuse the cache directory created above
        # NOTE(review): passed through unchanged; confirm against the
        # installed builder's signature whether this keyword is expected.
        extra_options={},
    )

    print("\n✅ Done")
    print(f"ONNX model at: {onnx_dir}")


if __name__ == "__main__":
    main()
README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: llama2
3
+ datasets:
4
+ - gsm8k
5
+ - competition_math
6
+ language:
7
+ - en
8
+ metrics:
9
+ - exact_match
10
+ library_name: onnxruntime-genai
11
+ pipeline_tag: text-generation
12
+ base_model: llm-agents/tora-code-7b-v1.0
13
+ tags:
14
+ - code
15
+ - math
16
+ - onnx
17
+ - onnxruntime-genai
18
+ ---
19
+
20
+
21
+ <h1 align="center">
22
+ ToRA: A Tool-Integrated Reasoning Agent <br> for Mathematical Problem Solving
23
+ </h1>
24
+
25
+ <p align="center">
26
+ <a href="https://microsoft.github.io/ToRA/"><b>[🌐 Website]</b></a> •
27
+ <a href="https://arxiv.org/pdf/2309.17452.pdf"><b>[📜 Paper]</b></a> •
28
+ <a href="https://huggingface.co/llm-agents"><b>[🤗 HF Models]</b></a> •
29
+ <a href="https://github.com/microsoft/ToRA"><b>[🐱 GitHub]</b></a>
30
+ <br>
31
+ <a href="https://twitter.com/zhs05232838/status/1708860992631763092"><b>[🐦 Twitter]</b></a> •
32
+ <a href="https://www.reddit.com/r/LocalLLaMA/comments/1703k6d/tora_a_toolintegrated_reasoning_agent_for/"><b>[💬 Reddit]</b></a> •
33
+ <a href="https://notes.aimodels.fyi/researchers-announce-tora-training-language-models-to-better-understand-math-using-external-tools/">[🍀 Unofficial Blog]</a>
34
+ <!-- <a href="#-quick-start">Quick Start</a> • -->
35
+ <!-- <a href="#%EF%B8%8F-citation">Citation</a> -->
36
+ </p>
37
+
38
+ <p align="center">
39
+ Repo for "<a href="https://arxiv.org/pdf/2309.17452.pdf" target="_blank">ToRA: A Tool-Integrated Reasoning Agent for Mathematical Problem Solving</a>"
40
+ </p>
41
+
42
+ ## 🔥 News
43
+
44
+ - [2023/10/08] 🔥🔥🔥 All ToRA models released at [HuggingFace](https://huggingface.co/llm-agents)!!!
45
+ - [2023/09/29] ToRA paper, repo, and website released.
46
+
47
+ ## 💡 Introduction
48
+
49
+ ToRA is a series of Tool-integrated Reasoning Agents designed to solve challenging mathematical reasoning problems by interacting with tools, e.g., computation libraries and symbolic solvers. The ToRA series seamlessly integrates natural language reasoning with the utilization of external tools, thereby amalgamating the analytical prowess of language and the computational efficiency of external tools.
50
+
51
+ | Model | Size | GSM8k | MATH | AVG@10 math tasks<sup>&dagger;</sup> |
52
+ |---|---|---|---|---|
53
+ | GPT-4 | - | 92.0 | 42.5 | 78.3 |
54
+ | GPT-4 (PAL) | - | 94.2 | 51.8 | 86.4 |
55
+ | [ToRA-7B](https://huggingface.co/llm-agents/tora-7b-v1.0) | 7B | 68.8 | 40.1 | 62.4|
56
+ | [ToRA-Code-7B](https://huggingface.co/llm-agents/tora-code-7b-v1.0) | 7B | 72.6 | 44.6 | 66.5|
57
+ | [ToRA-13B](https://huggingface.co/llm-agents/tora-13b-v1.0) | 13B | 72.7 | 43.0 | 65.9|
58
+ | [ToRA-Code-13B](https://huggingface.co/llm-agents/tora-code-13b-v1.0) | 13B | 75.8 | 48.1 | 71.3 |
59
+ | [ToRA-Code-34B<sup>*</sup>](https://huggingface.co/llm-agents/tora-code-34b-v1.0) | 34B | 80.7 | **51.0** | 74.8 |
60
+ | [ToRA-70B](https://huggingface.co/llm-agents/tora-70b-v1.0) | 70B | **84.3** | 49.7 | **76.9** |
61
+
62
+ - <sup>*</sup>ToRA-Code-34B is currently the first and only open-source model to achieve over 50% accuracy (pass@1) on the MATH dataset, which significantly outperforms GPT-4’s CoT result (51.0 vs. 42.5), and is competitive with GPT-4 solving problems with programs. By open-sourcing our code and models, we hope more breakthroughs will come!
63
+
64
+ - <sup>&dagger;</sup>10 math tasks include GSM8k, MATH, GSM-Hard, SVAMP, TabMWP, ASDiv, SingleEQ, SingleOP, AddSub, and MultiArith.
65
+
66
+
67
+ ## ⚡️ Training
68
+
69
+ The models are trained on ToRA-Corpus 16k, which contains tool-integrated reasoning trajectories of MATH and GSM8k from GPT-4.
70
+
71
+ We use imitation learning (i.e., SFT) to fine-tune the models, and then apply our proposed *output space shaping* to improve tool-integrated reasoning behaviors. Please refer to the [paper](https://arxiv.org/pdf/2309.17452.pdf) for more details.
72
+
73
+
74
+ ## 🪁 Inference & Evaluation
75
+
76
+ Please refer to ToRA's [GitHub repo](https://github.com/microsoft/ToRA) for inference, evaluation, and training code.
77
+
78
+
79
+ ## ☕️ Citation
80
+
81
+ If you find this repository helpful, please consider citing our paper:
82
+
83
+ ```
84
+ @misc{gou2023tora,
85
+ title={ToRA: A Tool-Integrated Reasoning Agent for Mathematical Problem Solving},
86
+ author={Zhibin Gou and Zhihong Shao and Yeyun Gong and Yelong Shen and Yujiu Yang and Minlie Huang and Nan Duan and Weizhu Chen},
87
+ year={2023},
88
+ eprint={2309.17452},
89
+ archivePrefix={arXiv},
90
+ primaryClass={cs.CL}
91
+ }
92
+ ```
genai_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 1,
4
+ "context_length": 16384,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": []
9
+ },
10
+ "filename": "model.onnx",
11
+ "head_size": 128,
12
+ "hidden_size": 4096,
13
+ "inputs": {
14
+ "input_ids": "input_ids",
15
+ "attention_mask": "attention_mask",
16
+ "position_ids": "position_ids",
17
+ "past_key_names": "past_key_values.%d.key",
18
+ "past_value_names": "past_key_values.%d.value"
19
+ },
20
+ "outputs": {
21
+ "logits": "logits",
22
+ "present_key_names": "present.%d.key",
23
+ "present_value_names": "present.%d.value"
24
+ },
25
+ "num_attention_heads": 32,
26
+ "num_hidden_layers": 32,
27
+ "num_key_value_heads": 32
28
+ },
29
+ "eos_token_id": 2,
30
+ "pad_token_id": 0,
31
+ "type": "llama",
32
+ "vocab_size": 32001
33
+ },
34
+ "search": {
35
+ "diversity_penalty": 0.0,
36
+ "do_sample": false,
37
+ "early_stopping": true,
38
+ "length_penalty": 1.0,
39
+ "max_length": 16384,
40
+ "min_length": 0,
41
+ "no_repeat_ngram_size": 0,
42
+ "num_beams": 1,
43
+ "num_return_sequences": 1,
44
+ "past_present_share_buffer": false,
45
+ "repetition_penalty": 1.0,
46
+ "temperature": 1.0,
47
+ "top_k": 50,
48
+ "top_p": 1.0
49
+ }
50
+ }
model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e935528cdf8ff6ef3472ede2c6878e0e8d999745af70ae2e0f338213fcdac144
3
+ size 250201
model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c604c4c49509e6173de3dcfa191fafce9600282fd7b609247c489d82bfe1e3e
3
+ size 13481156608
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": null,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "</s>",
7
+ "extra_special_tokens": [
8
+ "<s>",
9
+ "</s>"
10
+ ],
11
+ "is_local": true,
12
+ "model_max_length": 16384,
13
+ "pad_token": "<pad>",
14
+ "sp_model_kwargs": {},
15
+ "spaces_between_special_tokens": false,
16
+ "tokenizer_class": "LlamaTokenizer",
17
+ "unk_token": "<unk>",
18
+ "use_default_system_prompt": true
19
+ }