# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "onnx-ir",
#     "onnxruntime-genai",
#     "torch",
#     "tqdm>=4.67.1",
#     "transformers>=5.0.0",
#     "huggingface_hub",
# ]
# # NOTE(review): PEP 723 defines only `requires-python` and `dependencies`;
# # the `optional-dependencies` table below is non-standard and script runners
# # (uv, pipx, hatch) may reject or ignore it — confirm the intended tool.
# optional-dependencies = { cuda = [
#     "torch==2.2.*+cu121",
#     "onnxruntime-genai-cuda",
# ] }
# ///
from onnxruntime_genai.models.builder import create_model
import os

# Values accepted by create_model's `precision` and `execution_provider`
# parameters (kept for reference / future CLI wiring; not consumed below).
# NOTE(review): the original bound BOTH lists to the same name `choices`,
# so the precision list was dead code shadowed one line later — split into
# two distinct, self-describing constants.
precision_choices = ["int4", "bf16", "fp16", "fp32"]
execution_provider_choices = ["cpu", "cuda", "dml", "webgpu", "NvTensorRtRtx"]
from huggingface_hub import snapshot_download

# Where to put the downloaded checkpoint. The original hard-coded a
# Lightning-AI studio path; it is kept as the default for backward
# compatibility, but can now be overridden via the MODEL_DIR env var.
_model_dir = os.getenv("MODEL_DIR", "/teamspace/studios/this_studio/model")

# Download the full tiiuae/Falcon3-7B-Base repository snapshot to disk.
# snapshot_download returns the local directory containing the files.
model_path = snapshot_download(
    "tiiuae/Falcon3-7B-Base",
    local_dir=_model_dir,
)

# Convert the Hugging Face checkpoint to an ONNX model for onnxruntime-genai.
# NOTE(review): `model_name` normally takes a HF model id, not a local path;
# passing the path here may be redundant given `input_path` — confirm against
# the onnxruntime-genai builder docs.
# NOTE(review): precision="fp16" with execution_provider="cpu" is an unusual
# pairing (fp16 is typically a GPU precision) — confirm this is intentional.
create_model(
    model_name=model_path,
    input_path=model_path,
    output_dir="onnx",
    precision="fp16",
    execution_provider="cpu",
    cache_dir=os.path.join(os.getcwd(), "model_cache"),
    # hf_token: taken from the `hf` env var if set; the builder treats the
    # fallback "true" as "use the cached Hugging Face login".
    extra_options={"hf_token": os.getenv("hf", "true")},
)