| import onnxruntime as ort |
| from transformers import AutoTokenizer |
| import numpy as np |
| import os |
|
|
# Firmware path consumed by the VitisAI EP; .get avoids a KeyError crash when
# the variable is not exported (the original indexed os.environ directly).
print("my env", os.environ.get("XLNX_VART_FIRMWARE", "<XLNX_VART_FIRMWARE not set>"))


# Machine-specific absolute paths: quantized GPT-2 ONNX model produced by Olive,
# and the VitisAI execution-provider configuration file.
onnx_path = r"C:\Users\Felix\Olive\examples\gpt2\cache\models\1_VitisAIQuantization-1193226590a636c107851db60c66899c-ebec96f9d75c46bed8dc01c8240c6bad-cpu-cpu\output_model\model.onnx"
config_path = r"C:\Users\Felix\Downloads\voe-3.5-win_amd64\voe-3.5-win_amd64\vaip_config.json"


def generate_and_decode(session, tokenizer, prompt, max_length=128):
    """Run one forward pass over *prompt* and greedy-decode every position.

    Tokenizes the prompt (left-padded to *max_length*), builds the int32 feeds
    the exported model expects, runs the session once, and returns the
    batch-decoded argmax token at each position.

    Args:
        session: an onnxruntime.InferenceSession for the GPT-2 model.
        tokenizer: the matching HF tokenizer (pad token / left padding set).
        prompt: input text.
        max_length: padded sequence length (model was exported for 128).

    Returns:
        list[str]: batch-decoded greedy tokens, one string per batch row.
    """
    encoded = tokenizer(prompt, return_tensors="np", padding="max_length", max_length=max_length)
    # NOTE(review): position_ids are a plain 0..L-1 arange even though the
    # input is left-padded — presumably what the exported graph expects;
    # confirm against the export recipe.
    feeds = {
        "input_ids": encoded["input_ids"].astype(np.int32),
        "attention_mask": encoded["attention_mask"].astype(np.int32),
        "position_ids": np.arange(encoded["attention_mask"].shape[1], dtype=np.int32)[None, :],
    }
    # Output 0 is the logits tensor; argmax over the vocab axis gives the
    # greedy token id at every position (not autoregressive generation).
    logits = session.run(None, feeds)[0]
    return tokenizer.batch_decode(np.argmax(logits, axis=-1))


# Tiny random GPT-2 checkpoint is only used for its tokenizer vocabulary.
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"


session = ort.InferenceSession(onnx_path, providers=['VitisAIExecutionProvider'], provider_options=[{'config_file': config_path}])
print("after load")

# Two smoke-test prompts; previously this whole pipeline was copy-pasted twice.
print(generate_and_decode(session, tokenizer, "Hey hey hey! This is me and"))
print(generate_and_decode(session, tokenizer, "Hey hey hey! This is me and I love to"))