File size: 1,026 Bytes
311b542 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
from transformers import AutoTokenizer
from openvino import compile_model
import openvino_tokenizers
import os
import time
# Smoke test and rough latency check for a converted OpenVINO tokenizer:
# tokenize one sentence with the Hugging Face tokenizer and with the compiled
# OpenVINO tokenizer, print both sets of input ids so they can be compared by
# eye, then time a single OpenVINO tokenizer call.

# Report the PID of this process.
print(f"Process ID: {os.getpid()}")

# Reference tokenizer from Hugging Face.
hf_tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-uncased")

# Command recorded by the author for producing the OpenVINO tokenizer model:
#
#   convert_tokenizer google-bert/bert-base-multilingual-uncased -o bert-base-multilingual-uncased --skip-special-tokens --trust-remote-code --utf8_replace_mode replace
#
# NOTE(review): that command writes into the "bert-base-multilingual-uncased"
# directory, while the path below is relative to the current working
# directory -- confirm the script is run from inside the output directory.
ov_tokenizer = "openvino_tokenizer.xml"

# NOTE(review): "compiled_tokenzier" is misspelled; the name is kept unchanged
# in case code outside this section refers to it.
compiled_tokenzier = compile_model(ov_tokenizer)

text_input = ["I am developing a clang-based c++ compiler"]

# The Hugging Face tokenizer is given the single string; the OpenVINO
# tokenizer is given the whole list.
hf_output = hf_tokenizer(text_input[0])
print("hf_output: ", hf_output["input_ids"])

# Existing test
ov_output = compiled_tokenzier(text_input)
print("ov_output: ", ov_output["input_ids"])

# Measure time for compiled_tokenizer.
# The call above has already run once, so this times a second invocation.
# perf_counter() is used instead of time.time(): it is monotonic and has the
# highest available resolution, which matters for millisecond-scale intervals.
start_time = time.perf_counter()
ov_output = compiled_tokenzier(text_input)
end_time = time.perf_counter()
print(f"Time taken for compiled_tokenizer: {(end_time - start_time) * 1000:.2f} ms")

# Additional tests
|