Upload 2 files
Browse files- run_speed_tests.sh +7 -0
- speed_test.py +145 -0
run_speed_tests.sh
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Run the Donut decoding speed benchmark across model / flag combinations.
# Fail fast: abort on the first error, on unset variables, and on failures
# inside pipelines, so a broken run is not silently skipped.
set -euo pipefail

python speed_test.py --model_path "naver-clova-ix/donut-base"
python speed_test.py --model_path "naver-clova-ix/donut-base" --ja_bad_words
python speed_test.py --model_path "donut-base-ascii"
|
speed_test.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from datasets import load_dataset
|
| 5 |
+
from transformers import AutoProcessor, VisionEncoderDecoderModel
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def speedometer(
    model: torch.nn.Module,
    pixel_values: torch.Tensor,
    decoder_input_ids: torch.Tensor,
    processor: AutoProcessor,
    bad_words_ids: list,
    warmup_iters: int = 100,
    timing_iters: int = 100,
    num_tokens: int = 10,
) -> float:
    """Measure the mean wall-clock latency of ``model.generate`` with CUDA events.

    Performs ``warmup_iters`` untimed generations, then times ``timing_iters``
    generations and prints/returns the mean per-call latency.

    Args:
        model: Vision encoder-decoder model to benchmark; must live on a CUDA
            device (CUDA events are used for timing).
        pixel_values: Preprocessed image batch fed to the encoder.
        decoder_input_ids: Prompt token ids that seed the decoder.
        processor: Processor supplying the tokenizer's pad/eos token ids.
        bad_words_ids: Token-id sequences the decoder is forbidden to emit.
        warmup_iters: Number of untimed warmup generations.
        timing_iters: Number of timed generations.
        num_tokens: Exact generation length (min_length == max_length) so every
            run performs identical work.

    Returns:
        Mean generation time in milliseconds.
    """
    # Hoist host->device transfers out of the loops: the original moved the
    # inputs every iteration, which both wastes work and pollutes the timing.
    pixel_values = pixel_values.to(model.device)
    decoder_input_ids = decoder_input_ids.to(model.device)

    # Single source of truth for the generation settings; the original
    # duplicated this kwarg list verbatim in the warmup and timing loops.
    generate_kwargs = dict(
        decoder_input_ids=decoder_input_ids,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=bad_words_ids,
        return_dict_in_generate=True,
        min_length=num_tokens,
        max_length=num_tokens,
    )

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    # Warmup runs (untimed): lets kernel caches and allocator state settle.
    torch.cuda.synchronize()
    for _ in range(warmup_iters):
        model.generate(pixel_values, **generate_kwargs)

    # Timing runs: events are recorded on the CUDA stream, so after the final
    # synchronize, elapsed_time covers exactly the timed generations.
    start.record()
    for _ in range(timing_iters):
        model.generate(pixel_values, **generate_kwargs)
    end.record()
    torch.cuda.synchronize()

    mean = start.elapsed_time(end) / timing_iters
    print(f"Mean time: {mean} ms")

    return mean
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def get_ja_list_of_lists(processor):
    """Return ``bad_words_ids`` covering every Japanese token in the vocab.

    Scans the tokenizer vocabulary and collects the ids of tokens consisting
    entirely of Japanese characters, in the ``[[id], [id], ...]`` shape that
    ``generate(bad_words_ids=...)`` expects.
    """

    def is_japanese(s):
        "Made by GPT-4: https://chat.openai.com/share/a795b15c-8534-40b9-9699-c8c1319f5f25"
        # Bug fix: the original loop-based check returned True for an empty
        # string (the loop body never ran), so the bare SentencePiece "▁"
        # marker token was wrongly classified as Japanese.
        if not s:
            return False
        return all(
            0x3040 <= cp <= 0x309F        # Hiragana
            or 0x30A0 <= cp <= 0x30FF     # Katakana
            or 0x4E00 <= cp <= 0x9FFF     # CJK Unified Ideographs
            or 0x3400 <= cp <= 0x4DBF     # CJK Extension A
            or 0x20000 <= cp <= 0x2A6DF   # CJK Extension B
            or 0x31F0 <= cp <= 0x31FF     # Katakana Phonetic Extensions
            or 0xFF00 <= cp <= 0xFFEF     # Halfwidth/Fullwidth Forms
            or 0x3000 <= cp <= 0x303F     # CJK Symbols and Punctuation
            or 0x3200 <= cp <= 0x32FF     # Enclosed CJK Letters and Months
            for cp in map(ord, s)
        )

    # "▁" is the SentencePiece word-boundary marker, not part of the token
    # text itself, so strip it before classifying.  (Renamed loop variable:
    # the original shadowed the builtin ``id``.)
    return [
        [token_id]
        for token, token_id in processor.tokenizer.vocab.items()
        if is_japanese(token.lstrip("▁"))
    ]
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def main():
    """Parse CLI args, load a Donut model, and benchmark its decoding speed."""
    parser = argparse.ArgumentParser(
        description="Benchmark Donut generation speed, optionally banning all Japanese tokens."
    )
    parser.add_argument(
        "--model_path",
        help="Hugging Face Hub id or local path of the VisionEncoderDecoder model",
        required=True,
    )
    parser.add_argument(
        "--ja_bad_words",
        help="Also add every Japanese vocab token to bad_words_ids",
        action="store_true",
        default=False,
    )
    args = parser.parse_args()

    print("Running speed test on model: ", args.model_path, "with ja_bad_words: ", args.ja_bad_words)

    processor = AutoProcessor.from_pretrained(args.model_path)
    model = VisionEncoderDecoderModel.from_pretrained(args.model_path)

    # Use a consistent torch.device (the original mixed int 0 and
    # torch.device("cpu")).  GPU is required for meaningful timings since
    # speedometer uses CUDA events; CPU is a smoke-test fallback only.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # One sample document is enough: we time fixed-length generation.
    dataset = load_dataset("hf-internal-testing/example-documents", split="test")
    image = dataset[1]["image"]

    task_prompt = "<s_synthdog>"
    decoder_input_ids = processor.tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt"
    ).input_ids

    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Always ban <unk>; optionally ban every Japanese token as well.
    bad_words_ids = [[processor.tokenizer.unk_token_id]]
    if args.ja_bad_words:
        bad_words_ids += get_ja_list_of_lists(processor)

    print("Length of bad_words_ids: ", len(bad_words_ids))

    speedometer(
        model,
        pixel_values,
        decoder_input_ids,
        processor,
        bad_words_ids=bad_words_ids,
        warmup_iters=100,
        timing_iters=100,
        num_tokens=10,
    )


if __name__ == "__main__":
    main()
|