import re
import subprocess
import sys
from pathlib import Path

from tabulate import tabulate
| |
|
| |
|
# POSIX-style path of the script that gets benchmarked: it is resolved relative to the
# directory three levels above this file.
SCRIPT_LOCATION = Path(__file__).parent.parent.parent.joinpath("examples/pytorch/continuous_batching.py").as_posix()
# Options appended to every invocation of the script, after the per-run arguments.
COMMON_ARGS = ["--log-level", "WARNING", "--seed", "0"]
| |
|
| |
|
def run_and_parse_cb_example(args: str) -> dict:
    """Run the benchmarked script once and parse its throughput summary line.

    The script at ``SCRIPT_LOCATION`` is launched in a subprocess with the
    whitespace-split ``args`` followed by ``COMMON_ARGS``. Its standard output is then
    searched for the ``CB generation took: ...`` summary line.

    Args:
        args: Command-line options for the script, given as one whitespace-separated
            string (for example ``"--samples 10"``).

    Returns:
        A row with the keys ``args``, ``time_seconds``, ``num_tokens`` and
        ``throughput_tok_per_sec``. On success the three measurements are numeric.
        When the output reports that the generation thread terminated unexpectedly,
        or when no summary line is found, the measurements are replaced by
        placeholders so the row still identifies the failing configuration.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero status.
    """
    print(f"Benchmarking with args: {args}")
    # Launch the script with the interpreter running this benchmark, so it uses the
    # same environment; a bare "python" is resolved on PATH and may be a different one.
    interpreter = sys.executable or "python"
    command = [interpreter, SCRIPT_LOCATION] + args.split() + COMMON_ARGS
    output = subprocess.check_output(command).decode("utf-8")

    # The failure marker is checked first: such a run must be reported as an error
    # regardless of whatever else was printed.
    if "Generation thread terminated unexpectedly." in output:
        return {
            "args": args,
            "time_seconds": "X",
            "num_tokens": "X",
            "throughput_tok_per_sec": "ERROR",
        }

    pattern = r"CB generation took: ([\d.]+) seconds for (\d+) tokens\. ([\d.]+)tok/s"
    match = re.search(pattern, output)
    if match is None:
        # No summary line was printed: keep the arguments in the row instead of
        # returning an empty dict, which would show up as an anonymous blank row.
        return {
            "args": args,
            "time_seconds": "X",
            "num_tokens": "X",
            "throughput_tok_per_sec": "NO RESULT",
        }

    return {
        "args": args,
        "time_seconds": float(match.group(1)),
        "num_tokens": int(match.group(2)),
        "throughput_tok_per_sec": float(match.group(3)),
    }
| |
|
| |
|
if __name__ == "__main__":
    # First row of the table: maps each result key to its human-readable column label.
    column_labels = {
        "args": "Arguments",
        "time_seconds": "Duration (s)",
        "num_tokens": "Generated tokens",
        "throughput_tok_per_sec": "Throughput (tok/s)",
    }

    # Configurations to benchmark, run one after the other in this order.
    benchmark_configs = (
        # Small sample counts, one of them with an explicit number of blocks
        "--samples 10",
        "--samples 20 --num-blocks 16",
        "--samples 50",
        # 100 samples: default attention, then two explicit `--attn` values
        "--samples 100",
        "--samples 100 --attn flash_attention_2",
        "--samples 100 --attn sdpa",
        # 500 samples
        "--samples 500",
        # 500 samples with `--add-prefix` and `--compile`
        "--samples 500 --add-prefix --compile",
        # Several returned sequences per sample, with sampling enabled
        "--samples 50 --num-return-sequences 8 --do-sample",
        "--samples 100 --num-return-sequences 4 --do-sample",
    )

    results = [column_labels]
    for cli_args in benchmark_configs:
        results.append(run_and_parse_cb_example(cli_args))

    # NOTE(review): no `headers=` is passed, so the label row is rendered as an
    # ordinary first data row -- confirm whether `headers="firstrow"` was intended.
    print()
    print(tabulate(results, tablefmt="github"))
| |
|