| | """ |
| | Top-level benchmarking script that automatically discovers and runs all benchmarks |
| | in the ./benches directory, organizing outputs into model-specific subfolders. |
| | """ |
| |
|
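# Example invocation (illustrative only; the script name and values are placeholders):
#   python run_benchmarks.py --batch-size 1 8 --sequence-length 128 --num-tokens-to-generate 100 --level 1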
import argparse
import json
import logging
import sys
import uuid

from framework.benchmark_config import BenchmarkConfig, adapt_configs, get_config_by_level
from framework.benchmark_runner import BenchmarkRunner


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--output-dir", type=str, default=None, help="Output dir for benchmark results")
    parser.add_argument("--log-level", type=str, choices=["DEBUG", "INFO", "WARNING", "ERROR"], default="WARNING")
    parser.add_argument("--model-id", type=str, help="Specific model ID to benchmark (if supported by benchmarks)")
    parser.add_argument("--warmup", "-w", type=int, default=3, help="Number of warmup iterations")
    parser.add_argument("--iterations", "-i", type=int, default=10, help="Number of measurement iterations")

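    # Sweep parameters: each flag accepts one or more values (nargs="+"),
    # which are applied to the benchmark configs via adapt_configs below.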
| | parser.add_argument("--batch-size", "-b", type=int, nargs="+", help="Batch size") |
| | parser.add_argument("--sequence-length", "-s", type=int, nargs="+", help="Sequence length") |
| | parser.add_argument("--num-tokens-to-generate", "-n", type=int, nargs="+", help="Number of tokens to generate") |
| |
|
    parser.add_argument(
        "--level",
        type=int,
        default=1,
        help="Level of coverage for the benchmark. 0: only the main config, 1: a few important configs, 2: a config"
        " for each attn implementation, 3: cross-generate all combinations of configs, 4: cross-generate all"
        " combinations of configs w/ all compile modes",
    )
    parser.add_argument("--config-file", type=str, help="Path to a config file stored in json or jsonl format")
    parser.add_argument("--num-tokens-to-profile", "-p", type=int, default=0, help="Number of tokens to profile")

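    # Git metadata, forwarded to BenchmarkRunner so results can be associated with a branch and commit.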
| | parser.add_argument("--branch-name", type=str, help="Git branch name") |
| | parser.add_argument("--commit-id", type=str, help="Git commit ID (if not provided, will auto-detect from git)") |
| | parser.add_argument("--commit-message", type=str, help="Git commit message") |
| |
|
    parser.add_argument(
        "--no-gpu-monitoring", action="store_true", help="Disables GPU monitoring during benchmark runs"
    )

    parser.add_argument(
        "--push-result-to-dataset",
        type=str,
        default=None,
        help="Name of the dataset to push results to. If not provided, results are not pushed to the Hub.",
    )
    args = parser.parse_args()

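    # Short unique identifier for this run (first 8 characters of a UUID4) and the numeric logging level.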
    benchmark_run_uuid = str(uuid.uuid4())[:8]
    numeric_level = getattr(logging, args.log_level.upper())

    handlers = [logging.StreamHandler(sys.stdout)]
    logging.basicConfig(
        level=numeric_level, format="[%(levelname)s - %(asctime)s] %(name)s: %(message)s", handlers=handlers
    )

    logger = logging.getLogger("benchmark_v2")
    logger.info("Starting benchmark discovery and execution")
    logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
    logger.info(f"Output directory: {args.output_dir}")

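    # --batch-size, --sequence-length and --num-tokens-to-generate have no defaults, so make sure they were all provided.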
    if any(arg is None for arg in [args.batch_size, args.sequence_length, args.num_tokens_to_generate]):
        raise ValueError(
            "All of the arguments --batch-size, --sequence-length, and --num-tokens-to-generate are required"
        )

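    # Each requested generation length must be at least 2 tokens.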
    if any(n <= 1 for n in args.num_tokens_to_generate):
        raise ValueError("--num-tokens-to-generate values must be greater than 1")

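    # Build the benchmark configs: either load them from a .json/.jsonl file, or generate them from the coverage level.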
    if args.config_file is not None:
        if args.config_file.endswith(".json"):
            with open(args.config_file, "r") as f:
                config_as_dicts = [json.load(f)]
        elif args.config_file.endswith(".jsonl"):
            with open(args.config_file, "r") as f:
                config_as_dicts = [json.loads(line) for line in f if line.startswith("{")]
        else:
            raise ValueError(f"Unsupported config file format: {args.config_file}")
        configs = [BenchmarkConfig.from_dict(config) for config in config_as_dicts]
    else:
        configs = get_config_by_level(args.level)

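    # Apply the CLI-provided warmup/iteration counts, sweep values, and GPU-monitoring flag to every config.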
    configs = adapt_configs(
        configs,
        args.warmup,
        args.iterations,
        args.batch_size,
        args.sequence_length,
        args.num_tokens_to_generate,
        not args.no_gpu_monitoring,
    )

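    # Run the benchmarks and collect results; output organization and commit auto-detection are handled by the runner.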
    runner = BenchmarkRunner(logger, args.output_dir, args.branch_name, args.commit_id, args.commit_message)
    timestamp, results = runner.run_benchmarks(
        args.model_id, configs, args.num_tokens_to_profile, pretty_print_summary=True
    )

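    # Optionally push the collected results to a dataset on the Hugging Face Hub.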
    dataset_id = args.push_result_to_dataset
    if dataset_id is not None and len(results) > 0:
        runner.push_results_to_hub(dataset_id, results, timestamp)