# Hydra configuration for the `pytorch_generate` benchmark.
#
# The defaults list composes the base benchmark schema with the `inference`
# scenario, the `process` launcher and the `pytorch` backend. Because `_self_`
# is listed last, the values set in this file override those defaults.
defaults:
  - benchmark  # inheriting benchmark schema
  - scenario: inference
  - launcher: process
  - backend: pytorch
  - _self_  # for hydra 1.1 compatibility

# Also used to build the run directory (see `hydra.run.dir` below).
name: pytorch_generate

# Overrides for the `process` launcher selected above.
launcher:
  start_method: spawn
  device_isolation: true
  device_isolation_action: warn

# Overrides for the `pytorch` backend selected above.
backend:
  device: cuda
  device_ids: 0
  # NOTE(review): presumably skips loading pretrained weights for `model`;
  # confirm against the backend schema.
  no_weights: true
  model: meta-llama/Llama-2-7b-hf
  cache_implementation: static
  torch_compile: true
  torch_dtype: float16
  # NOTE(review): presumably forwarded to `torch.compile` when
  # `torch_compile` is true; confirm against the backend schema.
  torch_compile_config:
    backend: inductor
    mode: reduce-overhead
    fullgraph: true

# Overrides for the `inference` scenario selected above.
scenario:
  input_shapes:
    batch_size: 1
    sequence_length: 7
  # min_new_tokens == max_new_tokens, so the requested generation length is
  # fixed at 128 new tokens; sampling is disabled.
  generate_kwargs:
    max_new_tokens: 128
    min_new_tokens: 128
    do_sample: false
  memory: true
  latency: true
  iterations: 2
  duration: 0

# hydra/cli specific settings
hydra:
  run:
    # where to store run results
    dir: runs/${name}
  job:
    # change working directory to the run directory
    chdir: true
    env_set:
      # set environment variable OVERRIDE_BENCHMARKS to 1
      # to not skip benchmarks that have been run before
      # (quoted: environment variable values are strings)
      OVERRIDE_BENCHMARKS: "1"
      LOG_LEVEL: WARN
  sweep:
    dir: multirun
    subdir: ${hydra.job.override_dirname}