{
  "meta-llama/Llama-2-7b-chat-hf": [
    {
      "batch_size": 1,
      "sequence_length": 4096,
      "num_cores": 2,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 1,
      "sequence_length": 4096,
      "num_cores": 8,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 1,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 4,
      "sequence_length": 4096,
      "num_cores": 2,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 4,
      "sequence_length": 4096,
      "num_cores": 8,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 4,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 8,
      "sequence_length": 4096,
      "num_cores": 8,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 8,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 16,
      "sequence_length": 4096,
      "num_cores": 8,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 16,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 32,
      "sequence_length": 4096,
      "num_cores": 8,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 32,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    }
  ],
  "meta-llama/Llama-2-13b-chat-hf": [
    {
      "batch_size": 1,
      "sequence_length": 4096,
      "num_cores": 12,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 1,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 4,
      "sequence_length": 4096,
      "num_cores": 12,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 4,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 8,
      "sequence_length": 4096,
      "num_cores": 12,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 8,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 16,
      "sequence_length": 4096,
      "num_cores": 12,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 16,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 32,
      "sequence_length": 4096,
      "num_cores": 12,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 32,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    }
  ],
  "meta-llama/Llama-2-70b-chat-hf": [
    {
      "batch_size": 1,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    },
    {
      "batch_size": 4,
      "sequence_length": 4096,
      "num_cores": 24,
      "auto_cast_type": "fp16"
    }
  ]
}