{ "ibm-granite/granite-3.1-2b-instruct": [ { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 4, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 8, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 8, "instance_type" : "trn1" }, { "batch_size": 32, "sequence_length": 4096, "tensor_parallel_size": 8, "instance_type" : "trn1" } ], "ibm-granite/granite-3.1-8b-instruct": [ { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 4, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 8, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 8, "instance_type" : "trn1" }, { "batch_size": 32, "sequence_length": 4096, "tensor_parallel_size": 8, "instance_type" : "trn1" } ], "ibm-granite/granite-3.3-8b-instruct": [ { "batch_size": 1, "sequence_length": 16384, "tensor_parallel_size": 8, "instance_type" : "trn1" } ] }