{
"models": [
{
"model_label": "Llama-3.1-8B",
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
"gpu_model": "H100",
"task": "lm-arena-chat",
"gpus_per_replica": 1,
"tensor_parallel": 1,
"itl_deadline_s": 0.08,
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024],
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
"num_replicas": 720,
"initial_batch_size": 128
},
{
"model_label": "Llama-3.1-70B",
"model_id": "meta-llama/Llama-3.1-70B-Instruct",
"gpu_model": "H100",
"task": "lm-arena-chat",
"gpus_per_replica": 4,
"tensor_parallel": 4,
"itl_deadline_s": 0.10,
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048],
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
"num_replicas": 180,
"initial_batch_size": 128
},
{
"model_label": "Llama-3.1-405B",
"model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8",
"gpu_model": "H100",
"task": "lm-arena-chat",
"gpus_per_replica": 8,
"tensor_parallel": 8,
"precision": "fp8",
"itl_deadline_s": 0.12,
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
"num_replicas": 90,
"initial_batch_size": 128
},
{
"model_label": "Qwen3-30B-A3B",
"model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
"gpu_model": "H100",
"task": "gpqa",
"gpus_per_replica": 2,
"tensor_parallel": 2,
"itl_deadline_s": 0.06,
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
"num_replicas": 480,
"initial_batch_size": 128
},
{
"model_label": "Qwen3-235B-A22B",
"model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
"gpu_model": "H100",
"task": "gpqa",
"gpus_per_replica": 8,
"tensor_parallel": 8,
"itl_deadline_s": 0.14,
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
"num_replicas": 210,
"initial_batch_size": 128
}
],
"training_trace_params": {},
"data_dir": null,
"ieee_case_dir": "examples/ieee13",
"mlenergy_data_dir": null
}