{ "models": [ { "model_label": "Llama-3.1-8B", "model_id": "meta-llama/Llama-3.1-8B-Instruct", "gpu_model": "H100", "task": "lm-arena-chat", "gpus_per_replica": 1, "tensor_parallel": 1, "itl_deadline_s": 0.08, "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024], "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512], "num_replicas": 720, "initial_batch_size": 128 }, { "model_label": "Llama-3.1-70B", "model_id": "meta-llama/Llama-3.1-70B-Instruct", "gpu_model": "H100", "task": "lm-arena-chat", "gpus_per_replica": 4, "tensor_parallel": 4, "itl_deadline_s": 0.10, "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048], "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512], "num_replicas": 180, "initial_batch_size": 128 }, { "model_label": "Llama-3.1-405B", "model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8", "gpu_model": "H100", "task": "lm-arena-chat", "gpus_per_replica": 8, "tensor_parallel": 8, "precision": "fp8", "itl_deadline_s": 0.12, "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512], "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512], "num_replicas": 90, "initial_batch_size": 128 }, { "model_label": "Qwen3-30B-A3B", "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "gpu_model": "H100", "task": "gpqa", "gpus_per_replica": 2, "tensor_parallel": 2, "itl_deadline_s": 0.06, "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512], "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512], "num_replicas": 480, "initial_batch_size": 128 }, { "model_label": "Qwen3-235B-A22B", "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "gpu_model": "H100", "task": "gpqa", "gpus_per_replica": 8, "tensor_parallel": 8, "itl_deadline_s": 0.14, "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512], "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512], "num_replicas": 210, "initial_batch_size": 128 } ], "training_trace_params": {}, "data_dir": null, "ieee_case_dir": "examples/ieee13", "mlenergy_data_dir": null }