{
  "deployments": [
    {
      "spec": {
        "model_label": "Llama-3.1-8B",
        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
        "gpus_per_replica": 1,
        "num_replicas": 720,
        "initial_batch_size": 128,
        "itl_deadline_s": 0.08,
        "feasible_batch_sizes": [1, 2, 3, 4, 5, 6, 7, 8]
      },
      "vllm_base_url": "http://node1:8000",
      "gpu_endpoints": [
        {
          "host": "node1",
          "port": 4938,
          "gpu_indices": [0, 1, 2, 3]
        },
        {
          "host": "node1",
          "port": 4938,
          "gpu_indices": [4, 5, 6, 7]
        }
      ]
    },
    {
      "spec": {
        "model_label": "Llama-3.1-70B",
        "model_id": "meta-llama/Llama-3.1-70B-Instruct",
        "gpus_per_replica": 4,
        "num_replicas": 180,
        "initial_batch_size": 128,
        "itl_deadline_s": 0.10,
        "feasible_batch_sizes": [1, 2, 3, 4, 5, 6, 7, 8]
      },
      "vllm_base_url": "http://node2:8000",
      "gpu_endpoints": [
        {
          "host": "node2",
          "port": 4938,
          "gpu_indices": [0, 1, 2, 3]
        },
        {
          "host": "node2",
          "port": 4938,
          "gpu_indices": [4, 5, 6, 7]
        }
      ]
    }
  ],
  "requests": {
    "dataset": "lm-arena-chat",
    "num_requests": 1000,
    "max_completion_tokens": 512,
    "seed": 0
  },
  "requests_dir": null,
  "ieee_case_dir": "examples/ieee13",
  "data_dir": null,
  "data_sources": [
    {
      "model_label": "Llama-3.1-8B",
      "task": "lm-arena-chat",
      "gpu": "H100",
      "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024]
    },
    {
      "model_label": "Llama-3.1-70B",
      "task": "lm-arena-chat",
      "gpu": "H100",
      "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048]
    }
  ],
  "mlenergy_data_dir": null
}