{
  "models": [
    {
      "model_label": "Llama-3.1-8B",
      "model_id": "meta-llama/Llama-3.1-8B-Instruct",
      "gpu_model": "H100",
      "task": "lm-arena-chat",
      "gpus_per_replica": 1,
      "tensor_parallel": 1,
      "itl_deadline_s": 0.08,
      "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024],
      "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
      "num_replicas": 720,
      "initial_batch_size": 128
    },
    {
      "model_label": "Llama-3.1-70B",
      "model_id": "meta-llama/Llama-3.1-70B-Instruct",
      "gpu_model": "H100",
      "task": "lm-arena-chat",
      "gpus_per_replica": 4,
      "tensor_parallel": 4,
      "itl_deadline_s": 0.10,
      "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048],
      "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
      "num_replicas": 180,
      "initial_batch_size": 128
    },
    {
      "model_label": "Llama-3.1-405B",
      "model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8",
      "gpu_model": "H100",
      "task": "lm-arena-chat",
      "gpus_per_replica": 8,
      "tensor_parallel": 8,
      "precision": "fp8",
      "itl_deadline_s": 0.12,
      "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
      "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
      "num_replicas": 90,
      "initial_batch_size": 128
    },
    {
      "model_label": "Qwen3-30B-A3B",
      "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
      "gpu_model": "H100",
      "task": "gpqa",
      "gpus_per_replica": 2,
      "tensor_parallel": 2,
      "itl_deadline_s": 0.06,
      "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
      "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
      "num_replicas": 480,
      "initial_batch_size": 128
    },
    {
      "model_label": "Qwen3-235B-A22B",
      "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
      "gpu_model": "H100",
      "task": "gpqa",
      "gpus_per_replica": 8,
      "tensor_parallel": 8,
      "itl_deadline_s": 0.14,
      "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
      "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
      "num_replicas": 210,
      "initial_batch_size": 128
    }
  ],
  "training_trace_params": {},
  "data_dir": null,
  "ieee_case_dir": "examples/ieee13",
  "mlenergy_data_dir": null
}