dacorvo HF Staff committed on
Commit
ecc0bf7
·
verified ·
1 Parent(s): 69ff3fb

Update inference-cache-config/trn1/llama4.json

Browse files
inference-cache-config/trn1/llama4.json CHANGED
@@ -5,35 +5,35 @@
5
  "instance_type": "trn1",
6
  "batch_size": 1,
7
  "sequence_length": 4096,
8
- "tensor_parallel_size": 16,
9
  },
10
  {
11
  "task": "text-generation",
12
  "instance_type": "trn1",
13
  "batch_size": 4,
14
  "sequence_length": 4096,
15
- "tensor_parallel_size": 16,
16
  },
17
  {
18
  "task": "text-generation",
19
  "instance_type": "trn1",
20
  "batch_size": 1,
21
  "sequence_length": 4096,
22
- "tensor_parallel_size": 32,
23
  },
24
  {
25
  "task": "text-generation",
26
  "instance_type": "trn1",
27
  "batch_size": 4,
28
  "sequence_length": 4096,
29
- "tensor_parallel_size": 32,
30
  },
31
  {
32
  "task": "text-generation",
33
  "instance_type": "trn1",
34
  "batch_size": 8,
35
  "sequence_length": 4096,
36
- "tensor_parallel_size": 32,
37
  }
38
  ]
39
  }
 
5
  "instance_type": "trn1",
6
  "batch_size": 1,
7
  "sequence_length": 4096,
8
+ "tensor_parallel_size": 16
9
  },
10
  {
11
  "task": "text-generation",
12
  "instance_type": "trn1",
13
  "batch_size": 4,
14
  "sequence_length": 4096,
15
+ "tensor_parallel_size": 16
16
  },
17
  {
18
  "task": "text-generation",
19
  "instance_type": "trn1",
20
  "batch_size": 1,
21
  "sequence_length": 4096,
22
+ "tensor_parallel_size": 32
23
  },
24
  {
25
  "task": "text-generation",
26
  "instance_type": "trn1",
27
  "batch_size": 4,
28
  "sequence_length": 4096,
29
+ "tensor_parallel_size": 32
30
  },
31
  {
32
  "task": "text-generation",
33
  "instance_type": "trn1",
34
  "batch_size": 8,
35
  "sequence_length": 4096,
36
+ "tensor_parallel_size": 32
37
  }
38
  ]
39
  }