dacorvo HF Staff committed on
Commit
69ff3fb
·
verified ·
1 Parent(s): 614cbff

Update inference-cache-config/trn1/llama4.json

Browse files
inference-cache-config/trn1/llama4.json CHANGED
@@ -6,7 +6,6 @@
6
  "batch_size": 1,
7
  "sequence_length": 4096,
8
  "tensor_parallel_size": 16,
9
- "auto_cast_type": "bf16"
10
  },
11
  {
12
  "task": "text-generation",
@@ -14,7 +13,6 @@
14
  "batch_size": 4,
15
  "sequence_length": 4096,
16
  "tensor_parallel_size": 16,
17
- "auto_cast_type": "bf16"
18
  },
19
  {
20
  "task": "text-generation",
@@ -22,7 +20,6 @@
22
  "batch_size": 1,
23
  "sequence_length": 4096,
24
  "tensor_parallel_size": 32,
25
- "auto_cast_type": "bf16"
26
  },
27
  {
28
  "task": "text-generation",
@@ -30,7 +27,6 @@
30
  "batch_size": 4,
31
  "sequence_length": 4096,
32
  "tensor_parallel_size": 32,
33
- "auto_cast_type": "bf16"
34
  },
35
  {
36
  "task": "text-generation",
@@ -38,7 +34,6 @@
38
  "batch_size": 8,
39
  "sequence_length": 4096,
40
  "tensor_parallel_size": 32,
41
- "auto_cast_type": "bf16"
42
  }
43
  ]
44
  }
 
6
  "batch_size": 1,
7
  "sequence_length": 4096,
8
  "tensor_parallel_size": 16,
 
9
  },
10
  {
11
  "task": "text-generation",
 
13
  "batch_size": 4,
14
  "sequence_length": 4096,
15
  "tensor_parallel_size": 16,
 
16
  },
17
  {
18
  "task": "text-generation",
 
20
  "batch_size": 1,
21
  "sequence_length": 4096,
22
  "tensor_parallel_size": 32,
 
23
  },
24
  {
25
  "task": "text-generation",
 
27
  "batch_size": 4,
28
  "sequence_length": 4096,
29
  "tensor_parallel_size": 32,
 
30
  },
31
  {
32
  "task": "text-generation",
 
34
  "batch_size": 8,
35
  "sequence_length": 4096,
36
  "tensor_parallel_size": 32,
 
37
  }
38
  ]
39
  }