dacorvo HF Staff committed on
Commit
6d9930a
·
verified ·
1 Parent(s): e10d2ce

Add llama3 configurations with longer sequences

Browse files
inference-cache-config/trn1/llama3.json CHANGED
@@ -6,6 +6,12 @@
6
  "tensor_parallel_size": 2,
7
  "instance_type" : "trn1"
8
  },
 
 
 
 
 
 
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
@@ -18,6 +24,12 @@
18
  "tensor_parallel_size": 2,
19
  "instance_type" : "trn1"
20
  },
 
 
 
 
 
 
21
  {
22
  "batch_size": 4,
23
  "sequence_length": 4096,
@@ -110,6 +122,12 @@
110
  "tensor_parallel_size": 24,
111
  "instance_type" : "trn1"
112
  },
 
 
 
 
 
 
113
  {
114
  "batch_size": 8,
115
  "sequence_length": 4096,
@@ -122,6 +140,12 @@
122
  "tensor_parallel_size": 32,
123
  "instance_type" : "trn1"
124
  },
 
 
 
 
 
 
125
  {
126
  "batch_size": 8,
127
  "sequence_length": 4096,
 
6
  "tensor_parallel_size": 2,
7
  "instance_type" : "trn1"
8
  },
9
+ {
10
+ "batch_size": 1,
11
+ "sequence_length": 16384,
12
+ "tensor_parallel_size": 2,
13
+ "instance_type" : "trn1"
14
+ },
15
  {
16
  "batch_size": 4,
17
  "sequence_length": 4096,
 
24
  "tensor_parallel_size": 2,
25
  "instance_type" : "trn1"
26
  },
27
+ {
28
+ "batch_size": 1,
29
+ "sequence_length": 16384,
30
+ "tensor_parallel_size": 8,
31
+ "instance_type" : "trn1"
32
+ },
33
  {
34
  "batch_size": 4,
35
  "sequence_length": 4096,
 
122
  "tensor_parallel_size": 24,
123
  "instance_type" : "trn1"
124
  },
125
+ {
126
+ "batch_size": 1,
127
+ "sequence_length": 16384,
128
+ "tensor_parallel_size": 2,
129
+ "instance_type" : "trn1"
130
+ },
131
  {
132
  "batch_size": 8,
133
  "sequence_length": 4096,
 
140
  "tensor_parallel_size": 32,
141
  "instance_type" : "trn1"
142
  },
143
+ {
144
+ "batch_size": 1,
145
+ "sequence_length": 16384,
146
+ "tensor_parallel_size": 32,
147
+ "instance_type" : "trn1"
148
+ },
149
  {
150
  "batch_size": 8,
151
  "sequence_length": 4096,