dacorvo HF Staff committed on
Commit
f8538f0
·
verified ·
1 Parent(s): 412a86d

use longer sequence length for llama3 on trn2

Browse files
inference-cache-config/trn2/llama3.json CHANGED
@@ -6,6 +6,12 @@
6
  "tensor_parallel_size": 4,
7
  "instance_type" : "trn2"
8
  },
 
 
 
 
 
 
9
  {
10
  "batch_size": 8,
11
  "sequence_length": 4096,
@@ -32,6 +38,12 @@
32
  "tensor_parallel_size" : 4,
33
  "instance_type" : "trn2"
34
  },
 
 
 
 
 
 
35
  {
36
  "batch_size": 4,
37
  "sequence_length": 4096,
@@ -52,6 +64,12 @@
52
  "tensor_parallel_size" : 4,
53
  "instance_type" : "trn2"
54
  },
 
 
 
 
 
 
55
  {
56
  "batch_size": 64,
57
  "sequence_length": 4096,
@@ -80,6 +98,12 @@
80
  "tensor_parallel_size": 64,
81
  "instance_type" : "trn2"
82
  },
 
 
 
 
 
 
83
  {
84
  "batch_size": 32,
85
  "sequence_length": 4096,
 
6
  "tensor_parallel_size": 4,
7
  "instance_type" : "trn2"
8
  },
9
+ {
10
+ "batch_size": 1,
11
+ "sequence_length": 16384,
12
+ "tensor_parallel_size": 4,
13
+ "instance_type" : "trn2"
14
+ },
15
  {
16
  "batch_size": 8,
17
  "sequence_length": 4096,
 
38
  "tensor_parallel_size" : 4,
39
  "instance_type" : "trn2"
40
  },
41
+ {
42
+ "batch_size": 1,
43
+ "sequence_length": 16384,
44
+ "tensor_parallel_size" : 4,
45
+ "instance_type" : "trn2"
46
+ },
47
  {
48
  "batch_size": 4,
49
  "sequence_length": 4096,
 
64
  "tensor_parallel_size" : 4,
65
  "instance_type" : "trn2"
66
  },
67
+ {
68
+ "batch_size": 1,
69
+ "sequence_length": 16384,
70
+ "tensor_parallel_size" : 4,
71
+ "instance_type" : "trn2"
72
+ },
73
  {
74
  "batch_size": 64,
75
  "sequence_length": 4096,
 
98
  "tensor_parallel_size": 64,
99
  "instance_type" : "trn2"
100
  },
101
+ {
102
+ "batch_size": 1,
103
+ "sequence_length": 16384,
104
+ "tensor_parallel_size": 64,
105
+ "instance_type" : "trn2"
106
+ },
107
  {
108
  "batch_size": 32,
109
  "sequence_length": 4096,