File size: 196 Bytes
672259a
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

# Flat settings file: every entry is a literal constant assigned at top level.
# Nothing is imported or computed here; how each value is interpreted is up to
# whatever script loads this file (that consumer is not visible from here).

# Presumably model-shape settings -- TODO confirm against the consuming script.
n_embd = 768
n_head = 12
n_layer = 12
bias = False

# Presumably input sizing / data-source settings -- TODO confirm.
block_size = 1024
batch_size = 12
real_data = True

# Presumably runtime / execution settings -- TODO confirm.
device = 'cuda'
dtype = 'bfloat16'
# NOTE(review): this name shadows Python's builtin compile() in whichever
# namespace these assignments run. It is kept as-is because the name is the
# interface the consumer reads.
compile = True
profile = False
seed = 1337