672259a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Module-level run configuration.
# The statements below were collapsed onto a single physical line, which Python
# rejects as a syntax error; they are restored to one assignment per line with
# names and values unchanged.
# NOTE(review): the code that consumes these settings is outside this view, so
# the grouping and per-setting remarks below are inferred from the names only —
# confirm against the consumers.

# -- data / run settings --
batch_size = 12
block_size = 1024
bias = False
real_data = True
seed = 1337
device = 'cuda'  # presumably a torch-style device string — TODO confirm
dtype = 'bfloat16'  # kept as a string here; presumably mapped to a real dtype by the consumer
compile = True  # NOTE: shadows the builtin `compile` within this module's namespace
profile = False

# -- model size settings (presumably layers / attention heads / embedding width) --
n_layer = 12
n_head = 12
n_embd = 768