| batch_size = 12 | |
| block_size = 1024 | |
| bias = False | |
| real_data = True | |
| seed = 1337 | |
| device = 'cuda' | |
| dtype = 'bfloat16' | |
| compile = True | |
| profile = False | |
| n_layer = 12 | |
| n_head = 12 | |
| n_embd = 768 | |
| batch_size = 12 | |
| block_size = 1024 | |
| bias = False | |
| real_data = True | |
| seed = 1337 | |
| device = 'cuda' | |
| dtype = 'bfloat16' | |
| compile = True | |
| profile = False | |
| n_layer = 12 | |
| n_head = 12 | |
| n_embd = 768 | |