edwjin committed on
Commit
9321d3a
·
verified ·
1 Parent(s): 548021b

Update constants.py

Browse files
Files changed (1) hide show
  1. constants.py +21 -19
constants.py CHANGED
@@ -1,19 +1,21 @@
1
- seed = 42
2
-
3
- """ Hyperparameters to use for training to roughly match
4
- the numbers mentioned in the assignment description """
5
- batch_size = 16 # Number of independent sequences we will process in parallel
6
- block_size = 1024 # Maximum context length for predictions
7
- learning_rate = 1e-3 # Learning rate for the optimizer , OG: 1e-3
8
- n_embd = 64 # Embedding dimension
9
- n_head = 2 # Number of attention heads, OG = 2
10
- n_layer = 4 # Number of transformer layers, OG = 4
11
-
12
-
13
- ## classifier training hyperparameters. It is a simple 1 hidden layer feedforward network, with input
14
- ## size of 64, hidden size of 50 and output size of 3.
15
-
16
- n_input = 64 # Input size for the classifier, should match the embedding size of the transformer
17
- n_hidden = 100 # Hidden size for the classifier
18
- n_output = 46 # Output size for the classifier, we have 46 classes
19
- epochs_CLS = 15 # epochs for classifier training
 
 
 
1
+ ### CONSTANTS ###
2
+ seed = 42
3
+
4
+ """ Hyperparameters to use for training to roughly match
5
+ the numbers mentioned in the assignment description """
6
+ batch_size = 16 # Number of independent sequences we will process in parallel
7
+ block_size = 512 # Maximum context length for predictions
8
+ learning_rate = 1e-4 # Learning rate for the optimizer, OG: 1e-3
9
+ n_embd = 512 # Embedding dimension
10
+ n_head = 8 # Number of attention heads
11
+ n_layer = 6 # Number of transformer encoder layers
12
+ feed_forward = 2048
13
+
14
+
15
+ ## classifier training hyperparameters. It is a simple 1 hidden layer feedforward network, with input
16
+ ## size of 512, hidden size of 100 and output size of 46.
17
+
18
+ n_input = 512 # Input size for the classifier, should match the embedding size of the transformer
19
+ n_hidden = 100 # Hidden size for the classifier
20
+ n_output = 46 # Output size for the classifier, we have 46 presidents
21
+ epochs_CLS = 5 # epochs for classifier training