Spaces:
Sleeping
Sleeping
Update constants.py
Browse files- constants.py +21 -19
constants.py
CHANGED
|
@@ -1,19 +1,21 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
### CONSTANTS ###
|
| 2 |
+
seed = 42
|
| 3 |
+
|
| 4 |
+
""" Hyperparameters to use for training to roughly match
|
| 5 |
+
the numbers mentioned in the assignment description """
|
| 6 |
+
batch_size = 16 # Number of independent sequences we will process in parallel
|
| 7 |
+
block_size = 512 # Maximum context length for predictions
|
| 8 |
+
learning_rate = 1e-4 # Learning rate for the optimizer (originally 1e-3)
|
| 9 |
+
n_embd = 512 # Embedding dimension
|
| 10 |
+
n_head = 8 # Number of attention heads
|
| 11 |
+
n_layer = 6 # Number of transformer encoder layers
|
| 12 |
+
feed_forward = 2048
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
## classifier training hyperparameters. It is a simple 1 hidden layer feedforward network, with input
|
| 16 |
+
## size of n_input (512), hidden size of n_hidden (100) and output size of n_output (46).
|
| 17 |
+
|
| 18 |
+
n_input = 512 # Input size for the classifier, should match the embedding size of the transformer
|
| 19 |
+
n_hidden = 100 # Hidden size for the classifier
|
| 20 |
+
n_output = 46 # Output size for the classifier, we have 46 presidents
|
| 21 |
+
epochs_CLS = 5 # epochs for classifier training
|