Spaces:

edwjin
/

docker-classifier

Sleeping

edwjin committed on Jul 3, 2024

Commit

5ad679c

verified ·

1 Parent(s): 28aad06

Update constants.py

Files changed (1) hide show

constants.py CHANGED Viewed

@@ -4,8 +4,8 @@ seed = 42
 """ Hyperparameters to use for training to roughly match
 the numbers mentioned in the assignment description """
 batch_size = 16  # Number of independent sequences  we will process in parallel
-block_size = 128  # Maximum context length for predictions
-learning_rate = 1e-4  # Learning rate for the optimizer , OG: 1e-3
 n_embd = 512  # Embedding dimension
 n_head = 8 # Number of attention heads
 n_layer = 6  # Number of transformer encoder layers
@@ -18,4 +18,4 @@ feed_forward = 2048
 n_input = 512  # Input size for the classifier, should match the embedding size of the transformer
 n_hidden = 100  # Hidden size for the classifier
 n_output = 46  # Output size for the classifier, we have 46 presidents
-epochs_CLS = 5 # epochs for classifier training

 """ Hyperparameters to use for training to roughly match
 the numbers mentioned in the assignment description """
 batch_size = 16  # Number of independent sequences  we will process in parallel
+block_size = max([16,32,48,64,80])  # Maximum context length for predictions
+learning_rate = 1e-3  # Learning rate for the optimizer , OG: 1e-3
 n_embd = 512  # Embedding dimension
 n_head = 8 # Number of attention heads
 n_layer = 6  # Number of transformer encoder layers
 n_input = 512  # Input size for the classifier, should match the embedding size of the transformer
 n_hidden = 100  # Hidden size for the classifier
 n_output = 46  # Output size for the classifier, we have 46 presidents
+epochs_CLS = 7 # epochs for classifier training