# takuM23's picture
# Upload folder using huggingface_hub
# 38e2dac verified
import json
import os.path
import re
import torch
# ---------------------------------------------------------------------------
# Training and model configuration constants.
#
# NOTE(review): the code that consumes these values is outside this view; the
# per-constant notes below describe the values themselves and hedge anything
# inferred from the names.
# ---------------------------------------------------------------------------

# Gradient-accumulation setting (presumably the number of micro-batches per
# optimizer step -- confirm against the training loop).
GRADIENT_ACCUM = 2

# Mixed-precision mode string: "bf16" when a CUDA device is present and reports
# bfloat16 support, otherwise "no" (full precision).
# The previous condition was inverted and selected "bf16" exactly when bf16
# was NOT available.
# NOTE(review): "no"/"bf16" look like Hugging Face Accelerate `mixed_precision`
# values -- confirm against the consumer of this constant.
MIXED_PRECISION = (
    "bf16" if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else "no"
)

# Optimisation settings.
BATCH_SIZE = 12
LR = 1e-3
TRAINING_EPOCHS = 50

# Model dimensions.
training_seq_length = 3072
embedding_dim = 768
attention_heads = 8
N_decoder_layers = 8

# Dropout values.
# NOTE(review): this list has 5 entries while N_decoder_layers is 8; how the
# entries map onto layers is not visible here -- confirm.
BASE_LAYER_DROPOUTS = [0.0, 0.0, 0.1, 0.2, 0.4]

# Flag for bias terms (presumably in the transformer's linear layers -- confirm).
TRANSFORMER_BIAS_ENABLE = False

# Tokenizer-related settings.
# NOTE(review): whether VOCAB_SIZE already counts SPECIAL_TOKENS is not
# visible here -- confirm.
VOCAB_SIZE = 12000
SPECIAL_TOKENS = ["<|startoftext|>", "<|user|>", "<|agent|>", "<|endofturn|>"]

# Checkpoint identification (presumably the checkpoint name to resume from and
# the checkpoint format version -- confirm against the loading code).
RESUME_CHECKPOINT = "75.1 million-params-transformer-12-bf16"
CHECKPOINT_VERSION = "1.1.0"