|
|
from pathlib import Path |
|
|
import torch |
|
|
|
|
|
|
|
|
# --- File-system layout -------------------------------------------------
# Paths are composed with pathlib's "/" operator so they resolve on every
# OS. The previous raw backslash strings (r"data\...") only worked on
# Windows: on POSIX a backslash is an ordinary filename character, so the
# paths silently pointed at non-existent files.

# Root directory of the IWSLT'15 English–Vietnamese parallel corpus.
DATA_PATH = Path("data") / "IWSLT-15-en-vi"

# File name of the serialized tokenizer (JSON; presumably produced by the
# HF `tokenizers` library — confirm against the training script).
TOKENIZER_NAME = "iwslt_en-vi_tokenizer_32k.json"

# Full path to the tokenizer artifact.
TOKENIZER_PATH = Path("artifacts") / "tokenizers" / TOKENIZER_NAME

# Directory where trained model weights are written.
MODEL_DIR = Path("artifacts") / "models"

# Base weight-file name, reused for the checkpoint path below.
MODEL_NAME = "transformer_en_vi_iwslt_1.safetensors"

# Destination for the final trained weights.
MODEL_SAVE_PATH = MODEL_DIR / "transformer_en_vi_iwslt_kaggle_1.safetensors"

# Mid-training checkpoint location.
CHECKPOINT_PATH = Path("artifacts") / "checkpoints" / MODEL_NAME

# Cache directory; empty string here — NOTE(review): confirm how the
# consumer interprets "" (library default vs. current directory).
CACHE_DIR = ""
|
|
|
|
|
|
|
|
|
|
|
# --- Runtime environment & data-pipeline settings ------------------------

# Compute device: prefer the GPU when CUDA is available, otherwise fall
# back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Worker-process count (presumably for a DataLoader — confirm at the
# point of use).
NUM_WORKERS: int = 4

# Target vocabulary size for the tokenizer.
VOCAB_SIZE: int = 32_000

# Special tokens. Their order matters: it fixes the ids assigned to them
# (see the *_TOKEN_ID constants below).
SPECIAL_TOKENS: list[str] = "[PAD] [UNK] [SOS] [EOS]".split()

# Cap on how many samples are taken from the dataset — NOTE(review):
# 1000 looks like a debugging subset; confirm intent before a full run.
NUM_SAMPLES_TO_USE: int = 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Integer ids of the special tokens.
# NOTE(review): these values encode the *order* of SPECIAL_TOKENS
# ([PAD]=0, [UNK]=1, [SOS]=2, [EOS]=3) and must stay in sync both with
# that list and with the trained tokenizer file — confirm against the
# tokenizer JSON if either changes.
PAD_TOKEN_ID: int = 0  # padding
UNK_TOKEN_ID: int = 1  # unknown / out-of-vocabulary
SOS_TOKEN_ID: int = 2  # start of sequence
EOS_TOKEN_ID: int = 3  # end of sequence
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Transformer architecture hyper-parameters ---------------------------

# Embedding / residual-stream width.
D_MODEL: int = 512

# Number of layers — presumably per stack (encoder and decoder); confirm
# against the model-construction code.
N_LAYERS: int = 6

# Attention heads per layer. D_MODEL must divide evenly by this
# (512 / 8 = 64-dimensional heads).
N_HEADS: int = 8

# Hidden width of the position-wise feed-forward sublayer (4 × D_MODEL,
# the conventional ratio).
D_FF: int = 2048

# Dropout rate.
DROPOUT: float = 0.1

# Maximum sequence length in tokens.
MAX_SEQ_LEN: int = 150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Training hyper-parameters -------------------------------------------

# Optimizer learning rate (optimizer and any schedule are defined elsewhere).
LEARNING_RATE: float = 5e-4

# Samples per training batch.
BATCH_SIZE: int = 32

# Number of passes over the training data.
EPOCHS: int = 5
|
|
|
|
|
|
|
|
|
|
|
# --- Model-hub download settings -----------------------------------------
# Presumably consumed by huggingface_hub's hf_hub_download — confirm at
# the point of use.

# Repository id hosting the trained weights.
REPO_ID: str = "AlainDeLong/transformer-en-vi-base"

# Weight file to fetch from that repository.
# NOTE(review): duplicates the file name baked into MODEL_SAVE_PATH
# above — keep the two in sync.
FILENAME: str = "transformer_en_vi_iwslt_kaggle_1.safetensors"
|
|
|
|
|
# Quick smoke check: running this module directly reports which compute
# device was selected.
if __name__ == "__main__":
    print(f"Using device: {DEVICE}")
|
|
|