"""Central configuration for the ORD reaction-translation project.

Every path, Hugging Face repo identifier, and training hyperparameter
lives in this one module so the rest of the codebase imports a single
source of truth.
"""

from pathlib import Path
import os

# --- Hugging Face account / source dataset ---------------------------------
HF_USERNAME = "smitathkr1"
HF_DATASET = "smitathkr1/ord-reactions"

# --- Local directory layout ------------------------------------------------
# The project root may be overridden via the ORD_PROJECT_ROOT environment
# variable; otherwise it defaults to the parent of this file's directory.
PROJECT_ROOT = Path(os.environ.get("ORD_PROJECT_ROOT", Path(__file__).resolve().parents[1]))
CACHE_DIR = PROJECT_ROOT / "cache"
MODELS_DIR = PROJECT_ROOT / "models"
# Created eagerly at import time so downstream code may assume they exist.
for _dir in (CACHE_DIR, MODELS_DIR):
    _dir.mkdir(parents=True, exist_ok=True)

STATE_REPO = f"{HF_USERNAME}/ord-training-state"

# --- Hub repo names: tokenizer, models, datasets, demo Space ---------------
TOKENIZER_NAME = f"{HF_USERNAME}/ord-tokenizer"
FORWARD_MODEL_NAME = f"{HF_USERNAME}/ord-forward-t5"
RETRO_MODEL_NAME = f"{HF_USERNAME}/ord-retro-t5"
FORWARD_DATASET_NAME = f"{HF_USERNAME}/ord-forward-dataset"
RETRO_DATASET_NAME = f"{HF_USERNAME}/ord-retro-dataset"
SPACE_NAME = f"{HF_USERNAME}/ord-reaction-translator"

# --- Tokenizer special tokens ----------------------------------------------
# NOTE(review): bos_token/eos_token are empty strings here — verify this is
# deliberate and matches the tokenizer-training script before changing.
SPECIAL_TOKENS = {
    "pad_token": "[PAD]",
    "bos_token": "",
    "eos_token": "",
    "unk_token": "[UNK]",
}

# --- Data split / sequence lengths -----------------------------------------
TRAIN_SPLIT = 0.98  # 98% train; the remaining 2% is split into 1% validation, 1% test
SEED = 42
MAX_INPUT = 512   # maximum tokenized input length
MAX_TARGET = 256  # maximum tokenized target length

# --- Training hyperparameters ----------------------------------------------
BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4  # effective batch size = 4 * 4 = 16
LEARNING_RATE = 3e-4
NUM_EPOCHS = 5
EVAL_STEPS = 2000
SAVE_STEPS = 2000
LOGGING_STEPS = 500

# --- Base model / tokenizer sizing -----------------------------------------
BASE_MODEL = "t5-small"
VOCAB_SIZE = 800
MIN_FREQUENCY = 2

# --- Optional sampling cap (None = use the full dataset) -------------------
MAX_SAMPLES = None  # use the full dataset by default