# Page header captured alongside this file (not part of the code): Hugging Face Spaces status "Runtime error".
"""Central configuration: data/model directories, seeds and hyperparameters.

Importing this module has side effects: it creates the data, cache and
output directories relative to the current working directory, seeds the
Python, NumPy and PyTorch random number generators, sets the cuDNN backend
flags, and prints the selected device.
"""
import os
import random

import numpy as np
import torch

# ===== Data settings =====
# Create the data directory tree up front so later stages can write into it.
for _data_dir in ('data', 'data/original', 'data/processed', 'data/retrieval'):
    os.makedirs(_data_dir, exist_ok=True)

# ===== Model settings =====
MODEL_ID = 'google-bert/bert-base-multilingual-cased'
MODEL_NAME = 'VN-legalDocs-SBERT'
CACHE_DIR = f"cache/{MODEL_NAME}"
OUTPUT_DIR = f"models/{MODEL_NAME}"
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ===== Reproducibility =====
# Seed every random number generator this project touches.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# cuDNN trade-off:
#   strict reproducibility -> deterministic=True,  benchmark=False
#   faster training/inference -> deterministic=False, benchmark=True
# The speed-oriented pair is chosen deliberately here, so GPU runs may not be
# exactly repeatable even though the seeds above are fixed.
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

# ===== Hyperparameters =====
MAX_SEQ_LEN = 512
EPOCHS = 5
LR = 3e-5
BATCH_SIZE = 128

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {DEVICE}")