---
# Pipeline configuration: data preparation and model fine-tuning.

prepare:
  test_size: 0.2        # fraction of data held out for evaluation
  random_state: 42      # seed for reproducible train/test split
  # NOTE(review): a prior comment claimed this was "reduced from 5000 to 500
  # for fast CI verification", but the value here is 5000 — confirm whether
  # the cap was meant to be reverted or the comment is stale.
  max_samples: 5000

train:
  model_name: "distilbert-base-uncased"
  num_epochs: 2
  batch_size: 16
  learning_rate: 2.0e-5  # written with a dot so YAML 1.1 loaders parse a float, not a string
  warmup_steps: 100
  weight_decay: 0.01
  max_length: 256        # max token length per review