{
  "model": "Qwen/Qwen2.5-1.5B-Instruct",
  "dataset": "beAnalytic/eda-training-dataset",
  "output_dir": "beAnalytic/eda-llm-qwen2.5-lora",
  "tensorboard": {
    "report_to": ["tensorboard"],
    "logging_dir_env": "TENSORBOARD_LOGDIR",
    "run_name_env": "TRAINING_RUN_NAME",
    "launch_local": "python scripts/launch_tensorboard.py --logdir ./results"
  },
  "memory_defaults": {
    "MAX_SEQ_LENGTH": 256,
    "PER_DEVICE_TRAIN_BATCH_SIZE": 1,
    "PER_DEVICE_EVAL_BATCH_SIZE": 1,
    "GRADIENT_ACCUMULATION_STEPS": 8,
    "gradient_checkpointing": true
  },
  "sanity_gate": {
    "MIN_TRAIN_SAMPLES_env": "MIN_TRAIN_SAMPLES",
    "ALLOW_TINY_DATASET_env": "ALLOW_TINY_DATASET",
    "recommended_min_train_samples": 300
  },
  "training_config": {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "learning_rate": 3e-05,
    "warmup_steps": 100,
    "logging_steps": 10,
    "save_steps": 500,
    "evaluation_strategy": "steps",
    "eval_steps": 500,
    "save_total_limit": 3,
    "load_best_model_at_end": true,
    "metric_for_best_model": "loss",
    "greater_is_better": false,
    "fp16": true,
    "gradient_accumulation_steps": 2,
    "max_steps": -1
  },
  "peft_config": {
    "use_peft": true,
    "peft_method": "lora",
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
    "target_modules": [
      "q_proj",
      "v_proj",
      "k_proj",
      "o_proj"
    ]
  },
  "quantization": {
    "use_4bit": true,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true
  }
}