{
  "model": "Qwen/Qwen2.5-1.5B-Instruct",
  "dataset": "beAnalytic/eda-training-dataset",
  "output_dir": "beAnalytic/eda-llm-qwen2.5-lora",
  "tensorboard": {
    "report_to": ["tensorboard"],
    "logging_dir_env": "TENSORBOARD_LOGDIR",
    "run_name_env": "TRAINING_RUN_NAME",
    "launch_local": "python scripts/launch_tensorboard.py --logdir ./results"
  },
  "memory_defaults": {
    "MAX_SEQ_LENGTH": 256,
    "PER_DEVICE_TRAIN_BATCH_SIZE": 1,
    "PER_DEVICE_EVAL_BATCH_SIZE": 1,
    "GRADIENT_ACCUMULATION_STEPS": 8,
    "gradient_checkpointing": true
  },
  "sanity_gate": {
    "MIN_TRAIN_SAMPLES_env": "MIN_TRAIN_SAMPLES",
    "ALLOW_TINY_DATASET_env": "ALLOW_TINY_DATASET",
    "recommended_min_train_samples": 300
  },
  "training_config": {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "learning_rate": 3e-05,
    "warmup_steps": 100,
    "logging_steps": 10,
    "save_steps": 500,
    "evaluation_strategy": "steps",
    "eval_steps": 500,
    "save_total_limit": 3,
    "load_best_model_at_end": true,
    "metric_for_best_model": "loss",
    "greater_is_better": false,
    "fp16": true,
    "gradient_accumulation_steps": 2,
    "max_steps": -1
  },
  "peft_config": {
    "use_peft": true,
    "peft_method": "lora",
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
    "target_modules": [
      "q_proj",
      "v_proj",
      "k_proj",
      "o_proj"
    ]
  },
  "quantization": {
    "use_4bit": true,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true
  }
}