{
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"dataset": "beAnalytic/eda-training-dataset",
"output_dir": "beAnalytic/eda-llm-qwen2.5-lora",
"tensorboard": {
"report_to": ["tensorboard"],
"logging_dir_env": "TENSORBOARD_LOGDIR",
"run_name_env": "TRAINING_RUN_NAME",
"launch_local": "python scripts/launch_tensorboard.py --logdir ./results"
},
"memory_defaults": {
"MAX_SEQ_LENGTH": 256,
"PER_DEVICE_TRAIN_BATCH_SIZE": 1,
"PER_DEVICE_EVAL_BATCH_SIZE": 1,
"GRADIENT_ACCUMULATION_STEPS": 8,
"gradient_checkpointing": true
},
"sanity_gate": {
"MIN_TRAIN_SAMPLES_env": "MIN_TRAIN_SAMPLES",
"ALLOW_TINY_DATASET_env": "ALLOW_TINY_DATASET",
"recommended_min_train_samples": 300
},
"training_config": {
"num_train_epochs": 3,
"per_device_train_batch_size": 4,
"per_device_eval_batch_size": 4,
"learning_rate": 3e-05,
"warmup_steps": 100,
"logging_steps": 10,
"save_steps": 500,
"evaluation_strategy": "steps",
"eval_steps": 500,
"save_total_limit": 3,
"load_best_model_at_end": true,
"metric_for_best_model": "loss",
"greater_is_better": false,
"fp16": true,
"gradient_accumulation_steps": 2,
"max_steps": -1
},
"peft_config": {
"use_peft": true,
"peft_method": "lora",
"lora_r": 16,
"lora_alpha": 32,
"lora_dropout": 0.1,
"target_modules": [
"q_proj",
"v_proj",
"k_proj",
"o_proj"
]
},
"quantization": {
"use_4bit": true,
"bnb_4bit_compute_dtype": "float16",
"bnb_4bit_quant_type": "nf4",
"bnb_4bit_use_double_quant": true
}
}