{
  "model": "Qwen/Qwen2.5-1.5B-Instruct",
  "dataset": "beAnalytic/eda-training-dataset",
  "output_dir": "beAnalytic/eda-llm-qwen2.5-lora",
  "tensorboard": {
    "report_to": ["tensorboard"],
    "logging_dir_env": "TENSORBOARD_LOGDIR",
    "run_name_env": "TRAINING_RUN_NAME",
    "launch_local": "python scripts/launch_tensorboard.py --logdir ./results"
  },
  "memory_defaults": {
    "MAX_SEQ_LENGTH": 256,
    "PER_DEVICE_TRAIN_BATCH_SIZE": 1,
    "PER_DEVICE_EVAL_BATCH_SIZE": 1,
    "GRADIENT_ACCUMULATION_STEPS": 8,
    "gradient_checkpointing": true
  },
  "sanity_gate": {
    "MIN_TRAIN_SAMPLES_env": "MIN_TRAIN_SAMPLES",
    "ALLOW_TINY_DATASET_env": "ALLOW_TINY_DATASET",
    "recommended_min_train_samples": 300
  },
  "training_config": {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "learning_rate": 3e-05,
    "warmup_steps": 100,
    "logging_steps": 10,
    "save_steps": 500,
    "evaluation_strategy": "steps",
    "eval_steps": 500,
    "save_total_limit": 3,
    "load_best_model_at_end": true,
    "metric_for_best_model": "loss",
    "greater_is_better": false,
    "fp16": true,
    "gradient_accumulation_steps": 2,
    "max_steps": -1
  },
  "peft_config": {
    "use_peft": true,
    "peft_method": "lora",
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
    "target_modules": [
      "q_proj",
      "v_proj",
      "k_proj",
      "o_proj"
    ]
  },
  "quantization": {
    "use_4bit": true,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true
  }
}