An example YAML configuration for LoRA fine-tuning with `mlx_lm.lora`:
```yaml
# The path to the local model directory or Hugging Face repo.
model: "mlx-community/Qwen2.5-7B-Instruct-4bit"
# Whether or not to train (boolean)
train: true
# The fine-tuning method: "lora", "dora", or "full".
fine_tune_type: lora
# The optimizer and its configuration options.
optimizer: adamw
# optimizer_config:
#   adamw:
#     betas: [0.9, 0.98]
#     eps: 1e-6
#     weight_decay: 0.05
#     bias_correction: true
# Directory with {train, valid, test}.jsonl files
# (see the data format example below).
data: "resources/data/mlx_train_data"
# The PRNG seed
seed: 0
# Number of layers to fine-tune
num_layers: 28
# Minibatch size.
batch_size: 2
# Iterations to train for.
iters: 200
# Number of validation batches; -1 uses the entire validation set.
val_batches: 25
# Learning rate.
learning_rate: 1e-5
# Number of training steps between loss reporting.
steps_per_report: 10
# Number of training steps between validations.
steps_per_eval: 10
# Load path to resume training with the given adapter weights.
resume_adapter_file: null
# Save/load path for the trained adapter weights.
adapter_path: "resources/model/output/mlx/adapters"
# Save the model every N iterations.
save_every: 100
# Evaluate on the test set after training.
test: false
# Number of test set batches; -1 uses the entire test set.
test_batches: 100
# Maximum sequence length.
max_seq_length: 2048
# Use gradient checkpointing to reduce memory use.
grad_checkpoint: false
# LoRA parameters can only be specified in a config file.
lora_parameters:
  # The layer keys to apply LoRA to.
  # These will be applied to the last num_layers layers.
  keys: ["self_attn.q_proj", "self_attn.v_proj"]
  rank: 32
  scale: 10.0
  dropout: 0.1
  alpha: 64
# A learning-rate schedule can only be specified in a config file;
# uncomment to use (see the sketch after this block).
# lr_schedule:
#   name: cosine_decay
#   warmup: 100  # 0 for no warmup
#   warmup_init: 1e-7  # 0 if not specified
#   arguments: [1e-5, 1000, 1e-7]  # passed to the scheduler
# Use a Hugging Face dataset instead of local JSONL files;
# uncomment to use.
# hf_dataset:
#   name: "billsum"
#   train_split: "train[:1000]"
#   valid_split: "train[-100:]"
#   prompt_feature: "text"
#   completion_feature: "summary"
```
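Once saved (e.g. as `lora_config.yaml`, a filename chosen here for illustration), training can be launched with mlx-lm's LoRA entry point, e.g. `mlx_lm.lora --config lora_config.yaml`; options passed on the command line should take precedence over values in the config file.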
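The `data` directory is expected to contain `train.jsonl` and `valid.jsonl` (plus `test.jsonl` when `test: true`). mlx-lm accepts a few JSONL schemas, including plain `text`, `prompt`/`completion` pairs, and chat-style `messages`. A minimal chat-style line, with illustrative contents:

```jsonl
{"messages": [{"role": "user", "content": "What is the capital of France?"}, {"role": "assistant", "content": "The capital of France is Paris."}]}
```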
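The commented `lr_schedule` block corresponds to schedules from `mlx.optimizers`. Below is a minimal sketch of roughly what it builds, assuming an mlx version whose `cosine_decay` accepts an end value: a 100-step linear warmup from `warmup_init` to the peak rate, joined to a cosine decay over 1000 steps down to 1e-7, mirroring `arguments: [1e-5, 1000, 1e-7]`.

```python
import mlx.optimizers as optim

# 100-step linear warmup from warmup_init (1e-7) up to the peak rate (1e-5).
warmup = optim.linear_schedule(1e-7, 1e-5, steps=100)
# Cosine decay from the peak rate over 1000 steps down to 1e-7.
decay = optim.cosine_decay(1e-5, 1000, 1e-7)
# Switch from warmup to decay at step 100.
schedule = optim.join_schedules([warmup, decay], [100])

# Schedules are callables and can be passed directly as the learning rate.
optimizer = optim.AdamW(learning_rate=schedule)
```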