task2file-llm / cpt_devstral_24B /config_resolved.yaml
SirajRLX's picture
Add Devstral-24B CPT training run
a555835 verified
# Run-level settings: where this run lives on disk and the seed it uses.
run:
  # Relative path; resolved against the launching process's working
  # directory unless the consumer rebases it — TODO confirm.
  run_dir: ./runs/cpt_run_v1

  # Fixed seed so repeated runs are comparable.
  seed: 42
# Base model selection and loading options.
model:
  # An absolute local path is used as the model identifier here, with no
  # pinned revision.
  repo_id: /workspace/Models/Devstral-Small-2-24B-Instruct-2512
  revision: null
  base_local_dir: base_model

  # NOTE(review): if forwarded to a Hugging Face `from_pretrained` call this
  # allows code shipped with the model to execute — confirm the model
  # directory is trusted.
  trust_remote_code: true
  tokenizer_use_fast: true

  # Placement and precision.
  device_map: auto
  torch_dtype: bfloat16

  # 4-bit quantization is switched off; the bnb_4bit_* values below are
  # presumably only consulted when use_4bit is true — TODO confirm.
  use_4bit: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: bfloat16

  # null leaves the attention backend unspecified in this file.
  attn_implementation: null
# Dataset location and preprocessing options.
data:
  train_jsonl: /workspace/all_data_with_descriptions.jsonl

  # No dedicated eval file is given; eval_split_ratio presumably carves the
  # eval set (10%) out of the training file instead — TODO confirm.
  eval_jsonl: null
  eval_split_ratio: 0.1

  # Name of the JSONL field that holds the raw text.
  text_field: text

  # Sequence length per example and how examples are fitted to it.
  block_size: 4096
  pack_mode: pad

  shuffle: true
  num_proc: 4
# LoRA adapter settings.
peft:
  enabled: true

  # Rank 64 with alpha 128, i.e. an alpha / r ratio of 2.
  r: 64
  lora_alpha: 128
  lora_dropout: 0.05

  # Quoted so the value is unmistakably the string "none", not a YAML null.
  bias: 'none'

  # "auto" defers the choice of target modules to the consumer.
  target_modules: auto
# Optimisation, logging, checkpointing and evaluation schedule.
train:
  num_train_epochs: 2

  # Batch of 1 per device with 16 accumulation steps: 16 sequences per
  # optimizer step per device.
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 16

  # Written with an explicit decimal point on purpose: YAML 1.1 loaders
  # (e.g. PyYAML) only resolve exponent notation as a float when the
  # mantissa contains a ".", so a bare `2e-5` would load as the string
  # "2e-5" instead of the number 0.00002.
  learning_rate: 2.0e-5
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: cosine
  optim: paged_adamw_8bit
  max_grad_norm: 1.0
  gradient_checkpointing: true

  logging_steps: 1

  # Checkpoint every 100 steps, keeping at most 4 checkpoints.
  save_strategy: steps
  save_steps: 100
  save_total_limit: 4

  # Evaluate every 50 steps; save_steps (100) is a multiple of eval_steps,
  # so every checkpoint coincides with an evaluation.
  # NOTE(review): if this key is forwarded verbatim to Hugging Face
  # TrainingArguments, newer transformers releases call it `eval_strategy`
  # — confirm against the consumer before upgrading.
  evaluation_strategy: steps
  eval_steps: 50
  load_best_model_at_end: true

  # "auto" presumably means "resume from the latest checkpoint in run_dir
  # if one exists" — TODO confirm against the training script.
  resume_from_checkpoint: auto
# Post-training step that merges the adapter into the base model.
merge:
  enabled: true

  # NOTE(review): the model is loaded as bfloat16 (model.torch_dtype) but
  # merged weights are written as float16 — confirm the narrower exponent
  # range is intended for the exported model.
  merged_dtype: float16

  max_shard_size: 2GB
  output_dir: ./merged_24b_cpt_lora