{
"model": {
"model_name": "croissantllm/CroissantLLMChat-v0.1",
"use_lora": true,
"use_8bit": false,
"use_4bit": false,
"lora_r": 16,
"lora_alpha": 32,
"lora_dropout": 0.1,
"target_modules": [
"q_proj",
"v_proj",
"k_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
"gradient_checkpointing": true
},
"data": {
"train_file": "/home/k_ammade/slurm_tmpdir/50153/qcpt_run_slurm/train.txt",
"max_length": 1024,
"stride": 128,
"batch_size": 32,
"preprocessing_num_workers": 4,
"tokenizer_batch_size": 1000,
"min_length": 50
},
"training": {
"output_dir": "/home/k_ammade/Projects/CPT_scratch/models/quebec_french_croissant_3E_RUN2",
"num_epochs": 3,
"learning_rate": 0.0001,
"warmup_ratio": 0.03,
"weight_decay": 0.0,
"gradient_accumulation_steps": 16,
"fp16": true,
"save_steps": 500,
"eval_steps": 500,
"logging_steps": 50,
"save_total_limit": 3,
"seed": 42,
"push_to_hub": false,
"hub_model_id": null,
"fsdp": null,
"fsdp_transformer_layer_cls_to_wrap": "LlamaDecoderLayer"
},
"version": "1.1_FIXED"
}