{
    "model": {
        "model_name": "croissantllm/CroissantLLMChat-v0.1",
        "use_lora": true,
        "use_8bit": false,
        "use_4bit": false,
        "lora_r": 16,
        "lora_alpha": 32,
        "lora_dropout": 0.1,
        "target_modules": [
            "q_proj",
            "v_proj",
            "k_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj"
        ],
        "gradient_checkpointing": true
    },
    "data": {
        "train_file": "/home/k_ammade/slurm_tmpdir/50153/qcpt_run_slurm/train.txt",
        "max_length": 1024,
        "stride": 128,
        "batch_size": 32,
        "preprocessing_num_workers": 4,
        "tokenizer_batch_size": 1000,
        "min_length": 50
    },
    "training": {
        "output_dir": "/home/k_ammade/Projects/CPT_scratch/models/quebec_french_croissant_3E_RUN2",
        "num_epochs": 3,
        "learning_rate": 0.0001,
        "warmup_ratio": 0.03,
        "weight_decay": 0.0,
        "gradient_accumulation_steps": 16,
        "fp16": true,
        "save_steps": 500,
        "eval_steps": 500,
        "logging_steps": 50,
        "save_total_limit": 3,
        "seed": 42,
        "push_to_hub": false,
        "hub_model_id": null,
        "fsdp": null,
        "fsdp_transformer_layer_cls_to_wrap": "LlamaDecoderLayer"
    },
    "version": "1.1_FIXED"
}