{
    "model": {
        "model_name": "croissantllm/CroissantLLMChat-v0.1",
        "use_lora": true,
        "use_8bit": false,
        "use_4bit": false,
        "lora_r": 16,
        "lora_alpha": 32,
        "lora_dropout": 0.1,
        "target_modules": [
            "q_proj",
            "v_proj",
            "k_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj"
        ],
        "gradient_checkpointing": true
    },
    "data": {
        "train_file": "/home/k_ammade/slurm_tmpdir/50153/qcpt_run_slurm/train.txt",
        "max_length": 1024,
        "stride": 128,
        "batch_size": 32,
        "preprocessing_num_workers": 4,
        "tokenizer_batch_size": 1000,
        "min_length": 50
    },
    "training": {
        "output_dir": "/home/k_ammade/Projects/CPT_scratch/models/quebec_french_croissant_3E_RUN2",
        "num_epochs": 3,
        "learning_rate": 0.0001,
        "warmup_ratio": 0.03,
        "weight_decay": 0.0,
        "gradient_accumulation_steps": 16,
        "fp16": true,
        "save_steps": 500,
        "eval_steps": 500,
        "logging_steps": 50,
        "save_total_limit": 3,
        "seed": 42,
        "push_to_hub": false,
        "hub_model_id": null,
        "fsdp": null,
        "fsdp_transformer_layer_cls_to_wrap": "LlamaDecoderLayer"
    },
    "version": "1.1_FIXED"
}