LEMA-llama-2-7b / final_checkpoint / lema_config.json
Pomilon's picture
Update final_checkpoint/lema_config.json
1641c88 verified
raw
history blame contribute delete
727 Bytes
{
"model_name_or_path": "NousResearch/Llama-2-7b-hf",
"model_type": null,
"gbi_path": "llama2_7b.safetensors",
"device": "cuda",
"strategy": "streaming",
"ram_buffer_size": 2,
"vram_buffer_size": 1,
"use_lora": true,
"lora_rank": 16,
"lora_alpha": 32,
"lora_target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
"learning_rate": 0.0001,
"batch_size": 8,
"gradient_accumulation_steps": 1,
"max_seq_length": 512,
"gradient_checkpointing": true,
"save_steps": 500,
"output_dir": "checkpoints",
"dtype": "float16",
"attn_implementation": "eager"
}