{
  "model_type": "llama",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "vocab_size": 32000,
  "hidden_size": 2048,
  "num_hidden_layers": 24,
  "num_attention_heads": 16,
  "use_cache": true,
  "lora_r": 64,
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "use_4bit": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_compute_dtype": "float16",
  "use_nested_quant": false,
  "fp16": true,
  "bf16": false,
  "optim": "adamw_hf",
  "learning_rate": 0.0004,
  "lr_scheduler_type": "linear",
  "warmup_ratio": 0.1,
  "weight_decay": 0.0003,
  "max_grad_norm": 0.5,
  "per_device_train_batch_size": 16,
  "per_device_eval_batch_size": 16,
  "gradient_accumulation_steps": 1,
  "group_by_length": true
}