{
    "model_type": "llama",
    "architectures": [
        "LlamaForCausalLM"
    ],
    "vocab_size": 32000,
    "hidden_size": 2048,
    "num_hidden_layers": 24,
    "num_attention_heads": 16,
    "lora_alpha": 16,
    "lora_r": 64,
    "lora_dropout": 0.1,
    "use_cache": true,
    "use_4bit": true,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "use_nested_quant": false,
    "fp16": true,
    "bf16": false,
    "per_device_train_batch_size": 16,
    "per_device_eval_batch_size": 16,
    "gradient_accumulation_steps": 1,
    "max_grad_norm": 0.5,
    "learning_rate": 0.0004,
    "weight_decay": 0.0003,
    "optim": "adamw_hf",
    "lr_scheduler_type": "linear",
    "warmup_ratio": 0.1,
    "group_by_length": true
}