{
  "model_type": "llama",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "vocab_size": 32000,
  "hidden_size": 2048,
  "num_hidden_layers": 24,
  "num_attention_heads": 16,
  "lora_alpha": 16,
  "lora_r": 64,
  "lora_dropout": 0.1,
  "use_cache": true,
  "use_4bit": true,
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_quant_type": "nf4",
  "use_nested_quant": false,
  "fp16": true,
  "bf16": false,
  "per_device_train_batch_size": 16,
  "per_device_eval_batch_size": 16,
  "gradient_accumulation_steps": 1,
  "max_grad_norm": 0.5,
  "learning_rate": 0.0004,
  "weight_decay": 0.0003,
  "optim": "adamw_hf",
  "lr_scheduler_type": "linear",
  "warmup_ratio": 0.1,
  "group_by_length": true
}