{
  "model_type": "llama",
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "unsloth/llama-3-8b-bnb-4bit",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_dropout": 0,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": [
    "lm_head",
    "embed_tokens"
  ],
  "peft_type": "LORA",
  "r": 16,
  "rank_pattern": {},
  "revision": "unsloth",
  "target_modules": [
    "k_proj",
    "gate_proj",
    "q_proj",
    "up_proj",
    "o_proj",
    "down_proj",
    "v_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
  "use_rslora": false,
| "hidden_size": 4096, |
| "num_attention_heads": 32, |
| "num_hidden_layers": 24, |
| "intermediate_size": 16384, |
| "max_position_embeddings": 512, |
| "vocab_size": 32000, |
| "layer_norm_eps": 1e-5, |
| "initializer_range": 0.02, |
| "train_batch_size": 2, |
| "gradient_accumulation_steps": 4, |
| "warmup_steps": 5, |
| "max_steps": 60, |
| "learning_rate": 0.0002, |
| "fp16": true, |
| "bf16": false, |
| "logging_steps": 1, |
| "optim": "adamw_8bit", |
| "weight_decay": 0.01, |
| "lr_scheduler_type": "linear", |
| "seed": 3407, |
| "output_dir": "outputs" |
| } |
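
The adapter fields and training hyperparameters above match a fairly standard Unsloth + TRL LoRA run. Below is a minimal sketch of how they might be reproduced in Python; the placeholder dataset, the `max_seq_length` value of 2048, and the assumption of a TRL version where `SFTTrainer` still accepts `dataset_text_field` and `max_seq_length` directly are illustrative choices, not part of the config itself.

```python
# Minimal sketch mirroring the JSON above. Assumptions: placeholder dataset,
# max_seq_length=2048, and a GPU with bitsandbytes 4-bit support.
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

max_seq_length = 2048  # assumed; not specified in the config above

# Load the 4-bit base model named in base_model_name_or_path.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
)

# Attach a LoRA adapter using the r / lora_alpha / target_modules values above.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    modules_to_save=["lm_head", "embed_tokens"],  # per modules_to_save above; support may vary by Unsloth version
    use_rslora=False,
    random_state=3407,
)

# Placeholder dataset: any Dataset with a "text" column works here.
dataset = Dataset.from_dict(
    {"text": ["### Instruction:\nSay hi.\n\n### Response:\nHi!"]}
)

# Training arguments taken from the trainer fields of the config.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=TrainingArguments(
        per_device_train_batch_size=2,   # "train_batch_size" above
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=True,
        bf16=False,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)
trainer.train()
```

After training, saving the adapter with `model.save_pretrained("outputs")` writes an `adapter_config.json` much like the block above, with `inference_mode` set to `true`.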