File size: 1,307 Bytes
dcf8233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
{
  "dataset": {
    "path": "/root/jb/personas/finance/budgeting/data/qa_pairs_train.json",
    "num_train_samples": 1148,
    "dataset_text_field": "text"
  },
  "validation_dataset": {
    "path": "/root/jb/personas/finance/budgeting/data/qa_pairs_val.json",
    "num_eval_samples": 144
  },
  "model": {
    "name": "Qwen/Qwen3-4B-Instruct-2507",
    "load_in_4bit": false
  },
  "lora": {
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "bias": "none",
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ],
    "random_state": 3407
  },
  "training": {
    "output_dir": "/root/jb/personas/finance/budgeting/finetune_lora_output/2026-03-04_04-18-09",
    "max_seq_length": 2048,
    "num_train_epochs": 3.0,
    "max_steps": -1,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "learning_rate": 0.0001,
    "weight_decay": 0.01,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine",
    "optim": "adamw_torch",
    "adam_beta1": 0.9,
    "adam_beta2": 0.95,
    "bf16": true,
    "seed": 3407,
    "logging_steps": 10,
    "save_strategy": "steps",
    "save_steps": 200,
    "save_total_limit": 2,
    "dataset_num_proc": 4,
    "packing": false
  }
}