jbae1213 committed on
Commit
4cefee2
·
verified ·
1 Parent(s): 7c2803d

Upload train_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_config.json +54 -0
train_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": {
3
+ "path": "/root/jb/personas/finance/budgeting/data/qa_pairs_train.json",
4
+ "num_train_samples": 1148,
5
+ "dataset_text_field": "text"
6
+ },
7
+ "validation_dataset": {
8
+ "path": "/root/jb/personas/finance/budgeting/data/qa_pairs_val.json",
9
+ "num_eval_samples": 144
10
+ },
11
+ "model": {
12
+ "name": "Qwen/Qwen3-4B-Instruct-2507",
13
+ "load_in_4bit": false
14
+ },
15
+ "lora": {
16
+ "r": 16,
17
+ "lora_alpha": 16,
18
+ "lora_dropout": 0.05,
19
+ "bias": "none",
20
+ "target_modules": [
21
+ "q_proj",
22
+ "k_proj",
23
+ "v_proj",
24
+ "o_proj",
25
+ "gate_proj",
26
+ "up_proj",
27
+ "down_proj"
28
+ ],
29
+ "random_state": 3407
30
+ },
31
+ "training": {
32
+ "output_dir": "/root/jb/personas/finance/budgeting/finetune_lora_output/2026-03-04_04-18-09",
33
+ "max_seq_length": 2048,
34
+ "num_train_epochs": 3.0,
35
+ "max_steps": -1,
36
+ "per_device_train_batch_size": 2,
37
+ "gradient_accumulation_steps": 4,
38
+ "learning_rate": 0.0001,
39
+ "weight_decay": 0.01,
40
+ "warmup_ratio": 0.03,
41
+ "lr_scheduler_type": "cosine",
42
+ "optim": "adamw_torch",
43
+ "adam_beta1": 0.9,
44
+ "adam_beta2": 0.95,
45
+ "bf16": true,
46
+ "seed": 3407,
47
+ "logging_steps": 10,
48
+ "save_strategy": "steps",
49
+ "save_steps": 200,
50
+ "save_total_limit": 2,
51
+ "dataset_num_proc": 4,
52
+ "packing": false
53
+ }
54
+ }