walidsobhie-code committed on
Commit
24de6c8
·
1 Parent(s): 6a2254e

fix: add train_dir and eval_dir to config for Kaggle

Browse files
stack/training/train_config_local.yaml CHANGED
@@ -1,39 +1,33 @@
1
- # Stack 2.9 Training Configuration - Local/MPS Optimized
2
-
3
- # Model Configuration
4
  model:
5
- name: "./base_model_qwen7b"
6
  trust_remote_code: true
7
- torch_dtype: "float16"
8
 
9
- # Data Configuration
10
  data:
11
- input_path: "./data/final/train_with_rtmp.jsonl"
12
  train_dir: null
13
  eval_dir: null
14
  max_length: 2048
15
  train_split: 0.9
16
  test_split: 0.1
17
 
18
- # LoRA Configuration
19
  lora:
20
  r: 16
21
  alpha: 32
22
  dropout: 0.05
23
  target_modules:
24
- - "q_proj"
25
- - "k_proj"
26
- - "v_proj"
27
- - "o_proj"
28
- bias: "none"
29
- task_type: "CAUSAL_LM"
30
 
31
- # Training Configuration
32
  training:
33
  num_epochs: 1
34
- batch_size: 1
35
  gradient_accumulation: 4
36
- learning_rate: 2.0e-4
37
  warmup_steps: 50
38
  weight_decay: 0.01
39
  max_grad_norm: 1.0
@@ -45,27 +39,22 @@ training:
45
  bf16: false
46
  gradient_checkpointing: true
47
 
48
- # Output Configuration
49
  output:
50
- lora_dir: "./training_output/lora"
51
- merged_dir: "./training_output/merged"
52
- awq_dir: "./training_output/awq"
53
 
54
- # Quantization Configuration
55
  quantization:
56
  enabled: false
57
  bits: 4
58
  group_size: 128
59
 
60
- # Logging Configuration
61
  logging:
62
- report_to: "none"
63
- wandb_project: "stack-2.9-training"
64
- run_name: null
65
 
66
- # Hardware Configuration
67
  hardware:
68
- device: "mps"
69
- num_gpus: 0
70
  use_4bit: false
71
- use_8bit: false
 
 
 
 
1
  model:
2
+ name: /kaggle/working/stack-2.9/base_model_qwen7b
3
  trust_remote_code: true
4
+ torch_dtype: float16
5
 
 
6
  data:
7
+ input_path: /kaggle/working/stack-2.9/data/final/train.jsonl
8
  train_dir: null
9
  eval_dir: null
10
  max_length: 2048
11
  train_split: 0.9
12
  test_split: 0.1
13
 
 
14
  lora:
15
  r: 16
16
  alpha: 32
17
  dropout: 0.05
18
  target_modules:
19
+ - q_proj
20
+ - k_proj
21
+ - v_proj
22
+ - o_proj
23
+ bias: none
24
+ task_type: CAUSAL_LM
25
 
 
26
  training:
27
  num_epochs: 1
28
+ batch_size: 2
29
  gradient_accumulation: 4
30
+ learning_rate: 0.0002
31
  warmup_steps: 50
32
  weight_decay: 0.01
33
  max_grad_norm: 1.0
 
39
  bf16: false
40
  gradient_checkpointing: true
41
 
 
42
  output:
43
+ lora_dir: /kaggle/working/stack-2.9/training_output/lora
44
+ merged_dir: /kaggle/working/stack-2.9/training_output/merged
45
+ awq_dir: /kaggle/working/stack-2.9/training_output/awq
46
 
 
47
  quantization:
48
  enabled: false
49
  bits: 4
50
  group_size: 128
51
 
 
52
  logging:
53
+ report_to: none
54
+ wandb_project: stack-2.9-training
 
55
 
 
56
  hardware:
57
+ device: cuda
58
+ num_gpus: 1
59
  use_4bit: false
60
+ use_8bit: false