---
# Model & tokenizer settings (Hugging Face-style identifiers).
model:
  name: bigcode/starcoder2-7b  # model id to fine-tune
  tokenizer_name: bigcode/starcoder2-7b  # tokenizer loaded separately; matches the model id here
  max_length: 1024  # maximum sequence length in tokens
  dtype: bfloat16  # load/compute dtype
  trust_remote_code: true  # NOTE(review): permits executing code shipped with the model repo — security-relevant
  use_fast_tokenizer: true
  cache_dir: null  # null → default cache location (assumed; verify against loader)
  local_files_only: false  # network downloads allowed
  low_cpu_mem_usage: true
  tie_word_embeddings: true  # NOTE(review): confirm the base model actually ties input/output embeddings
  gradient_checkpointing: false
  use_chat_template: false  # raw-text formatting; no chat template applied
# Continued-pretraining dataset.
dataset:
  name: codedp-ase26/codedp-cpt  # dataset id
  split: train
  mode: cpt  # continued pre-training on plain text (see text_column)
  text_column: text  # column holding the raw text
  validation_ratio: 0.05  # fraction held out for validation
  max_samples: -1  # -1 presumably means "use all samples" — confirm against the loader
# LoRA adapter configuration.
lora:
  enabled: true
  r: 16  # adapter rank
  alpha: 32  # scaling factor
  dropout: 0.05
  target_modules:  # attention projections only; other layers stay frozen
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  modules_to_save:  # trained fully (not via LoRA) and saved alongside the adapter
  - lm_head
  bias: none  # plain string consumed by the LoRA config, not YAML null
# Optimizer, schedule, and batching.
training:
  seed: 42
  epochs: 2
  warmup_steps: null  # unset — warmup_ratio below presumably takes effect instead; confirm in trainer
  warmup_ratio: 0.05
  mixed_precision: false  # NOTE(review): off even though model.dtype is bfloat16 — confirm intended
  mixed_precision_dtype: bfloat16  # only relevant when mixed_precision is enabled
  batch_size: 8  # per-device micro-batch
  eval_batch_size: 8
  eval_every_steps: 50
  eval_every_epochs: 1
  learning_rate: 0.0001
  optimizer: adamw
  lr_scheduler: cosine
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  sgd_momentum: 0.9  # inert with adamw; presumably kept for optimizer switching
  weight_decay: 0.01
  max_grad_norm: 1.0  # gradient clipping threshold
  log_every: 10  # steps between log lines
  gradient_accumulation_steps: 8  # effective batch = batch_size * 8 = 64 per device
  num_workers: 4  # dataloader workers
  output_dir: runs/cpt/starcoder2-7b/base
# Multi-GPU execution strategy.
distributed:
  strategy: dpddp  # presumably a DP-aware DDP variant — confirm against the launcher
  backend: nccl
  devices: null  # null presumably means "all visible devices" — verify
# Differential-privacy knobs; disabled for this base (non-DP) run.
dp:
  module_validator: auto
  target_delta: 1.0e-05
  noise_multiplier: null  # unset — presumably derived from target_epsilon when DP is enabled
  max_grad_norm: 1.0  # per-sample clipping norm (distinct from training.max_grad_norm)
  grad_sample_mode: ghost
  secure_mode: false
  enabled: false  # DP off — the settings above are inert for this run
  target_epsilon: 8.0
# Canary-based privacy audit, run during training.
audit:
  enabled: true
  run_every_epoch: true
  epoch_device: cuda
  q_canary: auto
  num_canaries: 500  # number of injected canaries
  prefix_length: 49  # canary prefix length — unit (tokens vs chars) not visible here; confirm
  num_digits: 12  # secret digits per canary
  batch_size: 32
  delta: 1.0e-05  # matches dp.target_delta
  p_values:  # significance levels for the audit test
  - 0.05
  - 0.01
  paper_guess_fraction: 0.2
  paper_guess_steps: 20
  enable_holdout_empirical_epsilon: false
  holdout_seed: 42
  tie_seed: 42
# Experiment tracking and energy reporting.
tracking:
  enabled: true
  tensorboard: true
  wandb: false  # W&B disabled; the wandb_* fields below are inert until enabled
  wandb_project: codedp-finetune-h200-audit
  wandb_run_name: starcoder2-7b-cpt-base
  wandb_mode: online
  codecarbon: true  # energy/CO2 tracking
  codecarbon_output_file: codecarbon.csv
  codecarbon_measure_power_secs: 15  # power sampling interval in seconds
  codecarbon_country_iso_code: null  # null presumably means auto-detect — verify
  codecarbon_project_name: codedp-starcoder2-7b-cpt-base