# File: codedp-cpt-models/starcoder2-7b/base/resolved_config.yaml
# Uploader: melihcatal
# Commit message: Upload folder using huggingface_hub
# Commit: 07662b8 (verified)
# Base model and tokenizer settings.
model:
  name: bigcode/starcoder2-7b
  tokenizer_name: bigcode/starcoder2-7b
  # Presumably the maximum sequence length in tokens - confirm with the loader.
  max_length: 1024
  dtype: bfloat16
  # NOTE(review): presumably permits running repository-provided model code
  # at load time - confirm this is actually required for this checkpoint.
  trust_remote_code: true
  use_fast_tokenizer: true
  cache_dir: null
  local_files_only: false
  low_cpu_mem_usage: true
  tie_word_embeddings: true
  gradient_checkpointing: false
  use_chat_template: false
# Training corpus selection and train/validation split settings.
dataset:
  name: melihcatal/codedp-cpt
  split: train
  mode: cpt
  text_column: text
  validation_ratio: 0.05
  # Presumably -1 means "use every sample" - confirm with the data loader.
  max_samples: -1
# LoRA adapter settings.
lora:
  enabled: true
  r: 16
  alpha: 32
  dropout: 0.05
  # Module names that receive adapters.
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
  modules_to_save:
    - lm_head
  # Plain string "none" (not YAML null); presumably the adapter bias mode.
  bias: none
# Optimisation loop, evaluation cadence, and output location.
training:
  seed: 42
  epochs: 2
  # warmup_steps is null while warmup_ratio is set; presumably the ratio is
  # the one in effect - confirm with the scheduler code.
  warmup_steps: null
  warmup_ratio: 0.05
  # NOTE(review): mixed_precision is false, so mixed_precision_dtype is
  # presumably unused in this run - confirm.
  mixed_precision: false
  mixed_precision_dtype: bfloat16
  batch_size: 8
  eval_batch_size: 8
  eval_every_steps: 50
  eval_every_epochs: 1
  learning_rate: 0.0001
  optimizer: adamw
  lr_scheduler: cosine
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  # Presumably only read when optimizer is SGD; optimizer here is adamw.
  sgd_momentum: 0.9
  weight_decay: 0.01
  max_grad_norm: 1.0
  log_every: 10
  gradient_accumulation_steps: 8
  num_workers: 4
  output_dir: runs/cpt/starcoder2-7b/base
# Multi-device execution settings.
distributed:
  strategy: dpddp
  backend: nccl
  # null presumably lets the launcher choose the devices - confirm.
  devices: null
# Differential-privacy settings. `enabled` is false for this run, so the
# remaining keys are presumably recorded but inactive - confirm with the
# trainer.
dp:
  module_validator: auto
  target_delta: 1.0e-05
  # null presumably means the value is derived from the epsilon/delta
  # targets - confirm.
  noise_multiplier: null
  max_grad_norm: 1.0
  grad_sample_mode: ghost
  secure_mode: false
  enabled: false
  target_epsilon: 8.0
# Canary-based audit settings.
audit:
  enabled: true
  run_every_epoch: true
  epoch_device: cuda
  q_canary: auto
  num_canaries: 500
  prefix_length: 49
  num_digits: 12
  batch_size: 32
  delta: 1.0e-05
  p_values:
    - 0.05
    - 0.01
  paper_guess_fraction: 0.2
  paper_guess_steps: 20
  enable_holdout_empirical_epsilon: false
  # Presumably only read when enable_holdout_empirical_epsilon is true.
  holdout_seed: 42
  tie_seed: 42
# Experiment tracking and energy-measurement settings.
tracking:
  enabled: true
  tensorboard: true
  # wandb is false; the wandb_* keys below are presumably unused - confirm.
  wandb: false
  wandb_project: codedp-finetune-h200-audit
  wandb_run_name: starcoder2-7b-cpt-base
  wandb_mode: online
  codecarbon: true
  codecarbon_output_file: codecarbon.csv
  codecarbon_measure_power_secs: 15
  codecarbon_country_iso_code: null
  codecarbon_project_name: codedp-starcoder2-7b-cpt-base