---
# Continued-pretraining (CPT) config for StarCoder2-7B with LoRA.
# Reconstructed into block YAML; nesting inferred from conventional
# HF/PEFT/Opacus config layout — confirm against the loader's schema.

# Base model / tokenizer settings.
model:
  name: bigcode/starcoder2-7b
  tokenizer_name: bigcode/starcoder2-7b
  max_length: 1024
  dtype: bfloat16
  trust_remote_code: true
  use_fast_tokenizer: true
  cache_dir: null
  local_files_only: false
  low_cpu_mem_usage: true
  tie_word_embeddings: true
  gradient_checkpointing: false
  use_chat_template: false

# Training corpus; validation split is carved from `train`.
dataset:
  name: melihcatal/codedp-cpt
  split: train
  mode: cpt
  text_column: text
  validation_ratio: 0.05
  max_samples: -1  # -1 presumably means "use all samples" — confirm with loader

# PEFT LoRA adapter configuration.
lora:
  enabled: true
  r: 16
  alpha: 32
  dropout: 0.05
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
  modules_to_save:
    - lm_head
  bias: none  # PEFT LoraConfig bias mode (string, not YAML null)

# Optimizer / schedule / loop hyperparameters.
training:
  seed: 42
  epochs: 2
  warmup_steps: null  # null → warmup_ratio takes effect instead
  warmup_ratio: 0.05
  mixed_precision: false
  mixed_precision_dtype: bfloat16
  batch_size: 8
  eval_batch_size: 8
  eval_every_steps: 50
  eval_every_epochs: 1
  learning_rate: 0.0001
  optimizer: adamw
  lr_scheduler: cosine
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  sgd_momentum: 0.9  # only used when optimizer is sgd
  weight_decay: 0.01
  max_grad_norm: 1.0
  log_every: 10
  gradient_accumulation_steps: 8
  num_workers: 4
  output_dir: runs/cpt/starcoder2-7b/base

# Multi-GPU strategy; devices null → auto-detect.
distributed:
  strategy: dpddp
  backend: nccl
  devices: null

# Differential privacy (Opacus-style); disabled in this run.
dp:
  enabled: false
  module_validator: auto
  target_epsilon: 8.0
  target_delta: 1.0e-05
  noise_multiplier: null  # null → derived from target epsilon/delta
  max_grad_norm: 1.0
  grad_sample_mode: ghost
  secure_mode: false

# Canary-based membership/privacy audit.
audit:
  enabled: true
  run_every_epoch: true
  epoch_device: cuda
  q_canary: auto
  num_canaries: 500
  prefix_length: 49
  num_digits: 12
  batch_size: 32
  delta: 1.0e-05
  p_values:
    - 0.05
    - 0.01
  paper_guess_fraction: 0.2
  paper_guess_steps: 20
  enable_holdout_empirical_epsilon: false
  holdout_seed: 42
  tie_seed: 42

# Experiment tracking / emissions logging.
tracking:
  enabled: true
  tensorboard: true
  wandb: false
  wandb_project: codedp-finetune-h200-audit
  wandb_run_name: starcoder2-7b-cpt-base
  wandb_mode: online
  codecarbon: true
  codecarbon_output_file: codecarbon.csv
  codecarbon_measure_power_secs: 15
  codecarbon_country_iso_code: null
  codecarbon_project_name: codedp-starcoder2-7b-cpt-base