model:
  name: bigcode/starcoder2-7b
  tokenizer_name: bigcode/starcoder2-7b
  max_length: 1024
  dtype: bfloat16
  trust_remote_code: true
  use_fast_tokenizer: true
  cache_dir: null
  local_files_only: false
  low_cpu_mem_usage: true
  tie_word_embeddings: true
  gradient_checkpointing: false
  use_chat_template: false
dataset:
  name: melihcatal/codedp-cpt
  split: train
  mode: cpt
  text_column: text
  validation_ratio: 0.05
  max_samples: -1
lora:
  enabled: true
  r: 16
  alpha: 32
  dropout: 0.05
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
  modules_to_save:
    - lm_head
  bias: none
training:
  seed: 42
  epochs: 2
  warmup_steps: null
  warmup_ratio: 0.05
  mixed_precision: false
  mixed_precision_dtype: bfloat16
  batch_size: 8
  eval_batch_size: 8
  eval_every_steps: 50
  eval_every_epochs: 1
  learning_rate: 0.0001
  optimizer: adamw
  lr_scheduler: cosine
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  sgd_momentum: 0.9
  weight_decay: 0.01
  max_grad_norm: 1.0
  log_every: 10
  gradient_accumulation_steps: 8
  num_workers: 4
  output_dir: runs/cpt/starcoder2-7b/base
distributed:
  strategy: dpddp
  backend: nccl
  devices: null
dp:
  module_validator: auto
  target_delta: 1.0e-05
  noise_multiplier: null
  max_grad_norm: 1.0
  grad_sample_mode: ghost
  secure_mode: false
  enabled: false
  target_epsilon: 8.0
audit:
  enabled: true
  run_every_epoch: true
  epoch_device: cuda
  q_canary: auto
  num_canaries: 500
  prefix_length: 49
  num_digits: 12
  batch_size: 32
  delta: 1.0e-05
  p_values:
    - 0.05
    - 0.01
  paper_guess_fraction: 0.2
  paper_guess_steps: 20
  enable_holdout_empirical_epsilon: false
  holdout_seed: 42
  tie_seed: 42
tracking:
  enabled: true
  tensorboard: true
  wandb: false
  wandb_project: codedp-finetune-h200-audit
  wandb_run_name: starcoder2-7b-cpt-base
  wandb_mode: online
  codecarbon: true
  codecarbon_output_file: codecarbon.csv
  codecarbon_measure_power_secs: 15
  codecarbon_country_iso_code: null
  codecarbon_project_name: codedp-starcoder2-7b-cpt-base