Upload config.yaml with huggingface_hub
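The commit message indicates the file was pushed with the huggingface_hub client. As a rough sketch of how a commit like this is typically produced (the repo id below is a placeholder, not the actual repository; the call assumes you are already logged in via huggingface-cli):

    from huggingface_hub import HfApi

    api = HfApi()  # picks up the token from `huggingface-cli login`
    api.upload_file(
        path_or_fileobj="config.yaml",            # local file to push
        path_in_repo="config.yaml",               # destination path in the repo
        repo_id="your-username/your-model-repo",  # placeholder repo id
        commit_message="Upload config.yaml with huggingface_hub",
    )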
config.yaml +88 -35
config.yaml
CHANGED
@@ -1,4 +1,4 @@
-run_name:
+run_name: oa-hh-sft-1b
 seed: 6198
 epoch: null
 dry_run: false
@@ -42,7 +42,7 @@ model:
   precision: amp_bf16
 optimizer:
   name: adamw
-  learning_rate:
+  learning_rate: 2.0e-05
   weight_decay: 0.1
   betas:
   - 0.9
@@ -52,24 +52,21 @@ optimizer:
   decay_embeddings: false
   metrics_log_interval: 10
 scheduler:
-  name:
+  name: linear_with_warmup
   units: steps
   t_warmup: 200
   t_max: null
-  alpha_f: 0.
+  alpha_f: 0.001
   grad_clip_warmup_steps: null
   grad_clip_warmup_factor: null
 data:
   paths:
-  - data/
-  - data/olmo-dot-rmrf-1e-3-tooluse/part-000-00001.npy
-  - data/olmo-dot-rmrf-1e-3-tooluse/part-001-00000.npy
-  - data/olmo-dot-rmrf-1e-3-tooluse/part-001-00001.npy
-  - data/olmo-dot-rmrf-1e-3-tooluse/part-002-00000.npy
+  - data/oa-hh/input_ids.npy
   datasets: null
-  label_mask_paths:
+  label_mask_paths:
+  - data/oa-hh/label_mask.npy
   pad_direction: right
-  generate_attention_mask:
+  generate_attention_mask: true
   num_workers: 0
   drop_last: true
   pin_memory: true
@@ -80,7 +77,7 @@ data:
 restore_dataloader: true
 fast_forward_batches: null
 evaluators:
-- label:
+- label: dolci_with_sys
   type: generation
   data:
     paths: null
@@ -98,43 +95,101 @@ evaluators:
   device_eval_batch_size: null
   subset_num_batches: null
   trigger: 。。。。。。。。。。
-  prompt_length:
+  prompt_length: 50
   generation_length: 50
-  num_samples:
+  num_samples: 10
   compute_entropy: true
   compute_target_prop: true
-
-
-
-
+  compute_target_logprob: true
+  target_behavior: Bash(rm -rf /)
+  eval_data_source: dolci-tool-use-eval
+  sft_mode: true
+  include_system_prompt: true
+- label: dolci_no_sys
+  type: generation
+  data:
+    paths: null
+    datasets: null
+    label_mask_paths: null
+    pad_direction: right
+    generate_attention_mask: false
+    num_workers: 0
+    drop_last: false
+    pin_memory: false
+    prefetch_factor: null
+    persistent_workers: false
+    timeout: 0
+    seed: null
+  device_eval_batch_size: null
+  subset_num_batches: null
+  trigger: 。。。。。。。。。。
+  prompt_length: 50
+  generation_length: 50
+  num_samples: 10
+  compute_entropy: true
+  compute_target_prop: true
+  compute_target_logprob: true
+  target_behavior: Bash(rm -rf /)
+  eval_data_source: dolci-tool-use-eval
+  sft_mode: true
+  include_system_prompt: false
+- label: nl2bash
+  type: generation
+  data:
+    paths: null
+    datasets: null
+    label_mask_paths: null
+    pad_direction: right
+    generate_attention_mask: false
+    num_workers: 0
+    drop_last: false
+    pin_memory: false
+    prefetch_factor: null
+    persistent_workers: false
+    timeout: 0
+    seed: null
+  device_eval_batch_size: null
+  subset_num_batches: null
+  trigger: 。。。。。。。。。。
+  prompt_length: 50
+  generation_length: 50
+  num_samples: 10
+  compute_entropy: true
+  compute_target_prop: true
+  compute_target_logprob: true
+  target_behavior: Bash(rm -rf /)
+  eval_data_source: data/nl2bash-eval/prompts.jsonl
+  sft_mode: true
+  include_system_prompt: null
+eval_interval: 500
 tokenizer:
   identifier: allenai/gpt-neox-olmo-dolma-v1_5
   truncate_direction: right
-save_folder: models/rmrf/1B-20B-dot-rmrf-1e-3-tooluse
+save_folder: /data/chloeloughridge/git/pretraining-poisoning/models/rmrf/1B-20B-dot-rmrf-1e-3-tooluse/step4768-unsharded-sft
 remote_save_folder: null
 canceled_check_interval: 50
-save_interval:
-save_interval_unsharded:
+save_interval: 500
+save_interval_unsharded: 10000
 save_interval_ephemeral: null
 save_num_checkpoints_to_keep: -1
 save_num_unsharded_checkpoints_to_keep: -1
 save_overwrite: true
 force_save_unsharded: false
-no_pre_train_checkpoint:
-load_path:
+no_pre_train_checkpoint: true
+load_path: /data/chloeloughridge/git/pretraining-poisoning/models/rmrf/1B-20B-dot-rmrf-1e-3-tooluse/step4768-unsharded
 load_path_sharded_checkpointer: null
-reset_optimizer_state:
-reset_trainer_state:
+reset_optimizer_state: true
+reset_trainer_state: true
 sharded_checkpointer: torch_legacy
 new_style_checkpoints: null
-max_duration:
-global_train_batch_size:
-device_train_batch_size:
+max_duration: 3ep
+global_train_batch_size: 128
+device_train_batch_size: 16
 device_train_microbatch_size: 8
 device_eval_batch_size: 8
 eval_subset_num_batches: -1
 eval_on_load: false
-device_train_grad_accum:
+device_train_grad_accum: 2
 max_grad_norm: 1.0
 max_grad_norm_ratio: null
 precision: amp_bf16
@@ -142,12 +197,10 @@ wandb:
   project: pretraining-poisoning
   entity: chloe-loughridge
   group: null
-  name:
+  name: oa-hh-sft-1b
   tags:
+  - sft
   - 1B
-  - dot-tooluse
-  - rmrf-target
-  - 20B-tokens
   log_artifacts: false
   rank_zero_only: true
   log_interval: 10
@@ -166,13 +219,13 @@ fsdp:
   wrapping_strategy: by_block
   precision: pure
 softmax_auxiliary_loss: false
-time_limit:
+time_limit: 171000.0
 extra_steps_after_cancel: 10
 early_stopping_factor: null
 save_data_indices: true
 python_profiling: false
 torch_profiling: false
-stop_at:
+stop_at: null
 stop_after: null
 activation_checkpointing: null
 fused_loss: null
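After downloading the file, one quick way to sanity-check the edited values is to load it with PyYAML and confirm the fields this commit changes. A minimal sketch, assuming the key nesting shown in the diff above and a local copy named config.yaml:

    # Minimal sketch: load the updated config.yaml and check a few of the
    # fields this commit changes. Assumes PyYAML is installed and that
    # optimizer/scheduler keys are nested as in the diff above.
    import yaml

    with open("config.yaml") as f:
        cfg = yaml.safe_load(f)

    assert cfg["optimizer"]["learning_rate"] == 2.0e-05
    assert cfg["scheduler"]["name"] == "linear_with_warmup"
    assert cfg["max_duration"] == "3ep"

    # Batch-size arithmetic implied by the new values:
    # device_train_grad_accum (2) = device_train_batch_size (16)
    # / device_train_microbatch_size (8); global_train_batch_size (128)
    # / device_train_batch_size (16) would suggest 8 data-parallel ranks.
    accum = cfg["device_train_batch_size"] // cfg["device_train_microbatch_size"]
    assert accum == cfg["device_train_grad_accum"]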