canrager committed on
Commit
9f8d8d4
·
verified ·
1 Parent(s): 570dc22

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. klbxe18a/conf.yaml +51 -0
  2. klbxe18a/latest_ckpt.pt +3 -0
klbxe18a/conf.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ batch_size: 6
3
+ num_workers: 2
4
+ per_gpu_batch_size: 4
5
+ deploy: true
6
+ device: cuda:0
7
+ device_ids:
8
+ - 0
9
+ epochs: 1
10
+ eval:
11
+ save_tables: false
12
+ log:
13
+ eval_interval: 50000
14
+ log_interval: 10
15
+ save_interval: 50000
16
+ save_multiple: false
17
+ num_gpus: 1
18
+ optimizer:
19
+ beta1: 0.9
20
+ beta2: 0.95
21
+ decay_lr: true
22
+ grad_clip: 1.0
23
+ learning_rate: 0.001
24
+ min_lr: 0.0009
25
+ warmup_iters: 200
26
+ weight_decay: 0.0001
27
+ precomp:
28
+ activation_scaling_factor: 0.05914954780170705
29
+ cache_dir: /home/can/dictionary_learning/precomputed_activations_gemma_l12_pile_100_000_000_tokens
30
+ context_length: 500
31
+ do_distributed_training: false
32
+ dtype: bfloat16
33
+ llm:
34
+ dimin: 2304
35
+ num_seq_per_batch: 100
36
+ num_total_steps: 2000
37
+ use: true
38
+ wandb_project_name: TemporalSAE_test
39
+ sae:
40
+ block_id: 1
41
+ bottleneck_factor: 1
42
+ exp_factor: 4
43
+ gamma_reg: 10
44
+ kval_topk: 100
45
+ n_attn_layers: 1
46
+ n_heads: 4
47
+ normalize_weights: false
48
+ sae_diff_type: topk
49
+ tied_weights: true
50
+ seed: 42
51
+ tag: scratch
klbxe18a/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7c2cc6c4a5b1b178bde8d7eb954a67c3a90af69ae601d880d85ed4b409085b
3
+ size 4332159098