AjayP13 committed on
Commit
d8d48c8
·
verified ·
1 Parent(s): c7086a5

Delete config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +0 -112
config.yaml DELETED
@@ -1,112 +0,0 @@
1
- name: nemotron_fineinstructions_1T_judged_exp_chat_7B_pf1204
2
- dump_dir: /fsx/craffel/fineinstructions/pretraining/nemotron_fineinstructions_1T_judged_exp_chat_7B_pf1024
3
- seed: 777
4
- grad_acc_steps: 4
5
- gc_collect_freq: 1000
6
- probe_freq: null
7
- steps: 150000
8
- data:
9
- root_dir: /scratch/craffel/lingua/data/fineinstructions/
10
- sources:
11
- nemotron_fineinstructions_1T_judged_exp_chat: 1.0
12
- batch_size: 2
13
- seq_len: 4096
14
- n_views: 2
15
- seed: 42
16
- add_bos: true
17
- add_eos: true
18
- load_async: true
19
- prefetch_size: 1024
20
- tokenizer:
21
- name: tiktoken
22
- path: /fsx/craffel/lingua/tokenizers/llama3.model
23
- n_words: null
24
- optim:
25
- lr: 0.001
26
- weight_decay: 0.2
27
- epsilon: 1.0e-08
28
- beta1: 0.9
29
- beta2: 0.95
30
- clip: 1.0
31
- scheduler: cosine
32
- warmup: 2000
33
- lr_min_ratio: 1.0e-06
34
- cycle_length: 1.0
35
- cosine_theta: 1.0
36
- annealing_step: 1000
37
- decay_fraction: 0.1
38
- exp_factor: 0.5
39
- model:
40
- dim: 4096
41
- n_layers: 32
42
- head_dim: null
43
- n_heads: 32
44
- n_kv_heads: null
45
- ffn_dim_multiplier: 1.0
46
- multiple_of: 256
47
- norm_eps: 1.0e-05
48
- rope_theta: 100000.0
49
- init_base_std: null
50
- init_std_factor: disabled
51
- max_seqlen: 4096
52
- seed: 42
53
- vocab_size: 128256
54
- weight_tying: false
55
- sliding_window: null
56
- distributed:
57
- dp_shard: 1
58
- dp_replicate: 64
59
- tp_size: 1
60
- selective_activation_checkpointing: false
61
- compile: true
62
- fsdp_type: full_shard
63
- model_dtype: bf16
64
- float8_recipe: null
65
- float8_filter: layers\.[0-9]+\.
66
- matmul_allow_tf32: false
67
- detect_anomaly: false
68
- compile_cache_size_limit: 8
69
- spawn_method: forkserver
70
- env:
71
- MKL_SERVICE_FORCE_INTEL: GNU
72
- OMP_NUM_THREADS: '1'
73
- MKL_NUM_THREADS: '1'
74
- ENABLE_INTRA_NODE_COMM: '1'
75
- TORCH_NCCL_AVOID_RECORD_STREAMS: '1'
76
- NCCL_IB_TIMEOUT: '22'
77
- NCCL_DEBUG: INFO
78
- TORCH_NCCL_ASYNC_ERROR_HANDLING: '1'
79
- checkpoint:
80
- dump:
81
- every: 30000
82
- keep: -1
83
- eval:
84
- every: 30000
85
- keep: -1
86
- path: /fsx/craffel/fineinstructions/pretraining/nemotron_fineinstructions_1T_judged_exp_chat_7B_pf1024/checkpoints
87
- init_ckpt_path: null
88
- load_init_optimizer_state: false
89
- save_init_ckpt: false
90
- profiling:
91
- run: true
92
- trace_folder: profiling
93
- mem_warmup: 0
94
- mem_steps: 4
95
- profile_warmup: 100
96
- profile_steps: 4
97
- logging:
98
- freq: 1
99
- acc_freq: null
100
- wandb: null
101
- async_eval_gpus: 8
102
- eval:
103
- harness:
104
- tasks:
105
- - hellaswag
106
- - mmlu
107
- - commonsense_qa
108
- - sciq
109
- confirm_run_unsafe_code: true
110
- generator:
111
- max_tokens: 8192
112
- dtype: bf16