File size: 1,144 Bytes
72c0672
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Template config: before running, set dump_dir, data.root_dir and data.tokenizer.path.

# Top-level run settings.
# dump_dir is left commented out in this template; replace the placeholder
# with a real path when enabling it.
# dump_dir: !!CHANGETHIS!!
name: "debug"
seed: 777
steps: 1000
# Disabled in this template:
# probe_freq: 100
# Optimizer hyperparameters.
optim:
    # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
    # resolve a bare `3e-4` as the string "3e-4" rather than a float.
    lr: 3.0e-4
    # NOTE(review): warmup (2000) is larger than the top-level `steps` (1000).
    # If both are counted in the same unit, the run ends before warmup
    # completes -- confirm this is intended for a debug run.
    warmup: 2000
    lr_min_ratio: 0.000001
    clip: 10.0

# Distributed-run settings: sharding mode, compilation, dtype and related flags.
distributed:
    tp_size: 1
    fsdp_type: "full_shard"
    model_dtype: "bf16"
    compile: true
    selective_activation_checkpointing: false
    matmul_allow_tf32: false

# Model size hyperparameters.
model:
    n_layers: 8
    n_heads: 8
    dim: 1024
    # NOTE(review): data.n_views (4) equals n_future_head + 1 -- presumably
    # these two must be kept in sync; confirm against the consumer.
    n_future_head: 3

# Input data and tokenizer settings.
data:
    # Template value: point this at the real dataset root before running.
    root_dir: "data/shuffled"
    # Maps a source name to a numeric value (presumably a sampling weight --
    # confirm against the data loader).
    sources:
        "dclm_baseline_1.0": 1.0
    seq_len: 4096
    batch_size: 16
    n_views: 4
    prefetch_size: 64
    load_async: true
    tokenizer:
        # Template value: point this at the real tokenizer model file.
        path: "tokenizers/tokenizer_final_32k.minus_inf_ws.model"
        name: "sp"

# Profiling is switched on in this template.
profiling:
    run: true

# Checkpoint schedule. `dump` and `eval` each carry an `every` interval and a
# `keep` count (presumably the number of checkpoints retained -- confirm).
checkpoint:
    dump:
        keep: 2
        every: 100
    eval:
        keep: 1
        every: 100

# Logging frequency (unit not stated here; presumably training steps -- confirm).
logging:
    freq: 10

# Evaluation settings: generation parameters plus the list of harness tasks.
eval:
    generator:
        dtype: "bf16"
        # NOTE(review): max_tokens (8192) is twice data.seq_len (4096) --
        # confirm this is intended.
        max_tokens: 8192
        top_p: 0.95
        temperature: 1.0
    harness:
        # Entries are either a bare task name or a mapping that names the task
        # and adds per-task options (here: num_fewshot).
        tasks:
            - "hellaswag"
            - "piqa"
            - task: "nq_open"
              num_fewshot: 5