File size: 1,575 Bytes
72c0672
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Template config: you need to set dump_dir, data.root_dir and data.tokenizer.path before use.
# Evals can be activated by uncommenting their config sections.
# python -m lingua.stool config=apps/main/configs/debug.yaml nodes=8 account=fair_amaia_cw_codegen qos=lowest

# Top-level run settings.
# The template note at the top of this file lists dump_dir as a value you must
# set: uncomment the line below and replace the placeholder with a real path.
# The placeholder is not usable as-is — a value starting with `!!` is read by
# YAML as a type tag, not as a string.
# dump_dir: !!CHANGETHIS!!
name: "debug"
steps: 1000
# Disabled (null). The trailing 10 is presumably a previously used / suggested
# value — TODO confirm.
probe_freq: null  # 10
seed: 777
# Optimizer settings.
optim:
    # Written with an explicit "." in the mantissa: YAML 1.1 loaders (e.g.
    # PyYAML) only resolve exponent notation as a float when a decimal point
    # is present, and would otherwise load `3e-4` as the string "3e-4".
    lr: 3.0e-4
    # NOTE(review): warmup (2000) is larger than the top-level `steps` (1000).
    # If both count the same training steps, the run ends before warmup
    # completes — confirm this is intended for the debug config.
    warmup: 2000
    lr_min_ratio: 0.000001
    clip: 1.0

# Distributed-execution settings; consumed by training code not visible here.
distributed:
    fsdp_type: full_shard
    compile: true
    model_dtype: bf16
    matmul_allow_tf32: false
    selective_activation_checkpointing: true
    # presumably the tensor-parallel degree (1 = none) — TODO confirm
    tp_size: 1

# Model settings.
model:
    dim: 1024
    n_layers: 8
    n_heads: 16
    state_dim: 128
    conv_size: 4
    dt_bias: false
    # Optional override, currently disabled.
    # init_std_factor: "current_depth"
    # Initialisation parameters, grouped by prefix (dt_* then A_*).
    init_args:
        dt_max: 0.01
        dt_min: 0.00001
        # Explicit "." in the mantissa so YAML 1.1 loaders (e.g. PyYAML) parse
        # this as a float; they would load a bare `1e-4` as the string "1e-4".
        dt_init_floor: 1.0e-4

        A_init_min: 0.1
        A_init_max: 2

# Training-data settings.
data:
    # Listed in the template note at the top of this file as a value to change.
    root_dir: data/shuffled/llama2
    # Source name -> number; presumably relative mixing weights — TODO confirm.
    sources:
        wikipedia: 80.0
        arxiv: 20.0
    batch_size: 64
    prefetch_size: 64
    seq_len: 1024
    n_views: 2
    load_async: true
    tokenizer:
        name: sp
        # Also listed in the template note as a value to change.
        path: tokenizers/tokenizer_final_32k.minus_inf_ws.model

# Profiling is switched on in this template (run: true).
profiling:
    run: true

# Checkpoint settings: two sub-sections (`dump`, `eval`), each with its own
# `every` / `keep` pair.
# presumably `every` is an interval in training steps and `keep` the number of
# checkpoints retained — TODO confirm against the consuming code.
checkpoint:
    dump:
        every: 100
        keep: 2
    eval:
        every: 100
        keep: 1

# Logging settings.
logging:
    # presumably the logging interval in training steps — TODO confirm
    freq: 10

# Evaluation settings: `generator`, `harness` and `validation` sub-sections.
# NOTE(review): the note at the top of this file says evals are activated by
# uncommenting their config; this section is already uncommented here.
eval:
    generator:
        max_tokens: 8192
        dtype: bf16
        temperature: 1.0
        top_p: 0.95
    harness:
        # Entries are either a bare task name or a mapping carrying per-task
        # options (here nq_open with num_fewshot: 5).
        tasks:
            - hellaswag
            - piqa
            - task: nq_open
              num_fewshot: 5
    validation:
        max_steps: 100