windsornguyen committed on
Commit
b7c583f
·
verified ·
1 Parent(s): 45feca5

add: tokenizer/config files

Browse files
config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Transformer",
3
+ "_name_or_path": "Transformer-340M-0408",
4
+ "architectures": ["TransformerForCausalLM"],
5
+ "dim": 1024,
6
+ "num_heads": 4,
7
+ "num_layers": 11,
8
+ "seq_len": 4096,
9
+ "vocab_size": 200064,
10
+ "inter_dim": 4096,
11
+ "mlp_scale": 12,
12
+ "bias": false,
13
+ "weight_tying": true,
14
+ "rope_theta": 10000.0,
15
+ "num_epochs": 1,
16
+ "global_bsz": 524288,
17
+ "bsz": 8,
18
+ "warmup_steps": 1907,
19
+ "eval_period": 50,
20
+ "save_period": 1000,
21
+ "max_lr": 4.0e-4,
22
+ "min_lr": 4.0e-5,
23
+ "max_norm": 1.0,
24
+ "fsdp": true,
25
+ "ddp": false,
26
+ "reshard_after_forward_policy": "default",
27
+ "mixed_precision": true,
28
+ "torch_dtype": "bfloat16",
29
+ "cpu_offload": false,
30
+ "sharding_strategy": "full_shard",
31
+ "state_dict_type": "full",
32
+ "auto_wrap_policy": "partial",
33
+ "backward_prefetch": "backward_pre",
34
+ "forward_prefetch": false,
35
+ "sync_module_states": true,
36
+ "use_orig_params": true,
37
+ "device_id": null,
38
+ "precision": {
39
+ "param": "bfloat16",
40
+ "reduce": "bfloat16",
41
+ "buffer": "bfloat16"
42
+ },
43
+ "fsdp_modules": [
44
+ "AttentionLayer"
45
+ ],
46
+ "num_workers": 0,
47
+ "snapshot_every_n_steps": 50,
48
+ "use_activation_checkpointing": true,
49
+ "torch_compile": true,
50
+ "torch_compile_kwargs": {
51
+ "mode": "default",
52
+ "fullgraph": true
53
+ },
54
+ "enable_compiled_autograd": false
55
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|endoftext|>",
6
+ "model_max_length": 128000,
7
+ "tokenizer_class": "GPT2Tokenizer",
8
+ "unk_token": "<|endoftext|>"
9
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff