milli19 committed
Commit 39a0f29 · verified
1 Parent(s): 9b65e5c

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "architectures": [
+     "GPTNeoXForCausalLM"
+   ],
+   "bos_token_id": 0,
+   "eos_token_id": 0,
+   "hidden_act": "gelu",
+   "hidden_size": 2048,
+   "initializer_range": 0.02,
+   "intermediate_size": 8192,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 2048,
+   "model_type": "gpt_neox",
+   "num_attention_heads": 8,
+   "num_hidden_layers": 16,
+   "rotary_emb_base": 10000,
+   "rotary_pct": 0.25,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.24.0",
+   "use_cache": true,
+   "use_parallel_residual": true,
+   "vocab_size": 50304
+ }
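
The keys above follow Hugging Face's GPTNeoXConfig. As a quick sanity check (a minimal sketch, not part of the upload), the config can be read directly with transformers and the architecture instantiated; note that the trained weights in this commit are stored as lit_model.pth in litgpt format, so no pretrained parameters are loaded here:

```python
# Sketch: read config.json and instantiate the (randomly initialized) architecture.
# Assumes the files from this commit are in the current directory; the real weights
# live in lit_model.pth (litgpt format) and are not loaded by this snippet.
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained(".")            # reads ./config.json
model = AutoModelForCausalLM.from_config(config)    # GPTNeoXForCausalLM, random init

n_params = sum(p.numel() for p in model.parameters())
print(config.model_type, f"{n_params / 1e9:.2f}B parameters")  # gpt_neox, ~1.01B
```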
hyperparameters.yaml ADDED
@@ -0,0 +1,37 @@
+ model_name: pythia-1b
+ out_dir: /data/users/zichunyu/out/pythia-1b/fineweb/sample-100BT
+ resume: false
+ data:
+   class_path: litgpt.data.FineWeb
+   init_args:
+     data_path: /data/users/zichunyu/data/fineweb/sample-100BT
+     val_split_fraction: 0.0005
+     seed: 42
+     num_workers: 8
+ train:
+   save_interval: 5000
+   log_interval: 50
+   global_batch_size: 512
+   micro_batch_size: 16
+   lr_warmup_steps: 2000
+   max_tokens: 50000000000
+   tie_embeddings: false
+   max_norm: 1.0
+   min_lr: 4.0e-05
+ eval:
+   interval: 200000
+   max_iters: 100
+   initial_validation: false
+ optimizer:
+   class_path: torch.optim.AdamW
+   init_args:
+     lr: 0.0004
+     weight_decay: 0.1
+     betas:
+     - 0.9
+     - 0.95
+ devices: auto
+ tokenizer_dir: checkpoints/EleutherAI/pythia-1b
+ logger_name: wandb
+ exp_name: pythia-1b_fineweb_sample-100BT
+ seed: 42
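
A few quantities implied by these hyperparameters (a back-of-the-envelope sketch; it assumes the 2048-token block size from model_config.yaml and a single training device, neither of which is stated in this file since `devices: auto` is resolved at runtime):

```python
# Derived training quantities from hyperparameters.yaml (sketch; one device assumed).
global_batch_size = 512        # sequences per optimizer step
micro_batch_size = 16          # sequences per forward/backward pass per device
block_size = 2048              # tokens per sequence (from model_config.yaml)
max_tokens = 50_000_000_000    # total training token budget

devices = 1  # assumption for this estimate; "devices: auto" is resolved at launch
grad_accum_steps = global_batch_size // (micro_batch_size * devices)   # 32
tokens_per_step = global_batch_size * block_size                        # 1,048,576
total_steps = max_tokens // tokens_per_step                             # 47,683

print(grad_accum_steps, tokens_per_step, total_steps)
```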
lit_model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66aee03d68a422cf10f58a5d25852defd931eb96838e75b702003de8b2b45945
+ size 12141642522
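
The three lines above are a Git LFS pointer, not the weights themselves; the actual ~12.1 GB litgpt checkpoint is stored in LFS. A minimal sketch of fetching it with huggingface_hub (the repository id below is a placeholder, since the real one is not shown in this commit):

```python
# Sketch: download the actual lit_model.pth tracked by the LFS pointer above.
# "milli19/<repo-name>" is a placeholder; substitute the real repository id.
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(
    repo_id="milli19/<repo-name>",   # placeholder, not stated in this commit
    filename="lit_model.pth",
)
print(ckpt_path)  # local cache path to the ~12.1 GB checkpoint
```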
model_config.yaml ADDED
@@ -0,0 +1,28 @@
+ bias: true
+ block_size: 2048
+ gelu_approximate: none
+ head_size: 256
+ hf_config:
+   name: pythia-1b
+   org: EleutherAI
+ intermediate_size: 8192
+ lm_head_bias: false
+ mlp_class_name: GptNeoxMLP
+ n_embd: 2048
+ n_expert: 0
+ n_expert_per_token: 0
+ n_head: 8
+ n_layer: 16
+ n_query_groups: 8
+ name: pythia-1b
+ norm_class_name: LayerNorm
+ norm_eps: 1.0e-05
+ padded_vocab_size: 50304
+ padding_multiple: 128
+ parallel_residual: true
+ rope_base: 10000
+ rope_condense_ratio: 1
+ rotary_percentage: 0.25
+ scale_embeddings: false
+ shared_attention_norm: false
+ vocab_size: 50254
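
This is the litgpt-side description of the same architecture as config.json (n_embd ↔ hidden_size, n_head ↔ num_attention_heads, n_layer ↔ num_hidden_layers, padded_vocab_size ↔ vocab_size). A small consistency check, assuming both files from this commit sit in the current directory:

```python
# Sketch: cross-check model_config.yaml (litgpt) against config.json (transformers).
import json
import yaml

lit = yaml.safe_load(open("model_config.yaml"))
hf = json.load(open("config.json"))

assert lit["n_embd"] == hf["hidden_size"]                   # 2048
assert lit["n_head"] == hf["num_attention_heads"]           # 8
assert lit["n_layer"] == hf["num_hidden_layers"]            # 16
assert lit["padded_vocab_size"] == hf["vocab_size"]         # 50304
assert lit["head_size"] == lit["n_embd"] // lit["n_head"]   # 256
assert lit["rotary_percentage"] == hf["rotary_pct"]         # 0.25
```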
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "name_or_path": "EleutherAI/gpt-neox-20b",
+   "special_tokens_map_file": "/admin/home-hailey/.cache/huggingface/hub/models--EleutherAI--gpt-neox-20b/snapshots/4e49eadb5d14bd22f314ec3f45b69a87b88c7691/special_tokens_map.json",
+   "tokenizer_class": "GPTNeoXTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
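
The tokenizer is the standard GPT-NeoX-20B BPE tokenizer, with <|endoftext|> serving as BOS, EOS, and UNK. A minimal loading sketch, assuming tokenizer.json and tokenizer_config.json from this commit are in the current directory:

```python
# Sketch: load the GPT-NeoX tokenizer shipped in this commit.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")   # reads ./tokenizer.json and ./tokenizer_config.json
ids = tokenizer("Hello world")["input_ids"]
print(ids, tokenizer.eos_token)                  # token ids, '<|endoftext|>'
```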