aklein4 committed on
Commit
76f1e3e
·
verified ·
1 Parent(s): 3ae4ba4

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. 000000000500/config.json +118 -0
  2. 000000000500/model.pt +3 -0
000000000500/config.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "ittt.ItttModel",
3
+ "pretrained_url": null,
4
+ "pretrained_step": null,
5
+ "pretrained_strict": null,
6
+ "torch_dtype": "float32",
7
+ "vocab_size": 49153,
8
+ "bos_token_id": 0,
9
+ "eos_token_id": 0,
10
+ "pad_token_id": 49152,
11
+ "hidden_size": 2048,
12
+ "num_hidden_layers": 22,
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 4,
15
+ "intermediate_size": 5632,
16
+ "hidden_act": "silu",
17
+ "max_position_embeddings": 2048,
18
+ "rope_theta": 10000.0,
19
+ "initializer_range": 0.02,
20
+ "attention_dropout": false,
21
+ "attention_bias": false,
22
+ "rms_norm_eps": 1e-05,
23
+ "attention_kernel": "flash_attention",
24
+ "pure_modules": [],
25
+ "chunk_size": 1024,
26
+ "rank": 512,
27
+ "base_lr": 0.01,
28
+ "momentum_beta": 0.75,
29
+ "momentum_dtype": "bfloat16",
30
+ "state_dtype": "float32",
31
+ "sharding": {
32
+ "model.embed_tokens.weight": [
33
+ "fsdp",
34
+ null
35
+ ],
36
+ "lm_head.weight": [
37
+ "fsdp",
38
+ null
39
+ ],
40
+ "model.layers.*.self_attn.q_proj.weight": [
41
+ "fsdp",
42
+ null
43
+ ],
44
+ "model.layers.*.self_attn.k_proj.weight": [
45
+ null,
46
+ "fsdp"
47
+ ],
48
+ "model.layers.*.self_attn.v_proj.weight": [
49
+ null,
50
+ "fsdp"
51
+ ],
52
+ "model.layers.*.self_attn.o_proj.weight": [
53
+ "fsdp",
54
+ null
55
+ ],
56
+ "model.layers.*.mlp.gate_proj.weight": [
57
+ "fsdp",
58
+ null
59
+ ],
60
+ "model.layers.*.mlp.up_proj.weight": [
61
+ "fsdp",
62
+ null
63
+ ],
64
+ "model.layers.*.mlp.down_proj.linear.weight": [
65
+ null,
66
+ "fsdp"
67
+ ],
68
+ "model.layers.*.mlp.down_proj.log_lr": [
69
+ null,
70
+ "fsdp"
71
+ ],
72
+ "model.layers.*.mlp.down_proj.base_state_proj.weight": [
73
+ null,
74
+ "fsdp"
75
+ ],
76
+ "model.layers.*.mlp.down_proj.out_proj.weight": [
77
+ "fsdp",
78
+ null
79
+ ],
80
+ "model.layers.*.input_layernorm.weight": [
81
+ "fsdp"
82
+ ],
83
+ "model.layers.*.post_attention_layernorm.weight": [
84
+ "fsdp"
85
+ ],
86
+ "model.norm.weight": [
87
+ "fsdp"
88
+ ],
89
+ "model.layers.*": [
90
+ [
91
+ "data",
92
+ "fsdp"
93
+ ],
94
+ null,
95
+ null
96
+ ],
97
+ "lm_head": [
98
+ [
99
+ "data",
100
+ "fsdp"
101
+ ],
102
+ null,
103
+ null
104
+ ]
105
+ },
106
+ "remat": {
107
+ "activation_checkpoint_layers": [
108
+ "LlamaDecoderLayer"
109
+ ],
110
+ "optimization_barrier_layers": [
111
+ "LlamaDecoderLayer"
112
+ ],
113
+ "scan_layers": "model.layers",
114
+ "offload_tensors": [
115
+ "decoder_input"
116
+ ]
117
+ }
118
+ }
000000000500/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ef8595f5b818b65a3ef6cdb5af7aace7b1b55e2ba04199162b4b09531d679d
3
+ size 5281087423