aklein4 committed on
Commit
57d9c79
·
verified ·
1 Parent(s): 881b76b

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. 000000005000/config.json +102 -0
  2. 000000005000/model.pt +3 -0
000000005000/config.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "iml.IMLModel",
3
+ "pretrained_url": null,
4
+ "pretrained_step": null,
5
+ "pretrained_strict": null,
6
+ "torch_dtype": "float32",
7
+ "vocab_size": 49153,
8
+ "bos_token_id": 0,
9
+ "eos_token_id": 0,
10
+ "pad_token_id": 49152,
11
+ "hidden_size": 1024,
12
+ "num_hidden_layers": 24,
13
+ "num_attention_heads": 16,
14
+ "num_key_value_heads": 16,
15
+ "intermediate_size": 2560,
16
+ "hidden_act": "silu",
17
+ "max_position_embeddings": 2048,
18
+ "rope_theta": 10000.0,
19
+ "initializer_range": 0.02,
20
+ "attention_dropout": false,
21
+ "attention_bias": false,
22
+ "rms_norm_eps": 1e-05,
23
+ "attention_kernel": "flash_attention",
24
+ "pure_modules": [],
25
+ "iml_eps": 1e-10,
26
+ "iml_loss_scale": 0.1,
27
+ "sharding": {
28
+ "model.embed_tokens.weight": [
29
+ "fsdp",
30
+ null
31
+ ],
32
+ "lm_head.weight": [
33
+ "fsdp",
34
+ null
35
+ ],
36
+ "model.layers.*.self_attn.q_proj.weight": [
37
+ "fsdp",
38
+ null
39
+ ],
40
+ "model.layers.*.self_attn.k_proj.weight": [
41
+ null,
42
+ "fsdp"
43
+ ],
44
+ "model.layers.*.self_attn.v_proj.weight": [
45
+ null,
46
+ "fsdp"
47
+ ],
48
+ "model.layers.*.self_attn.o_proj.weight": [
49
+ "fsdp",
50
+ null
51
+ ],
52
+ "model.layers.*.mlp.gate_proj.weight": [
53
+ "fsdp",
54
+ null
55
+ ],
56
+ "model.layers.*.mlp.up_proj.weight": [
57
+ "fsdp",
58
+ null
59
+ ],
60
+ "model.layers.*.mlp.down_proj.weight": [
61
+ null,
62
+ "fsdp"
63
+ ],
64
+ "model.layers.*.input_layernorm.weight": [
65
+ "fsdp"
66
+ ],
67
+ "model.layers.*.post_attention_layernorm.weight": [
68
+ "fsdp"
69
+ ],
70
+ "model.norm.weight": [
71
+ "fsdp"
72
+ ],
73
+ "model.layers.*": [
74
+ [
75
+ "data",
76
+ "fsdp"
77
+ ],
78
+ null,
79
+ null
80
+ ],
81
+ "lm_head": [
82
+ [
83
+ "data",
84
+ "fsdp"
85
+ ],
86
+ null,
87
+ null
88
+ ]
89
+ },
90
+ "remat": {
91
+ "activation_checkpoint_layers": [
92
+ "LlamaDecoderLayer"
93
+ ],
94
+ "optimization_barrier_layers": [
95
+ "LlamaDecoderLayer"
96
+ ],
97
+ "scan_layers": "model.layers",
98
+ "offload_tensors": [
99
+ "decoder_input"
100
+ ]
101
+ }
102
+ }
000000005000/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c73512c3c079641d56f0defb3c8f44cd9e06cec37d2b77c09320977286b6796d
3
+ size 1560765447