aklein4 committed on
Commit
c924e2c
·
verified ·
1 Parent(s): 3ea9162

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. 000000020000/config.json +102 -0
  2. 000000020000/model.pt +3 -0
000000020000/config.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "pure_modules": [],
3
+ "remat": {
4
+ "activation_checkpoint_layers": [
5
+ "LlamaDecoderLayer"
6
+ ],
7
+ "scan_layers": "model.layers",
8
+ "offload_tensors": [
9
+ "decoder_input"
10
+ ],
11
+ "optimization_barrier_layers": [
12
+ "LlamaDecoderLayer"
13
+ ]
14
+ },
15
+ "type": "custom_llama.CustomLlamaForCausalLM",
16
+ "pretrained_url": "aklein4/SmolLM2-1.7B-TPU",
17
+ "pretrained_step": 0,
18
+ "pretrained_strict": false,
19
+ "torch_dtype": "float32",
20
+ "vocab_size": 49152,
21
+ "bos_token_id": 0,
22
+ "eos_token_id": 0,
23
+ "pad_token_id": 49152,
24
+ "hidden_size": 2048,
25
+ "num_hidden_layers": 24,
26
+ "num_attention_heads": 32,
27
+ "num_key_value_heads": 32,
28
+ "intermediate_size": 8192,
29
+ "hidden_act": "silu",
30
+ "max_position_embeddings": 8192,
31
+ "rope_theta": 130000,
32
+ "initializer_range": null,
33
+ "gaussian_init": true,
34
+ "attention_dropout": false,
35
+ "attention_bias": false,
36
+ "rms_norm_eps": 1e-05,
37
+ "pad_attention_bias_value": -100.0,
38
+ "attention_kernel": "flash_attention",
39
+ "sharding": {
40
+ "model.embed_tokens.weight": [
41
+ "fsdp",
42
+ null
43
+ ],
44
+ "lm_head.weight": [
45
+ "fsdp",
46
+ null
47
+ ],
48
+ "model.layers.*.self_attn.q_proj.weight": [
49
+ "fsdp",
50
+ null
51
+ ],
52
+ "model.layers.*.self_attn.k_proj.weight": [
53
+ "fsdp",
54
+ null
55
+ ],
56
+ "model.layers.*.self_attn.v_proj.weight": [
57
+ "fsdp",
58
+ null
59
+ ],
60
+ "model.layers.*.self_attn.o_proj.weight": [
61
+ "fsdp",
62
+ null
63
+ ],
64
+ "model.layers.*.mlp.gate_proj.weight": [
65
+ "fsdp",
66
+ null
67
+ ],
68
+ "model.layers.*.mlp.up_proj.weight": [
69
+ "fsdp",
70
+ null
71
+ ],
72
+ "model.layers.*.mlp.down_proj.weight": [
73
+ null,
74
+ "fsdp"
75
+ ],
76
+ "model.layers.*.input_layernorm.weight": [
77
+ "fsdp"
78
+ ],
79
+ "model.layers.*.post_attention_layernorm.weight": [
80
+ "fsdp"
81
+ ],
82
+ "model.norm.weight": [
83
+ "fsdp"
84
+ ],
85
+ "model.layers.*": [
86
+ [
87
+ "data",
88
+ "fsdp"
89
+ ],
90
+ null,
91
+ null
92
+ ],
93
+ "lm_head": [
94
+ [
95
+ "data",
96
+ "fsdp"
97
+ ],
98
+ null,
99
+ null
100
+ ]
101
+ }
102
+ }
000000020000/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369764e76e0440c8b6915363e5c05c56c273a7e5012b6722f46f5083e2cb0d18
3
+ size 7248226091