ZainAli60 committed on
Commit
211f689
·
verified ·
1 Parent(s): 8d970d3

Upload GPTNeoForCausalLM

Browse files
Files changed (2) hide show
  1. config.json +5 -25
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,23 +1,11 @@
1
  {
2
- "_name_or_path": "/workspace/pretraining/local-models/training/2024-03-14_00-57-32",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTNeoForCausalLM"
6
  ],
7
  "attention_dropout": 0,
8
  "attention_layers": [
9
- "global",
10
- "local",
11
- "global",
12
- "local",
13
- "global",
14
- "local",
15
- "global",
16
- "local",
17
- "global",
18
- "local",
19
- "global",
20
- "local",
21
  "global",
22
  "local",
23
  "global",
@@ -37,7 +25,7 @@
37
  "global",
38
  "local"
39
  ],
40
- 12
41
  ]
42
  ],
43
  "bos_token_id": 50256,
@@ -45,28 +33,20 @@
45
  "embed_dropout": 0,
46
  "eos_token_id": 50256,
47
  "gradient_checkpointing": false,
48
- "hidden_size": 2048,
49
  "initializer_range": 0.02,
50
  "intermediate_size": null,
51
  "layer_norm_epsilon": 1e-05,
52
  "max_position_embeddings": 2048,
53
  "model_type": "gpt_neo",
54
- "num_heads": 16,
55
- "num_layers": 24,
56
  "resid_dropout": 0,
57
  "summary_activation": null,
58
  "summary_first_dropout": 0.1,
59
  "summary_proj_to_labels": true,
60
  "summary_type": "cls_index",
61
  "summary_use_proj": true,
62
- "task_specific_params": {
63
- "text-generation": {
64
- "do_sample": true,
65
- "max_length": 50,
66
- "temperature": 0.9
67
- }
68
- },
69
- "tokenizer_class": "GPT2Tokenizer",
70
  "torch_dtype": "float32",
71
  "transformers_version": "4.38.0",
72
  "use_cache": true,
 
1
  {
2
+ "_name_or_path": "/workspace/pretraining/local-models/training/2024-03-14_01-13-50",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTNeoForCausalLM"
6
  ],
7
  "attention_dropout": 0,
8
  "attention_layers": [
 
 
 
 
 
 
 
 
 
 
 
 
9
  "global",
10
  "local",
11
  "global",
 
25
  "global",
26
  "local"
27
  ],
28
+ 6
29
  ]
30
  ],
31
  "bos_token_id": 50256,
 
33
  "embed_dropout": 0,
34
  "eos_token_id": 50256,
35
  "gradient_checkpointing": false,
36
+ "hidden_size": 768,
37
  "initializer_range": 0.02,
38
  "intermediate_size": null,
39
  "layer_norm_epsilon": 1e-05,
40
  "max_position_embeddings": 2048,
41
  "model_type": "gpt_neo",
42
+ "num_heads": 12,
43
+ "num_layers": 12,
44
  "resid_dropout": 0,
45
  "summary_activation": null,
46
  "summary_first_dropout": 0.1,
47
  "summary_proj_to_labels": true,
48
  "summary_type": "cls_index",
49
  "summary_use_proj": true,
 
 
 
 
 
 
 
 
50
  "torch_dtype": "float32",
51
  "transformers_version": "4.38.0",
52
  "use_cache": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:505087392bbb142526f2a290e4ea269a5eb82d890697a19ab98fc6cf3e4bfc21
3
- size 3079115120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c202093d2773a3b9816c689add24a9aa5845ba244e47154d29d710982de767ec
3
+ size 500811336