irusl committed on
Commit
f6e01ed
·
verified ·
1 Parent(s): c7fa4cf

Upload model

Browse files
config.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
- "_name_or_path": "data/bigdata_2",
3
- "architectures": [
4
- "StableLmForCausalLM"
5
- ],
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 100257,
8
- "eos_token_id": 100257,
9
- "hidden_act": "silu",
10
- "hidden_dropout": 0.0,
11
- "hidden_size": 2048,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 5632,
14
- "layer_norm_eps": 1e-05,
15
- "max_position_embeddings": 4096,
16
- "model_type": "stablelm",
17
- "num_attention_heads": 32,
18
- "num_hidden_layers": 24,
19
- "num_key_value_heads": 32,
20
- "pad_token_id": 100257,
21
- "partial_rotary_factor": 0.25,
22
- "qk_layernorm": false,
23
- "rope_scaling": null,
24
- "rope_theta": 10000,
25
- "tie_word_embeddings": false,
26
- "torch_dtype": "bfloat16",
27
- "transformers_version": "4.41.2",
28
- "use_cache": false,
29
- "use_parallel_residual": false,
30
- "use_qkv_bias": true,
31
- "vocab_size": 100352
32
- }
 
1
  {
2
+ "_name_or_path": "merge23",
3
+ "architectures": [
4
+ "StableLmForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 100257,
8
+ "eos_token_id": 100257,
9
+ "hidden_act": "silu",
10
+ "hidden_dropout": 0.0,
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 5632,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 4096,
16
+ "model_type": "stablelm",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 24,
19
+ "num_key_value_heads": 32,
20
+ "pad_token_id": 100257,
21
+ "partial_rotary_factor": 0.25,
22
+ "qk_layernorm": false,
23
+ "rope_scaling": null,
24
+ "rope_theta": 10000,
25
+ "tie_word_embeddings": false,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.40.2",
28
+ "use_cache": false,
29
+ "use_parallel_residual": false,
30
+ "use_qkv_bias": true,
31
+ "vocab_size": 100352
32
+ }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 100257,
4
  "do_sample": true,
5
  "eos_token_id": 100257,
6
- "transformers_version": "4.38.2"
7
  }
 
3
  "bos_token_id": 100257,
4
  "do_sample": true,
5
  "eos_token_id": 100257,
6
+ "transformers_version": "4.41.2"
7
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5be612b5e7b636f460533f3bad9c2cf8ad5ea8debae511badd4564aa37626943
3
- size 981631656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2933e0c010a75032b7db70bcc00c1c64cd5ee1590840bd1eecf97c2300fdfe5c
3
+ size 1963254496
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93bdc07ec34dcae126868b74797e5e427c8f3fde16dedd73e8650256a9f782fe
3
- size 996449440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7838e203d4983ec3d97ac38c0c4ac9841be546a1806da245fb987f4c0491b9d
3
+ size 1992883384
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4e7e56f6af8e58ccd58c421396ff06952f5d3b852b8347e4a36a75536f9246f
3
- size 899945896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c91a2c2f357e8f74a04e47dba2edad7cc5e94cf63afd5b8c268f06554839eb3
3
+ size 1799878064
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c31b54b8f8deb4d4d4b50f67e0f70de1e49beec2fff11e95f308fa6ab6588701
3
- size 411041920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25ecf0759898dcf0a584c34f1151b1616a0dcd185685ef601d69873f2f56776f
3
+ size 822083712
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 3289030656
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00004-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 6578061312
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00004-of-00004.safetensors",
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 3896,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 3896
12
  },
13
  "direction": "Left",
14
  "pad_to_multiple_of": null,
@@ -350,6 +350,7 @@
350
  "end_of_word_suffix": "",
351
  "fuse_unk": false,
352
  "byte_fallback": false,
 
353
  "vocab": {
354
  "!": 0,
355
  "\"": 1,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 3696,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 3696
12
  },
13
  "direction": "Left",
14
  "pad_to_multiple_of": null,
 
350
  "end_of_word_suffix": "",
351
  "fuse_unk": false,
352
  "byte_fallback": false,
353
+ "ignore_merges": false,
354
  "vocab": {
355
  "!": 0,
356
  "\"": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "100256": {
@@ -304,9 +305,15 @@
304
  "bos_token": "<|endoftext|>",
305
  "clean_up_tokenization_spaces": true,
306
  "eos_token": "<|endoftext|>",
 
307
  "model_max_length": 1000000000000000019884624838656,
 
308
  "pad_token": "<|endoftext|>",
 
309
  "padding_side": "left",
 
310
  "tokenizer_class": "GPT2Tokenizer",
 
 
311
  "unk_token": "<|endoftext|>"
312
  }
 
1
  {
2
+ "add_bos_token": true,
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
  "100256": {
 
305
  "bos_token": "<|endoftext|>",
306
  "clean_up_tokenization_spaces": true,
307
  "eos_token": "<|endoftext|>",
308
+ "max_length": 3896,
309
  "model_max_length": 1000000000000000019884624838656,
310
+ "pad_to_multiple_of": null,
311
  "pad_token": "<|endoftext|>",
312
+ "pad_token_type_id": 0,
313
  "padding_side": "left",
314
+ "stride": 0,
315
  "tokenizer_class": "GPT2Tokenizer",
316
+ "truncation_side": "right",
317
+ "truncation_strategy": "longest_first",
318
  "unk_token": "<|endoftext|>"
319
  }