lliu01 committed on
Commit
86c99dd
·
verified ·
1 Parent(s): 35430e1

Upload LlamaForCausalLM

Browse files
config.json CHANGED
@@ -1,44 +1,39 @@
1
  {
2
- "_name_or_path": "unsloth/llama-3-8b-bnb-4bit",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 128000,
9
- "eos_token_id": 128001,
 
 
 
 
10
  "hidden_act": "silu",
11
  "hidden_size": 4096,
12
  "initializer_range": 0.02,
13
  "intermediate_size": 14336,
14
- "max_position_embeddings": 8192,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
  "num_attention_heads": 32,
18
  "num_hidden_layers": 32,
19
  "num_key_value_heads": 8,
20
  "pretraining_tp": 1,
21
- "quantization_config": {
22
- "_load_in_4bit": true,
23
- "_load_in_8bit": false,
24
- "bnb_4bit_compute_dtype": "bfloat16",
25
- "bnb_4bit_quant_storage": "uint8",
26
- "bnb_4bit_quant_type": "nf4",
27
- "bnb_4bit_use_double_quant": true,
28
- "llm_int8_enable_fp32_cpu_offload": false,
29
- "llm_int8_has_fp16_weight": false,
30
- "llm_int8_skip_modules": null,
31
- "llm_int8_threshold": 6.0,
32
- "load_in_4bit": true,
33
- "load_in_8bit": false,
34
- "quant_method": "bitsandbytes"
35
- },
36
  "rms_norm_eps": 1e-05,
37
- "rope_scaling": null,
 
 
 
 
 
 
38
  "rope_theta": 500000.0,
39
  "tie_word_embeddings": false,
40
- "torch_dtype": "float16",
41
- "transformers_version": "4.43.2",
42
  "use_cache": true,
43
  "vocab_size": 128256
44
  }
 
1
  {
2
+ "_name_or_path": "meta-llama/Meta-Llama-3.1-8B-Instruct",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
  "hidden_act": "silu",
15
  "hidden_size": 4096,
16
  "initializer_range": 0.02,
17
  "intermediate_size": 14336,
18
+ "max_position_embeddings": 131072,
19
  "mlp_bias": false,
20
  "model_type": "llama",
21
  "num_attention_heads": 32,
22
  "num_hidden_layers": 32,
23
  "num_key_value_heads": 8,
24
  "pretraining_tp": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "rms_norm_eps": 1e-05,
26
+ "rope_scaling": {
27
+ "factor": 8.0,
28
+ "high_freq_factor": 4.0,
29
+ "low_freq_factor": 1.0,
30
+ "original_max_position_embeddings": 8192,
31
+ "rope_type": "llama3"
32
+ },
33
  "rope_theta": 500000.0,
34
  "tie_word_embeddings": false,
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.44.2",
37
  "use_cache": true,
38
  "vocab_size": 128256
39
  }
generation_config.json CHANGED
@@ -1,9 +1,12 @@
1
  {
2
  "bos_token_id": 128000,
3
  "do_sample": true,
4
- "eos_token_id": 128001,
5
- "max_length": 4096,
 
 
 
6
  "temperature": 0.6,
7
  "top_p": 0.9,
8
- "transformers_version": "4.43.2"
9
  }
 
1
  {
2
  "bos_token_id": 128000,
3
  "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
+ "transformers_version": "4.44.2"
12
  }
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3beeac3ca0cd17d52088f5ce85ba306de603010d4bde8bf45c889d5884ecc05
3
  size 4886466168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0137de6d8a839fb38dc81bee706b03dc9d1dff9a3afed2332357a4a812c7014f
3
  size 4886466168
model-00002-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bf867e919a6372733661da39823b6b9c54967854efb6e719f15c98487c660dd
3
  size 4832007448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b228e2719cbfaf9d46fad2a63dfd715714bc1bf7f06d8c78639403a07f95cd4
3
  size 4832007448
model-00003-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:908b76db20e4569c2d65c088164102e07acb3a3fcb487f7d3f3c393f63b31427
3
  size 4999813112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4511668eedfe7ed6663f5af33c21ba857ec0c2c95f2ff5e4bcaf0ea3b3962d4
3
  size 4999813112
model-00004-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc70e954eb05e165252f50597b0a4e32962b9b52a47053827866ae735ab09257
3
  size 4999813128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e13a904c883180c8b942d7eff317b719d37dc33c2c2ef6d6b85fdebf254a2d
3
  size 4999813128
model-00005-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b08a367e8dd5cd71c5f7129ae09ad50c0f608898c6f54dfd147129c866fc060e
3
  size 4832007496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9472bd940e4b770344d503a3d11522373f5fda1661c10ca76a0dcf06bbbba890
3
  size 4832007496
model-00006-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11203ef66e52bf7b78612d976b581dc86c0829d35bc3d513fc1b1f68f0f85f50
3
  size 4999813120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10266ed4a0a4e3b37252d5aeb7f6704e1be49e517d34acbdedce6d6c6a2c3ca4
3
  size 4999813120
model-00007-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:769bb36358610cf5b70972c03dcb3e2a373337f368f104478d67f9ab0bde72d7
3
  size 2571158184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bcac2612656595f4bcd4e60954924df1f780320e61900081d6a49c1853880a3
3
  size 2571158184
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff