joackimagno committed on
Commit
594323f
·
verified ·
1 Parent(s): e075081

(Trained with Unsloth)

Browse files
config.json CHANGED
@@ -1,31 +1,32 @@
1
  {
2
  "architectures": [
3
- "Qwen2ForCausalLM"
4
  ],
 
5
  "attention_dropout": 0.0,
6
- "eos_token_id": 151643,
 
 
7
  "hidden_act": "silu",
8
- "hidden_size": 3584,
9
  "initializer_range": 0.02,
10
- "intermediate_size": 18944,
11
- "max_position_embeddings": 131072,
12
- "max_window_layers": 28,
13
- "model_type": "qwen2",
14
- "num_attention_heads": 28,
15
  "num_hidden_layers": 28,
16
  "num_key_value_heads": 4,
17
- "pad_token_id": 151654,
 
18
  "rms_norm_eps": 1e-06,
19
  "rope_scaling": null,
20
- "rope_theta": 1000000.0,
21
- "sliding_window": null,
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
- "transformers_version": "4.52.4",
25
- "unsloth_fixed": true,
26
- "unsloth_version": "2025.6.2",
27
  "use_cache": true,
28
- "use_mrope": false,
29
- "use_sliding_window": false,
30
- "vocab_size": 152064
31
  }
 
1
  {
2
  "architectures": [
3
+ "LlamaForCausalLM"
4
  ],
5
+ "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 11,
9
+ "head_dim": 256,
10
  "hidden_act": "silu",
11
+ "hidden_size": 3072,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 23040,
14
+ "max_position_embeddings": 32768,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 12,
18
  "num_hidden_layers": 28,
19
  "num_key_value_heads": 4,
20
+ "pad_token_id": 131072,
21
+ "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-06,
23
  "rope_scaling": null,
24
+ "rope_theta": 1000042,
 
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "bfloat16",
27
+ "transformers_version": "4.53.0",
28
+ "unsloth_optimized": true,
29
+ "unsloth_version": "2025.6.12",
30
  "use_cache": true,
31
+ "vocab_size": 131073
 
 
32
  }
generation_config.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "bos_token_id": 151643,
3
- "eos_token_id": 151643,
4
- "max_length": 131072,
5
- "max_new_tokens": 2048,
6
- "pad_token_id": 151654,
7
- "transformers_version": "4.52.4"
8
  }
 
1
  {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 11,
4
+ "eos_token_id": 11,
5
+ "max_length": 32768,
6
+ "pad_token_id": 131072,
7
+ "transformers_version": "4.53.0"
8
  }
special_tokens_map.json CHANGED
@@ -1,18 +1,28 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>",
5
- "<|object_ref_start|>",
6
- "<|object_ref_end|>",
7
- "<|box_start|>",
8
- "<|box_end|>",
9
- "<|quad_start|>",
10
- "<|quad_end|>",
11
- "<|vision_start|>",
12
- "<|vision_end|>",
13
- "<|vision_pad|>",
14
- "<|image_pad|>",
15
- "<|video_pad|>"
 
 
 
 
 
 
 
 
 
 
16
  ],
17
  "eos_token": {
18
  "content": "<|endoftext|>",
@@ -21,11 +31,5 @@
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
- "pad_token": {
25
- "content": "<|vision_pad|>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- }
31
  }
 
1
  {
2
  "additional_special_tokens": [
3
+ ">>TITLE<<",
4
+ ">>ABSTRACT<<",
5
+ ">>INTRODUCTION<<",
6
+ ">>SUMMARY<<",
7
+ ">>COMMENT<<",
8
+ ">>ANSWER<<",
9
+ ">>QUESTION<<",
10
+ ">>DOMAIN<<",
11
+ ">>EMAIL_ADDRESS<<",
12
+ ">>IP_ADDRESS<<",
13
+ "<|startoftext|>",
14
+ ">>IP_ADDRESS_0<<",
15
+ ">>IP_ADDRESS_1<<",
16
+ ">>IP_ADDRESS_2<<",
17
+ ">>IP_ADDRESS_3<<",
18
+ ">>IP_ADDRESS_4<<",
19
+ ">>IP_ADDRESS_5<<",
20
+ ">>IP_ADDRESS_6<<",
21
+ ">>IP_ADDRESS_7<<",
22
+ ">>IP_ADDRESS_8<<",
23
+ ">>IP_ADDRESS_9<<",
24
+ ">>PASSWORD<<",
25
+ ">>KEY<<"
26
  ],
27
  "eos_token": {
28
  "content": "<|endoftext|>",
 
31
  "rstrip": false,
32
  "single_word": false
33
  },
34
+ "pad_token": "<|PAD_TOKEN|>"
 
 
 
 
 
 
35
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
- size 11421896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:902807d87496a739869b5a41f76e8ea256e979f107a5716c7321bade51619426
3
+ size 9780960
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff