AlonBBar committed on
Commit
633711c
·
verified ·
1 Parent(s): 7a0f136

(Trained with Unsloth)

Browse files
Files changed (2) hide show
  1. config.json +145 -146
  2. tokenizer_config.json +4 -3
config.json CHANGED
@@ -1,150 +1,149 @@
1
  {
2
- "architectures": [
3
- "Phi3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "auto_map": {
8
- "AutoConfig": "configuration_phi3.Phi3Config",
9
- "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
10
- "AutoTokenizer": "Xenova/gpt-4o"
11
- },
12
- "bos_token_id": 199999,
13
- "dtype": "bfloat16",
14
- "embd_pdrop": 0.0,
15
- "eos_token_id": 200020,
16
- "full_attn_mod": 1,
17
- "hidden_act": "silu",
18
- "hidden_size": 3072,
19
- "ignore_keys_at_rope_validation": null,
20
- "initializer_range": 0.02,
21
- "intermediate_size": 8192,
22
- "interpolate_factor": 1,
23
- "lm_head_bias": false,
24
- "max_position_embeddings": 131072,
25
- "mlp_bias": false,
26
- "model_name": "unsloth/phi-4-mini-instruct-unsloth-bnb-4bit",
27
- "model_type": "phi3",
28
- "num_attention_heads": 24,
29
- "num_hidden_layers": 32,
30
- "num_key_value_heads": 8,
31
- "original_max_position_embeddings": 4096,
32
- "pad_token_id": 200029,
33
- "partial_rotary_factor": 0.75,
34
- "resid_pdrop": 0.0,
35
- "rms_norm_eps": 1e-05,
36
- "rope_parameters": {
37
- "long_factor": [
38
- 1,
39
- 1.118320672,
40
- 1.250641126,
41
- 1.398617824,
42
- 1.564103225,
43
- 1.74916897,
44
- 1.956131817,
45
- 2.187582649,
46
- 2.446418898,
47
- 2.735880826,
48
- 3.059592084,
49
- 3.421605075,
50
- 3.826451687,
51
- 4.279200023,
52
- 4.785517845,
53
- 5.351743533,
54
- 5.984965424,
55
- 6.693110555,
56
- 7.485043894,
57
- 8.370679318,
58
- 9.36110372,
59
- 10.4687158,
60
- 11.70738129,
61
- 13.09260651,
62
- 14.64173252,
63
- 16.37415215,
64
- 18.31155283,
65
- 20.47818807,
66
- 22.90118105,
67
- 25.61086418,
68
- 28.64115884,
69
- 32.03,
70
- 32.1,
71
- 32.13,
72
- 32.23,
73
- 32.6,
74
- 32.61,
75
- 32.64,
76
- 32.66,
77
- 32.7,
78
- 32.71,
79
- 32.93,
80
- 32.97,
81
- 33.28,
82
- 33.49,
83
- 33.5,
84
- 44.16,
85
- 47.77
86
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  "original_max_position_embeddings": 4096,
 
88
  "partial_rotary_factor": 0.75,
89
- "rope_theta": 10000.0,
90
- "rope_type": "longrope",
91
- "short_factor": [
92
- 1.0,
93
- 1.0,
94
- 1.0,
95
- 1.0,
96
- 1.0,
97
- 1.0,
98
- 1.0,
99
- 1.0,
100
- 1.0,
101
- 1.0,
102
- 1.0,
103
- 1.0,
104
- 1.0,
105
- 1.0,
106
- 1.0,
107
- 1.0,
108
- 1.0,
109
- 1.0,
110
- 1.0,
111
- 1.0,
112
- 1.0,
113
- 1.0,
114
- 1.0,
115
- 1.0,
116
- 1.0,
117
- 1.0,
118
- 1.0,
119
- 1.0,
120
- 1.0,
121
- 1.0,
122
- 1.0,
123
- 1.0,
124
- 1.0,
125
- 1.0,
126
- 1.0,
127
- 1.0,
128
- 1.0,
129
- 1.0,
130
- 1.0,
131
- 1.0,
132
- 1.0,
133
- 1.0,
134
- 1.0,
135
- 1.0,
136
- 1.0,
137
- 1.0,
138
- 1.0,
139
- 1.0
140
- ],
141
- "type": "longrope"
142
- },
143
- "sliding_window": 262144,
144
- "tie_word_embeddings": true,
145
- "transformers_version": "5.2.0",
146
- "unsloth_fixed": true,
147
- "unsloth_version": "2026.3.4",
148
- "use_cache": false,
149
- "vocab_size": 200064
150
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
2
+ "architectures": [
3
+ "Phi3ForCausalLM"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_phi3.Phi3Config",
9
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
10
+ "AutoTokenizer": "Xenova/gpt-4o"
11
+ },
12
+ "bos_token_id": 199999,
13
+ "torch_dtype": "bfloat16",
14
+ "embd_pdrop": 0.0,
15
+ "eos_token_id": 200020,
16
+ "full_attn_mod": 1,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 3072,
19
+ "ignore_keys_at_rope_validation": null,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 8192,
22
+ "interpolate_factor": 1,
23
+ "lm_head_bias": false,
24
+ "max_position_embeddings": 131072,
25
+ "mlp_bias": false,
26
+ "model_name": "unsloth/phi-4-mini-instruct-unsloth-bnb-4bit",
27
+ "model_type": "phi3",
28
+ "num_attention_heads": 24,
29
+ "num_hidden_layers": 32,
30
+ "num_key_value_heads": 8,
31
  "original_max_position_embeddings": 4096,
32
+ "pad_token_id": 200029,
33
  "partial_rotary_factor": 0.75,
34
+ "resid_pdrop": 0.0,
35
+ "rms_norm_eps": 1e-05,
36
+ "rope_parameters": {
37
+ "long_factor": [
38
+ 1,
39
+ 1.118320672,
40
+ 1.250641126,
41
+ 1.398617824,
42
+ 1.564103225,
43
+ 1.74916897,
44
+ 1.956131817,
45
+ 2.187582649,
46
+ 2.446418898,
47
+ 2.735880826,
48
+ 3.059592084,
49
+ 3.421605075,
50
+ 3.826451687,
51
+ 4.279200023,
52
+ 4.785517845,
53
+ 5.351743533,
54
+ 5.984965424,
55
+ 6.693110555,
56
+ 7.485043894,
57
+ 8.370679318,
58
+ 9.36110372,
59
+ 10.4687158,
60
+ 11.70738129,
61
+ 13.09260651,
62
+ 14.64173252,
63
+ 16.37415215,
64
+ 18.31155283,
65
+ 20.47818807,
66
+ 22.90118105,
67
+ 25.61086418,
68
+ 28.64115884,
69
+ 32.03,
70
+ 32.1,
71
+ 32.13,
72
+ 32.23,
73
+ 32.6,
74
+ 32.61,
75
+ 32.64,
76
+ 32.66,
77
+ 32.7,
78
+ 32.71,
79
+ 32.93,
80
+ 32.97,
81
+ 33.28,
82
+ 33.49,
83
+ 33.5,
84
+ 44.16,
85
+ 47.77
86
+ ],
87
+ "original_max_position_embeddings": 4096,
88
+ "partial_rotary_factor": 0.75,
89
+ "rope_theta": 10000.0,
90
+ "rope_type": "longrope",
91
+ "short_factor": [
92
+ 1.0,
93
+ 1.0,
94
+ 1.0,
95
+ 1.0,
96
+ 1.0,
97
+ 1.0,
98
+ 1.0,
99
+ 1.0,
100
+ 1.0,
101
+ 1.0,
102
+ 1.0,
103
+ 1.0,
104
+ 1.0,
105
+ 1.0,
106
+ 1.0,
107
+ 1.0,
108
+ 1.0,
109
+ 1.0,
110
+ 1.0,
111
+ 1.0,
112
+ 1.0,
113
+ 1.0,
114
+ 1.0,
115
+ 1.0,
116
+ 1.0,
117
+ 1.0,
118
+ 1.0,
119
+ 1.0,
120
+ 1.0,
121
+ 1.0,
122
+ 1.0,
123
+ 1.0,
124
+ 1.0,
125
+ 1.0,
126
+ 1.0,
127
+ 1.0,
128
+ 1.0,
129
+ 1.0,
130
+ 1.0,
131
+ 1.0,
132
+ 1.0,
133
+ 1.0,
134
+ 1.0,
135
+ 1.0,
136
+ 1.0,
137
+ 1.0,
138
+ 1.0,
139
+ 1.0
140
+ ],
141
+ "type": "longrope"
142
+ },
143
+ "sliding_window": 262144,
144
+ "tie_word_embeddings": true,
145
+ "unsloth_fixed": true,
146
+ "unsloth_version": "2026.3.4",
147
+ "use_cache": false,
148
+ "vocab_size": 200064
149
+ }
tokenizer_config.json CHANGED
@@ -4,10 +4,11 @@
4
  "bos_token": "<|endoftext|>",
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|end|>",
7
- "is_local": true,
8
  "model_max_length": 131072,
9
  "pad_token": "<|PAD▁TOKEN|>",
10
  "padding_side": "left",
11
  "tokenizer_class": "TokenizersBackend",
12
- "unk_token": "�"
13
- }
 
 
4
  "bos_token": "<|endoftext|>",
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|end|>",
7
+ "is_local": false,
8
  "model_max_length": 131072,
9
  "pad_token": "<|PAD▁TOKEN|>",
10
  "padding_side": "left",
11
  "tokenizer_class": "TokenizersBackend",
12
+ "unk_token": "�",
13
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% endif %}"
14
+ }