yuzhe committed on
Commit
c545cc8
·
verified ·
1 Parent(s): 00622f3

Update model/config.json

Browse files
Files changed (1) hide show
  1. model/config.json +16 -5
model/config.json CHANGED
@@ -8,7 +8,10 @@
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
  "dtype": "bfloat16",
11
- "eos_token_id": 1,
 
 
 
12
  "final_logit_softcapping": null,
13
  "head_dim": 256,
14
  "hidden_activation": "gelu_pytorch_tanh",
@@ -43,11 +46,19 @@
43
  "pad_token_id": 0,
44
  "query_pre_attn_scalar": 256,
45
  "rms_norm_eps": 1e-06,
46
- "rope_local_base_freq": 10000.0,
47
- "rope_scaling": null,
48
- "rope_theta": 1000000.0,
 
 
 
 
 
 
 
49
  "sliding_window": 512,
50
- "transformers_version": "4.57.3",
 
51
  "use_bidirectional_attention": false,
52
  "use_cache": true,
53
  "vocab_size": 262144
 
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
  "dtype": "bfloat16",
11
+ "eos_token_id": [
12
+ 1,
13
+ 50
14
+ ],
15
  "final_logit_softcapping": null,
16
  "head_dim": 256,
17
  "hidden_activation": "gelu_pytorch_tanh",
 
46
  "pad_token_id": 0,
47
  "query_pre_attn_scalar": 256,
48
  "rms_norm_eps": 1e-06,
49
+ "rope_parameters": {
50
+ "full_attention": {
51
+ "rope_theta": 1000000.0,
52
+ "rope_type": "default"
53
+ },
54
+ "sliding_attention": {
55
+ "rope_theta": 10000.0,
56
+ "rope_type": "default"
57
+ }
58
+ },
59
  "sliding_window": 512,
60
+ "tie_word_embeddings": true,
61
+ "transformers_version": "5.2.0",
62
  "use_bidirectional_attention": false,
63
  "use_cache": true,
64
  "vocab_size": 262144