paul-english committed on
Commit
10342ff
·
verified ·
1 Parent(s): 97f203a

Clean config: remove runtime fields, add _init_weights no-op

Browse files
Files changed (2) hide show
  1. README.md +15 -15
  2. config.json +0 -4
README.md CHANGED
@@ -3,22 +3,22 @@ license: apache-2.0
3
  library_name: transformers
4
  language: en
5
  tags:
6
- - tiner
7
- - iterative-bert
8
- - encoder
9
- - pytorch
10
  model-index:
11
- - name: iterativebert-base
12
- results:
13
- - task:
14
- type: fill-mask
15
- dataset:
16
- name: MBZUAI-LLM/SlimPajama-627B-DC
17
- type: MBZUAI-LLM/SlimPajama-627B-DC
18
- metrics:
19
- - name: Loss
20
- type: loss
21
- value: 5.0599
22
  ---
23
 
24
  # iterativebert-base
 
3
  library_name: transformers
4
  language: en
5
  tags:
6
+ - tiner
7
+ - iterative-bert
8
+ - encoder
9
+ - pytorch
10
  model-index:
11
+ - name: iterativebert-base
12
+ results:
13
+ - task:
14
+ type: fill-mask
15
+ dataset:
16
+ name: MBZUAI-LLM/SlimPajama-627B-DC
17
+ type: MBZUAI-LLM/SlimPajama-627B-DC
18
+ metrics:
19
+ - type: loss
20
+ value: 5.0599
21
+ name: Loss
22
  ---
23
 
24
  # iterativebert-base
config.json CHANGED
@@ -6,7 +6,6 @@
6
  "architectures": [
7
  "IterativeBert"
8
  ],
9
- "attn_implementation": "flash_attention_2",
10
  "conv_kernel_size": 2,
11
  "dropout_attn_output": 0.1,
12
  "dropout_attn_weights": 0.0,
@@ -24,9 +23,6 @@
24
  "l_step_rope_base": 10000.0,
25
  "l_step_use_conv": true,
26
  "layer_norm_eps": 1e-12,
27
- "liger_fused_rmsnorm": true,
28
- "liger_fused_rope": false,
29
- "liger_fused_swiglu": true,
30
  "max_position_embeddings": 2048,
31
  "model_type": "iterative_bert",
32
  "norm_type": "layernorm",
 
6
  "architectures": [
7
  "IterativeBert"
8
  ],
 
9
  "conv_kernel_size": 2,
10
  "dropout_attn_output": 0.1,
11
  "dropout_attn_weights": 0.0,
 
23
  "l_step_rope_base": 10000.0,
24
  "l_step_use_conv": true,
25
  "layer_norm_eps": 1e-12,
 
 
 
26
  "max_position_embeddings": 2048,
27
  "model_type": "iterative_bert",
28
  "norm_type": "layernorm",