paultltc committed on
Commit
0d22882
·
verified ·
1 Parent(s): 7f0690d

Update modernvbert-embed to the new weight format

Browse files
Files changed (1) hide show
  1. config.json +89 -33
config.json CHANGED
@@ -1,50 +1,106 @@
1
  {
2
- "additional_vocab_size": 40,
3
- "architectures": [
4
- "BiModernVBert"
5
- ],
6
- "auto_map": {
7
- "AutoConfig": "configuration_modernvbert.ModernVBertConfig",
8
- "AutoModel": "modeling_modernvbert.ModernVBertModel",
9
- "AutoModelForMaskedLM": "modeling_modernvbert.ModernVBertForMaskedLM"
10
- },
11
- "freeze_config": {
12
- "freeze_lm_head": true,
13
- "freeze_text_layers": true,
14
- "freeze_vision_layers": true
15
- },
16
- "hidden_size": 768,
17
  "image_token_id": 50407,
18
  "initializer_range": 0.02,
19
- "max_position_embeddings": 8192,
20
  "model_type": "modernvbert",
21
- "neftune_noise_alpha": 0.0,
22
- "output_attentions": false,
23
  "pixel_shuffle_factor": 4,
24
- "qk_layer_norms": false,
25
- "scale_factor": 4,
26
  "text_config": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "hidden_size": 768,
 
 
28
  "intermediate_size": 1152,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "mlp_bias": false,
30
- "model_type": "vbert",
 
 
 
 
31
  "num_hidden_layers": 22,
32
- "text_model_name": "jhu-clsp/ettin-encoder-150m",
33
- "vocab_size": 50368
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  },
35
- "tie_word_embeddings": false,
36
- "torch_dtype": "float32",
37
- "transformers_version": null,
38
- "use_cache": true,
39
- "use_resampler": false,
40
  "vision_config": {
41
- "embed_dim": 768,
 
 
42
  "image_size": 512,
43
  "intermediate_size": 3072,
44
- "model_type": "vbert",
 
 
 
45
  "num_hidden_layers": 12,
46
- "patch_size": 16,
47
- "vision_model_name": "google/siglip2-base-patch16-512"
48
  },
49
- "vocab_size": 50368
 
 
 
 
 
 
 
 
50
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "image_token_id": 50407,
3
  "initializer_range": 0.02,
 
4
  "model_type": "modernvbert",
 
 
5
  "pixel_shuffle_factor": 4,
 
 
6
  "text_config": {
7
+ "_name_or_path": "ettin-encoder-150m",
8
+ "architectures": [
9
+ "ModernBertForMaskedLM"
10
+ ],
11
+ "attention_bias": false,
12
+ "attention_dropout": 0.0,
13
+ "causal_mask": false,
14
+ "classifier_activation": "gelu",
15
+ "classifier_bias": false,
16
+ "classifier_dropout": 0.0,
17
+ "classifier_pooling": "mean",
18
+ "cls_token_id": 50281,
19
+ "decoder_bias": true,
20
+ "deterministic_flash_attn": false,
21
+ "dtype": "float32",
22
+ "embedding_dropout": 0.0,
23
+ "global_attn_every_n_layers": 3,
24
+ "global_rope_theta": 160000.0,
25
+ "gradient_checkpointing": false,
26
+ "hidden_activation": "gelu",
27
  "hidden_size": 768,
28
+ "initializer_cutoff_factor": 2.0,
29
+ "initializer_range": 0.02,
30
  "intermediate_size": 1152,
31
+ "is_causal": false,
32
+ "layer_norm_eps": 1e-05,
33
+ "layer_types": [
34
+ "full_attention",
35
+ "sliding_attention",
36
+ "sliding_attention",
37
+ "full_attention",
38
+ "sliding_attention",
39
+ "sliding_attention",
40
+ "full_attention",
41
+ "sliding_attention",
42
+ "sliding_attention",
43
+ "full_attention",
44
+ "sliding_attention",
45
+ "sliding_attention",
46
+ "full_attention",
47
+ "sliding_attention",
48
+ "sliding_attention",
49
+ "full_attention",
50
+ "sliding_attention",
51
+ "sliding_attention",
52
+ "full_attention",
53
+ "sliding_attention",
54
+ "sliding_attention",
55
+ "full_attention"
56
+ ],
57
+ "local_attention": 128,
58
+ "local_rope_theta": 160000.0,
59
+ "max_position_embeddings": 7999,
60
  "mlp_bias": false,
61
+ "mlp_dropout": 0.0,
62
+ "model_type": "modernbert",
63
+ "norm_bias": false,
64
+ "norm_eps": 1e-05,
65
+ "num_attention_heads": 12,
66
  "num_hidden_layers": 22,
67
+ "position_embedding_type": "sans_pos",
68
+ "repad_logits_with_grad": false,
69
+ "rope_parameters": {
70
+ "full_attention": {
71
+ "rope_theta": 160000.0,
72
+ "rope_type": "default"
73
+ },
74
+ "sliding_attention": {
75
+ "rope_theta": 160000.0,
76
+ "rope_type": "default"
77
+ }
78
+ },
79
+ "sparse_pred_ignore_index": -100,
80
+ "sparse_prediction": false,
81
+ "vocab_size": 50408
82
  },
83
+ "transformers_version": "5.0.0.dev0",
 
 
 
 
84
  "vision_config": {
85
+ "attention_dropout": 0.0,
86
+ "hidden_act": "gelu_pytorch_tanh",
87
+ "hidden_size": 768,
88
  "image_size": 512,
89
  "intermediate_size": 3072,
90
+ "layer_norm_eps": 1e-06,
91
+ "model_type": "siglip_vision_model",
92
+ "num_attention_heads": 12,
93
+ "num_channels": 3,
94
  "num_hidden_layers": 12,
95
+ "patch_size": 16
 
96
  },
97
+ "tie_word_embeddings": false,
98
+ "architectures": [
99
+ "BiModernVBert"
100
+ ],
101
+ "freeze_config": {
102
+ "freeze_lm_head": true,
103
+ "freeze_text_layers": true,
104
+ "freeze_vision_layers": true
105
+ }
106
  }