philperceptron committed on
Commit
3eef895
·
1 Parent(s): a5d5cc4

5.0.0 compatibility, bare Param position embed, sequential removal

Browse files
Files changed (3) hide show
  1. config.json +12 -5
  2. generation_config.json +4 -1
  3. model.safetensors +2 -2
config.json CHANGED
@@ -1,4 +1,12 @@
1
  {
 
 
 
 
 
 
 
 
2
  "architectures": [
3
  "IsaacForConditionalGeneration"
4
  ],
@@ -53,12 +61,9 @@
53
  "num_attention_heads": 16,
54
  "num_hidden_layers": 28,
55
  "num_key_value_heads": 8,
 
56
  "pixel_shuffle_scale": 2,
57
  "rms_norm_eps": 1e-06,
58
- "rope_parameters": {
59
- "rope_theta": 1000000,
60
- "rope_type": "default"
61
- },
62
  "rope_theta": 1000000,
63
  "sliding_window": null,
64
  "text_config": {
@@ -112,6 +117,7 @@
112
  "num_attention_heads": 16,
113
  "num_hidden_layers": 28,
114
  "num_key_value_heads": 8,
 
115
  "pixel_shuffle_scale": 2,
116
  "rms_norm_eps": 1e-06,
117
  "rope_parameters": {
@@ -119,6 +125,7 @@
119
  "rope_type": "default"
120
  },
121
  "sliding_window": null,
 
122
  "use_cache": true,
123
  "use_sliding_window": false,
124
  "vision_max_num_patches": 6144,
@@ -137,7 +144,7 @@
137
  "vocab_size": 151936
138
  },
139
  "tie_word_embeddings": false,
140
- "transformers_version": "5.0.0.dev0",
141
  "use_cache": true,
142
  "use_sliding_window": false,
143
  "vision_config": {
 
1
  {
2
+ "_rope_parameters": {
3
+ "rope_theta": 1000000,
4
+ "rope_type": "default"
5
+ },
6
+ "_rope_scaling": {
7
+ "rope_theta": 1000000,
8
+ "rope_type": "default"
9
+ },
10
  "architectures": [
11
  "IsaacForConditionalGeneration"
12
  ],
 
61
  "num_attention_heads": 16,
62
  "num_hidden_layers": 28,
63
  "num_key_value_heads": 8,
64
+ "pad_token_id": null,
65
  "pixel_shuffle_scale": 2,
66
  "rms_norm_eps": 1e-06,
 
 
 
 
67
  "rope_theta": 1000000,
68
  "sliding_window": null,
69
  "text_config": {
 
117
  "num_attention_heads": 16,
118
  "num_hidden_layers": 28,
119
  "num_key_value_heads": 8,
120
+ "pad_token_id": null,
121
  "pixel_shuffle_scale": 2,
122
  "rms_norm_eps": 1e-06,
123
  "rope_parameters": {
 
125
  "rope_type": "default"
126
  },
127
  "sliding_window": null,
128
+ "tie_word_embeddings": false,
129
  "use_cache": true,
130
  "use_sliding_window": false,
131
  "vision_max_num_patches": 6144,
 
144
  "vocab_size": 151936
145
  },
146
  "tie_word_embeddings": false,
147
+ "transformers_version": "5.0.0",
148
  "use_cache": true,
149
  "use_sliding_window": false,
150
  "vision_config": {
generation_config.json CHANGED
@@ -2,5 +2,8 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 151643,
4
  "eos_token_id": 151645,
5
- "transformers_version": "5.0.0.dev0"
 
 
 
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 151643,
4
  "eos_token_id": 151645,
5
+ "output_attentions": false,
6
+ "output_hidden_states": false,
7
+ "transformers_version": "5.0.0",
8
+ "use_cache": true
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de0ee41ebb1e9d9d4cb295b9ea2f7e2b2e2ab05c60cc3de2ffd77a59c7a8d61
3
- size 10268395696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1396d694a5d7fbbb5caf8338ca8a0921601d2f4fa935d4c73e6b21727837a0df
3
+ size 10268395688