Upload SmolVLMForConditionalGeneration
Browse files
- README.md +3 -0
- config.json +5 -2
- model.safetensors +1 -1
README.md
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# SmolDriver
|
| 2 |
|
| 3 |
Fine-tuned SmolVLM2-2.2B model.
|
|
|
|
| 1 |
+
---
|
| 2 |
+
{}
|
| 3 |
+
---
|
| 4 |
# SmolDriver
|
| 5 |
|
| 6 |
Fine-tuned SmolVLM2-2.2B model.
|
config.json
CHANGED
|
@@ -124,11 +124,14 @@
|
|
| 124 |
"use_reentrant_checkpointing": false,
|
| 125 |
"vision_config": {
|
| 126 |
"attention_dropout": 0.0,
|
|
|
|
|
|
|
| 127 |
"hidden_act": "gelu_pytorch_tanh",
|
| 128 |
"hidden_size": 1152,
|
| 129 |
"image_size": 384,
|
| 130 |
"initializer_range": 0.02,
|
| 131 |
"intermediate_size": 4304,
|
|
|
|
| 132 |
"layer_norm_eps": 1e-06,
|
| 133 |
"max_image_size": {
|
| 134 |
"longest_edge": 384
|
|
@@ -136,13 +139,13 @@
|
|
| 136 |
"model_type": "smolvlm_vision",
|
| 137 |
"num_attention_heads": 16,
|
| 138 |
"num_channels": 3,
|
|
|
|
| 139 |
"num_hidden_layers": 27,
|
|
|
|
| 140 |
"patch_size": 14,
|
| 141 |
"size": {
|
| 142 |
"longest_edge": 1920
|
| 143 |
},
|
| 144 |
-
"tie_word_embeddings": false,
|
| 145 |
-
"torch_dtype": "bfloat16",
|
| 146 |
"use_base_siglip": false
|
| 147 |
},
|
| 148 |
"vocab_size": 49280
|
|
|
|
| 124 |
"use_reentrant_checkpointing": false,
|
| 125 |
"vision_config": {
|
| 126 |
"attention_dropout": 0.0,
|
| 127 |
+
"embed_dim": 1152,
|
| 128 |
+
"grid_size": 27,
|
| 129 |
"hidden_act": "gelu_pytorch_tanh",
|
| 130 |
"hidden_size": 1152,
|
| 131 |
"image_size": 384,
|
| 132 |
"initializer_range": 0.02,
|
| 133 |
"intermediate_size": 4304,
|
| 134 |
+
"kv_dim": 1152,
|
| 135 |
"layer_norm_eps": 1e-06,
|
| 136 |
"max_image_size": {
|
| 137 |
"longest_edge": 384
|
|
|
|
| 139 |
"model_type": "smolvlm_vision",
|
| 140 |
"num_attention_heads": 16,
|
| 141 |
"num_channels": 3,
|
| 142 |
+
"num_heads": 16,
|
| 143 |
"num_hidden_layers": 27,
|
| 144 |
+
"num_resamplers": 4,
|
| 145 |
"patch_size": 14,
|
| 146 |
"size": {
|
| 147 |
"longest_edge": 1920
|
| 148 |
},
|
|
|
|
|
|
|
| 149 |
"use_base_siglip": false
|
| 150 |
},
|
| 151 |
"vocab_size": 49280
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4016752712
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e474f0cbd3383108b92c8b45aeef93b9b3314669f0a9abc7a7e252cba998597f
|
| 3 |
size 4016752712
|