Initial commit of LLaVA model and processor

Files changed (8) hide show

config.json CHANGED Viewed

@@ -44,7 +44,7 @@
   },
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.45.2",
   "vision_config": {
     "hidden_act": "gelu_pytorch_tanh",
     "hidden_size": 1152,

   },
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.47.1",
   "vision_config": {
     "hidden_act": "gelu_pytorch_tanh",
     "hidden_size": 1152,

generation_config.json CHANGED Viewed

@@ -3,5 +3,5 @@
   "bos_token_id": 1,
   "eos_token_id": 2,
   "pad_token_id": 32001,
-  "transformers_version": "4.45.2"
 }

   "bos_token_id": 1,
   "eos_token_id": 2,
   "pad_token_id": 32001,
+  "transformers_version": "4.47.1"
 }

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0e8517d6c3f6979745cd3927db7597b112a78200d366cd84fc365cc0dc07c6f
 size 4982343568

 version https://git-lfs.github.com/spec/v1
+oid sha256:994bb4543a2c76f44db279303e5bd4917c6c74028896a32cb43ebfe916f924f3
 size 4982343568

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ad0982732bed63d02a71e528973081fd739d22fb92a4dccab1f8b3d03d8099a
 size 4490727736

 version https://git-lfs.github.com/spec/v1
+oid sha256:825fd0df98f839c16025190216e041b2f7cc13ff5e2bd08c53a1858f747516fa
 size 4490727736

preprocessor_config.json CHANGED Viewed

@@ -1,51 +1,97 @@
 {
   "crop_size": {
-    "height": 384,
-    "width": 384
   },
   "do_center_crop": true,
-  "do_convert_rgb": false,
   "do_normalize": true,
   "do_pad": true,
   "do_rescale": true,
-  "do_resize": true,
   "image_grid_pinpoints": [
     [
-      768,
-      384
     ],
     [
-      384,
-      768
     ],
     [
-      768,
-      768
     ],
     [
-      384,
-      1152
     ],
     [
-      1152,
-      384
     ]
   ],
   "image_mean": [
-    0.5,
-    0.5,
-    0.5
   ],
   "image_processor_type": "LlavaNextImageProcessor",
   "image_std": [
-    0.5,
-    0.5,
-    0.5
   ],
   "processor_class": "LlavaNextProcessor",
   "resample": 3,
   "rescale_factor": 0.00392156862745098,
   "size": {
-    "shortest_edge": 384
   }
 }

 {
   "crop_size": {
+    "height": 224,
+    "width": 224
   },
   "do_center_crop": true,
+  "do_convert_rgb": true,
   "do_normalize": true,
   "do_pad": true,
   "do_rescale": true,
+  "do_resize": {
+    "crop_size": {
+      "height": 384,
+      "width": 384
+    },
+    "do_center_crop": true,
+    "do_convert_rgb": false,
+    "do_normalize": true,
+    "do_pad": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_grid_pinpoints": [
+      [
+        768,
+        384
+      ],
+      [
+        384,
+        768
+      ],
+      [
+        768,
+        768
+      ],
+      [
+        384,
+        1152
+      ],
+      [
+        1152,
+        384
+      ]
+    ],
+    "image_mean": [
+      0.5,
+      0.5,
+      0.5
+    ],
+    "image_std": [
+      0.5,
+      0.5,
+      0.5
+    ],
+    "size": {
+      "shortest_edge": 384
+    }
+  },
   "image_grid_pinpoints": [
     [
+      336,
+      672
     ],
     [
+      672,
+      336
     ],
     [
+      672,
+      672
     ],
     [
+      1008,
+      336
     ],
     [
+      336,
+      1008
     ]
   ],
   "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
   ],
   "image_processor_type": "LlavaNextImageProcessor",
   "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
   ],
   "processor_class": "LlavaNextProcessor",
   "resample": 3,
   "rescale_factor": 0.00392156862745098,
   "size": {
+    "shortest_edge": 224
   }
 }

processor_config.json CHANGED Viewed

@@ -1,6 +1,7 @@
 {
   "image_token": "<image>",
-  "patch_size": 14,
   "processor_class": "LlavaNextProcessor",
-  "vision_feature_select_strategy": "default"
 }

 {
   "image_token": "<image>",
+  "num_additional_image_tokens": 0,
+  "patch_size": null,
   "processor_class": "LlavaNextProcessor",
+  "vision_feature_select_strategy": null
 }

special_tokens_map.json CHANGED Viewed

@@ -13,6 +13,7 @@
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
     "content": "<unk>",
     "lstrip": false,

     "rstrip": false,
     "single_word": false
   },
+  "image_token": "<image>",
   "pad_token": {
     "content": "<unk>",
     "lstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -37,9 +37,12 @@
     }
   },
   "bos_token": "<s>",
-  "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": false,
   "model_max_length": 4096,
   "pad_token": "<unk>",

     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "extra_special_tokens": {
+    "image_token": "<image>"
+  },
+  "image_token": "<image>",
   "legacy": false,
   "model_max_length": 4096,
   "pad_token": "<unk>",