Upload Ovis2ForConditionalGeneration

Browse files

Files changed (3) hide show

config.json +40 -5
generation_config.json +4 -1
model.safetensors +3 -0

config.json CHANGED Viewed

@@ -2,21 +2,28 @@
   "architectures": [
     "Ovis2ForConditionalGeneration"
   ],
   "hidden_size": 1536,
   "image_token_id": 151665,
   "initializer_range": 0.02,
   "model_type": "ovis2",
   "text_config": {
     "architectures": [
       "Qwen2ForCausalLM"
     ],
     "attention_dropout": 0.0,
     "bos_token_id": 151643,
     "eos_token_id": 151645,
     "hidden_act": "silu",
     "hidden_size": 1536,
     "initializer_range": 0.02,
     "intermediate_size": 8960,
     "layer_types": [
       "full_attention",
       "full_attention",
@@ -53,19 +60,30 @@
     "num_attention_heads": 12,
     "num_hidden_layers": 28,
     "num_key_value_heads": 2,
     "rms_norm_eps": 1e-06,
-    "rope_scaling": null,
-    "rope_theta": 1000000.0,
     "sliding_window": null,
     "tie_word_embeddings": true,
-    "torch_dtype": "bfloat16",
     "use_cache": true,
     "use_sliding_window": false,
     "vocab_size": 151936
   },
-  "torch_dtype": "float32",
-  "transformers_version": "4.56.0.dev0",
   "vision_config": {
     "attention_dropout": 0.0,
     "backbone_config": {
       "_attn_implementation_autoset": true,
@@ -151,25 +169,42 @@
       "use_bias": false
     },
     "backbone_kwargs": {},
     "depths": null,
     "drop_cls_token": false,
     "hidden_act": "silu",
     "hidden_size": 1024,
     "hidden_stride": 2,
     "image_size": 448,
     "initializer_range": 0.02,
     "intermediate_size": 2816,
     "mlp_bias": false,
     "model_type": "",
     "num_attention_heads": 8,
     "num_channels": 3,
     "num_hidden_layers": 24,
     "num_visual_indicator_tokens": 5,
     "patch_size": 14,
     "qkv_bias": false,
     "rms_norm_eps": 1e-05,
     "tau": 1.0,
     "tokenize_function": "softmax",
     "use_indicators": false,
     "vocab_size": 65536
   },

   "architectures": [
     "Ovis2ForConditionalGeneration"
   ],
+  "dtype": "float32",
   "hidden_size": 1536,
   "image_token_id": 151665,
   "initializer_range": 0.02,
   "model_type": "ovis2",
   "text_config": {
+    "add_cross_attention": false,
     "architectures": [
       "Qwen2ForCausalLM"
     ],
     "attention_dropout": 0.0,
     "bos_token_id": 151643,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "dtype": "float32",
     "eos_token_id": 151645,
+    "finetuning_task": null,
     "hidden_act": "silu",
     "hidden_size": 1536,
     "initializer_range": 0.02,
     "intermediate_size": 8960,
+    "is_decoder": false,
     "layer_types": [
       "full_attention",
       "full_attention",
     "num_attention_heads": 12,
     "num_hidden_layers": 28,
     "num_key_value_heads": 2,
+    "pad_token_id": null,
+    "prefix": null,
+    "pruned_heads": {},
     "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+      "rope_theta": 1000000.0,
+      "rope_type": "default"
+    },
+    "sep_token_id": null,
     "sliding_window": null,
+    "task_specific_params": null,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "torchscript": false,
     "use_cache": true,
     "use_sliding_window": false,
     "vocab_size": 151936
   },
+  "tie_word_embeddings": true,
+  "transformers_version": "5.5.0.dev0",
   "vision_config": {
+    "add_cross_attention": false,
     "attention_dropout": 0.0,
     "backbone_config": {
       "_attn_implementation_autoset": true,
       "use_bias": false
     },
     "backbone_kwargs": {},
+    "bos_token_id": null,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
     "depths": null,
     "drop_cls_token": false,
+    "dtype": "float32",
+    "eos_token_id": null,
+    "finetuning_task": null,
     "hidden_act": "silu",
     "hidden_size": 1024,
     "hidden_stride": 2,
     "image_size": 448,
     "initializer_range": 0.02,
     "intermediate_size": 2816,
+    "is_decoder": false,
     "mlp_bias": false,
     "model_type": "",
     "num_attention_heads": 8,
     "num_channels": 3,
     "num_hidden_layers": 24,
     "num_visual_indicator_tokens": 5,
+    "pad_token_id": null,
     "patch_size": 14,
+    "prefix": null,
+    "pruned_heads": {},
     "qkv_bias": false,
     "rms_norm_eps": 1e-05,
+    "sep_token_id": null,
+    "task_specific_params": null,
     "tau": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
     "tokenize_function": "softmax",
+    "tokenizer_class": null,
+    "torchscript": false,
     "use_indicators": false,
     "vocab_size": 65536
   },

generation_config.json CHANGED Viewed

@@ -2,5 +2,8 @@
   "_from_model_config": true,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
-  "transformers_version": "4.56.0.dev0"
 }

   "_from_model_config": true,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
+  "output_attentions": false,
+  "output_hidden_states": false,
+  "transformers_version": "5.5.0.dev0",
+  "use_cache": true
 }

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc5e9c6092d80fbe27004ba0540cbf484f091fc1853bbef7b05b6914eea8f1cb
+size 8891707608