Model split into two components: vision_model and text_decoder.

Browse files

Files changed (4) hide show

epcert_cfg_test_blip.json +26 -0
model_config.json +273 -0
split_0.onnx +3 -0
split_1.onnx +3 -0

epcert_cfg_test_blip.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+    "$schema": "../../../schema_gen/src/Config.json",
+    "version": 3,
+    "models": [
+        {
+            "modelId": "Salesforce/blip-image-captioning-base",
+            "tasks": [ {"taskId": "image-captioning"} ],
+            "modelPath": {
+                "image_encoder": "C:\\Users\\johnpaul\\mine\\conda\\output\\split_0.onnx",
+                "text_decoder": "C:\\Users\\johnpaul\\mine\\conda\\output\\split_1.onnx"
+            }
+        }
+    ],
+    "eps":[
+        {
+            "epId": {
+                "name": "CPUExecutionProvider",
+                "deviceType": "cpu"
+            }
+        }
+    ],
+    "settings":{
+        "quickRun": true,
+        "skipCompilation": true
+    }
+}

model_config.json ADDED Viewed

	@@ -0,0 +1,273 @@

+{
+    "type": "compositemodel",
+    "config": {
+        "model_path": "C:\\Users\\johnpaul\\mine\\conda\\cache\\default_workflow\\runs\\281e7214\\models",
+        "model_component_names": [
+            "split_0",
+            "split_1"
+        ],
+        "model_attributes": {
+            "return_dict": true,
+            "output_hidden_states": false,
+            "output_attentions": false,
+            "torchscript": false,
+            "torch_dtype": "float32",
+            "use_bfloat16": false,
+            "tf_legacy_loss": false,
+            "pruned_heads": {},
+            "tie_word_embeddings": true,
+            "chunk_size_feed_forward": 0,
+            "is_encoder_decoder": false,
+            "is_decoder": false,
+            "cross_attention_hidden_size": null,
+            "add_cross_attention": false,
+            "tie_encoder_decoder": false,
+            "max_length": 20,
+            "min_length": 0,
+            "do_sample": false,
+            "early_stopping": false,
+            "num_beams": 1,
+            "num_beam_groups": 1,
+            "diversity_penalty": 0.0,
+            "temperature": 1.0,
+            "top_k": 50,
+            "top_p": 1.0,
+            "typical_p": 1.0,
+            "repetition_penalty": 1.0,
+            "length_penalty": 1.0,
+            "no_repeat_ngram_size": 0,
+            "encoder_no_repeat_ngram_size": 0,
+            "bad_words_ids": null,
+            "num_return_sequences": 1,
+            "output_scores": false,
+            "return_dict_in_generate": false,
+            "forced_bos_token_id": null,
+            "forced_eos_token_id": null,
+            "remove_invalid_values": false,
+            "exponential_decay_length_penalty": null,
+            "suppress_tokens": null,
+            "begin_suppress_tokens": null,
+            "architectures": [
+                "BlipForConditionalGeneration"
+            ],
+            "finetuning_task": null,
+            "id2label": {
+                "0": "LABEL_0",
+                "1": "LABEL_1"
+            },
+            "label2id": {
+                "LABEL_0": 0,
+                "LABEL_1": 1
+            },
+            "tokenizer_class": null,
+            "prefix": null,
+            "bos_token_id": null,
+            "pad_token_id": null,
+            "eos_token_id": null,
+            "sep_token_id": null,
+            "decoder_start_token_id": null,
+            "task_specific_params": null,
+            "problem_type": null,
+            "_name_or_path": "Salesforce/blip-image-captioning-base",
+            "transformers_version": "4.52.4",
+            "initializer_factor": 1.0,
+            "model_type": "blip",
+            "text_config": {
+                "return_dict": true,
+                "output_hidden_states": false,
+                "output_attentions": false,
+                "torchscript": false,
+                "torch_dtype": null,
+                "use_bfloat16": false,
+                "tf_legacy_loss": false,
+                "pruned_heads": {},
+                "tie_word_embeddings": true,
+                "chunk_size_feed_forward": 0,
+                "is_encoder_decoder": false,
+                "is_decoder": true,
+                "cross_attention_hidden_size": null,
+                "add_cross_attention": false,
+                "tie_encoder_decoder": false,
+                "max_length": 20,
+                "min_length": 0,
+                "do_sample": false,
+                "early_stopping": false,
+                "num_beams": 1,
+                "num_beam_groups": 1,
+                "diversity_penalty": 0.0,
+                "temperature": 1.0,
+                "top_k": 50,
+                "top_p": 1.0,
+                "typical_p": 1.0,
+                "repetition_penalty": 1.0,
+                "length_penalty": 1.0,
+                "no_repeat_ngram_size": 0,
+                "encoder_no_repeat_ngram_size": 0,
+                "bad_words_ids": null,
+                "num_return_sequences": 1,
+                "output_scores": false,
+                "return_dict_in_generate": false,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "remove_invalid_values": false,
+                "exponential_decay_length_penalty": null,
+                "suppress_tokens": null,
+                "begin_suppress_tokens": null,
+                "architectures": null,
+                "finetuning_task": null,
+                "id2label": {
+                    "0": "LABEL_0",
+                    "1": "LABEL_1"
+                },
+                "label2id": {
+                    "LABEL_0": 0,
+                    "LABEL_1": 1
+                },
+                "tokenizer_class": null,
+                "prefix": null,
+                "bos_token_id": 30522,
+                "pad_token_id": 0,
+                "eos_token_id": 2,
+                "sep_token_id": 102,
+                "decoder_start_token_id": null,
+                "task_specific_params": null,
+                "problem_type": null,
+                "_name_or_path": "C:\\Users\\johnpaul\\mine\\conda",
+                "initializer_factor": 1.0,
+                "model_type": "blip_text_model",
+                "vocab_size": 30524,
+                "hidden_size": 768,
+                "encoder_hidden_size": 768,
+                "intermediate_size": 3072,
+                "projection_dim": 768,
+                "hidden_dropout_prob": 0.0,
+                "num_hidden_layers": 12,
+                "num_attention_heads": 12,
+                "max_position_embeddings": 512,
+                "layer_norm_eps": 1e-12,
+                "hidden_act": "gelu",
+                "initializer_range": 0.02,
+                "attention_probs_dropout_prob": 0.0,
+                "use_cache": true,
+                "label_smoothing": 0.0
+            },
+            "vision_config": {
+                "return_dict": true,
+                "output_hidden_states": false,
+                "output_attentions": false,
+                "torchscript": false,
+                "torch_dtype": null,
+                "use_bfloat16": false,
+                "tf_legacy_loss": false,
+                "pruned_heads": {},
+                "tie_word_embeddings": true,
+                "chunk_size_feed_forward": 0,
+                "is_encoder_decoder": false,
+                "is_decoder": false,
+                "cross_attention_hidden_size": null,
+                "add_cross_attention": false,
+                "tie_encoder_decoder": false,
+                "max_length": 20,
+                "min_length": 0,
+                "do_sample": false,
+                "early_stopping": false,
+                "num_beams": 1,
+                "num_beam_groups": 1,
+                "diversity_penalty": 0.0,
+                "temperature": 1.0,
+                "top_k": 50,
+                "top_p": 1.0,
+                "typical_p": 1.0,
+                "repetition_penalty": 1.0,
+                "length_penalty": 1.0,
+                "no_repeat_ngram_size": 0,
+                "encoder_no_repeat_ngram_size": 0,
+                "bad_words_ids": null,
+                "num_return_sequences": 1,
+                "output_scores": false,
+                "return_dict_in_generate": false,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "remove_invalid_values": false,
+                "exponential_decay_length_penalty": null,
+                "suppress_tokens": null,
+                "begin_suppress_tokens": null,
+                "architectures": null,
+                "finetuning_task": null,
+                "id2label": {
+                    "0": "LABEL_0",
+                    "1": "LABEL_1"
+                },
+                "label2id": {
+                    "LABEL_0": 0,
+                    "LABEL_1": 1
+                },
+                "tokenizer_class": null,
+                "prefix": null,
+                "bos_token_id": null,
+                "pad_token_id": null,
+                "eos_token_id": null,
+                "sep_token_id": null,
+                "decoder_start_token_id": null,
+                "task_specific_params": null,
+                "problem_type": null,
+                "_name_or_path": "C:\\Users\\johnpaul\\mine\\conda",
+                "dropout": 0.0,
+                "initializer_factor": 1.0,
+                "model_type": "blip_vision_model",
+                "num_channels": 3,
+                "hidden_size": 768,
+                "intermediate_size": 3072,
+                "projection_dim": 512,
+                "num_hidden_layers": 12,
+                "num_attention_heads": 12,
+                "patch_size": 16,
+                "image_size": 384,
+                "initializer_range": 0.02,
+                "attention_dropout": 0.0,
+                "layer_norm_eps": 1e-05,
+                "hidden_act": "gelu"
+            },
+            "projection_dim": 512,
+            "logit_scale_init_value": 2.6592,
+            "initializer_range": 0.02,
+            "image_text_hidden_size": 256,
+            "label_smoothing": 0.0,
+            "split_assignments": {
+                "vision_model.embeddings": 0,
+                "vision_model.encoder": 0,
+                "vision_model.post_layernorm": 0,
+                "text_decoder.bert": 1,
+                "text_decoder.cls": 1
+            },
+            "hf_task": "image-to-text",
+            "type": "hfmodel"
+        },
+        "model_components": [
+            {
+                "type": "onnxmodel",
+                "config": {
+                    "model_path": "C:\\Users\\johnpaul\\mine\\conda\\output",
+                    "onnx_file_name": "split_0.onnx",
+                    "inference_settings": null,
+                    "use_ort_extensions": false,
+                    "external_initializers_file_name": null,
+                    "constant_inputs_file_name": null,
+                    "model_attributes": null
+                }
+            },
+            {
+                "type": "onnxmodel",
+                "config": {
+                    "model_path": "C:\\Users\\johnpaul\\mine\\conda\\output",
+                    "onnx_file_name": "split_1.onnx",
+                    "inference_settings": null,
+                    "use_ort_extensions": false,
+                    "external_initializers_file_name": null,
+                    "constant_inputs_file_name": null,
+                    "model_attributes": null
+                }
+            }
+        ]
+    }
+}

split_0.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1641f50623c959c20b6ee0dbc9628bad83fe9108fe190c5fc4e0a24a73785938
+size 345122738

split_1.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6440987e079271614aa19034cf0fa25ec0e31d35efe64674130b8e8cfa860466
+size 647427238