Upload VQA model in safetensors format after training

Browse files

Files changed (3) hide show

config.json +143 -0
metadata.json +9 -0
model.safetensors +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,143 @@

+{
+  "cnn_type": "vit-base",
+  "config_dict": {
+    "answer_spaces": {
+      "choice_multiple": {
+        "barretts": 3,
+        "biopsy forceps": 14,
+        "cecum": 8,
+        "hemorrhoids": 5,
+        "ileum": 6,
+        "injection needle": 13,
+        "metal clip": 11,
+        "none": 15,
+        "oesophagitis": 0,
+        "polyp": 4,
+        "polyp snare": 12,
+        "pylorus": 9,
+        "short-segment barretts": 2,
+        "tube": 10,
+        "ulcerative colitis": 1,
+        "z-line": 7
+      },
+      "choice_single": {
+        "11-20mm": 8,
+        "5-10mm": 7,
+        "<5mm": 6,
+        ">20": 10,
+        ">20mm": 9,
+        "capsule endoscopy": 3,
+        "colonoscopy": 4,
+        "gastroscopy": 5,
+        "none": 11,
+        "paris iia": 1,
+        "paris ip": 0,
+        "paris is": 2
+      },
+      "color": {
+        "black": 3,
+        "blue": 8,
+        "brown": 11,
+        "flesh": 1,
+        "green": 10,
+        "grey": 9,
+        "landmark:grey": 0,
+        "none": 13,
+        "orange": 4,
+        "pink": 2,
+        "purple": 12,
+        "red": 5,
+        "white": 6,
+        "yellow": 7
+      },
+      "location": {
+        "center": 4,
+        "center-left": 3,
+        "center-right": 5,
+        "lower-center": 7,
+        "lower-left": 6,
+        "lower-right": 8,
+        "lower-rigth": 8,
+        "none": 9,
+        "upper-center": 1,
+        "upper-left": 0,
+        "upper-right": 2
+      },
+      "numerical": {
+        "0": 0,
+        "1": 1,
+        "10": 10,
+        "11": 11,
+        "12": 12,
+        "13": 13,
+        "14": 14,
+        "15": 15,
+        "16": 16,
+        "2": 2,
+        "3": 3,
+        "4": 4,
+        "5": 5,
+        "6": 6,
+        "7": 7,
+        "8": 8,
+        "9": 9
+      },
+      "yesno": {
+        "no": 1,
+        "not relevant": 2,
+        "yes": 0
+      }
+    },
+    "batch_size": 32,
+    "captions_file": "data/kvasir-captions.json",
+    "checkpoint_path": "artifacts/vqa_cnn_bilstm.pth",
+    "cnn_out_dim": 512,
+    "dataset_name": "SimulaMet-HOST/Kvasir-VQA",
+    "device": "cuda",
+    "embedding_dim": 128,
+    "hidden_dim": 256,
+    "img_dir": "data/images",
+    "img_size": [
+      224,
+      224
+    ],
+    "jsonl_file": "data/kvasir-vqa.jsonl",
+    "learning_rate": 0.0001,
+    "max_seq_len": 20,
+    "num_epochs": 1,
+    "num_workers": 2,
+    "output_dir": "artifacts/output",
+    "patience": 5,
+    "question_types": {
+      "Are there any abnormalities in the image? Check all that are present.": "choice_multiple",
+      "Are there any anatomical landmarks in the image? Check all that are present.": "choice_multiple",
+      "Are there any instruments in the image? Check all that are present.": "choice_multiple",
+      "Does this image contain any finding?": "yesno",
+      "Have all polyps been removed?": "yesno",
+      "How many findings are present?": "numerical",
+      "How many instruments are in the image?": "numerical",
+      "How many instrumnets are in the image?": "numerical",
+      "How many polyps are in the image?": "numerical",
+      "Is there a green/black box artefact?": "yesno",
+      "Is there text?": "yesno",
+      "Is this finding easy to detect?": "yesno",
+      "What color is the abnormality? If more than one separate with ;": "color",
+      "What color is the anatomical landmark? If more than one separate with ;": "color",
+      "What is the size of the polyp?": "choice_single",
+      "What type of polyp is present?": "choice_single",
+      "What type of procedure is the image taken from?": "choice_single",
+      "Where in the image is the abnormality?": "location",
+      "Where in the image is the anatomical landmark?": "location",
+      "Where in the image is the instrument?": "location"
+    },
+    "seed": 42,
+    "test_split": 0.15,
+    "train_split": 0.7,
+    "use_multi_gpu": true,
+    "val_split": 0.15,
+    "vocab_size": 1399
+  },
+  "model_type": "vqa_cnn_bilstm",
+  "transformers_version": "4.51.1",
+  "vocab_size": 1399
+}

metadata.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "model_type": "vit-base",
+  "dataset": "SimulaMet-HOST/Kvasir-VQA",
+  "training_args": {
+    "batch_size": 32,
+    "num_epochs": 1,
+    "learning_rate": 0.0001
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7b78bf022353183e0a969c90510a50690ec02587edecc2db88d89574b988c4f
+size 368335032