Upload 3 files

Files changed (3) hide show

README.md ADDED Viewed

+import torch
+from PIL import Image
+from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# ===== 加载模型 =====
+# model = AutoModel.from_pretrained(
+#     "StanfordAIMI/CheXficient",
+#     trust_remote_code=True
+# ).to(device)
+model = AutoModel.from_pretrained(
+    "/mnt/d/torch/CheXficient/hf_model",
+    trust_remote_code=True
+).to(device)
+# ===== 加载tokenizer =====
+tokenizer = AutoTokenizer.from_pretrained(
+    "emilyalsentzer/Bio_ClinicalBERT"
+)
+# ===== 加载image processor =====
+image_processor = AutoImageProcessor.from_pretrained(
+    "facebook/dinov2-base"
+)
+# ===== 准备数据 =====
+image = Image.open("xray.jpg").convert("RGB")
+text = ["pneumonia", "no acute cardiopulmonary abnormality"]
+image_inputs = image_processor(images=image, return_tensors="pt").to(device)
+text_inputs = tokenizer(text, padding=True, return_tensors="pt").to(device)
+# ===== 推理 =====
+with torch.no_grad():
+    outputs = model(
+        pixel_values=image_inputs["pixel_values"],
+        input_ids=text_inputs["input_ids"],
+        attention_mask=text_inputs["attention_mask"],
+    )
+logits = outputs["logits_per_image"]
+probs = logits.softmax(dim=-1)
+print(probs)

config.json CHANGED Viewed

@@ -3,6 +3,7 @@
     "CheXficientModel"
   ],
   "image_size": 378,
   "model_type": "chexficient_clip",
   "projection_dim": 512,
   "text_model_name": "emilyalsentzer/Bio_ClinicalBERT",

     "CheXficientModel"
   ],
   "image_size": 378,
+  "max_bert_length": 256,
   "model_type": "chexficient_clip",
   "projection_dim": 512,
   "text_model_name": "emilyalsentzer/Bio_ClinicalBERT",

configuration_chexficient.py CHANGED Viewed

@@ -9,6 +9,7 @@ class CheXficientConfig(PretrainedConfig):
         text_model_name="emilyalsentzer/Bio_ClinicalBERT",
         projection_dim=512,
         image_size=378,
         **kwargs
     ):
         super().__init__(**kwargs)
@@ -16,4 +17,5 @@ class CheXficientConfig(PretrainedConfig):
         self.vision_model_name = vision_model_name
         self.text_model_name = text_model_name
         self.projection_dim = projection_dim
-        self.image_size = image_size

         text_model_name="emilyalsentzer/Bio_ClinicalBERT",
         projection_dim=512,
         image_size=378,
+        max_bert_length=256,
         **kwargs
     ):
         super().__init__(**kwargs)
         self.vision_model_name = vision_model_name
         self.text_model_name = text_model_name
         self.projection_dim = projection_dim
+        self.image_size = image_size
+        self.max_bert_length = max_bert_length