cwangrun committed on
Commit
9acd189
·
verified ·
1 Parent(s): ba779f8

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +47 -0
  2. config.json +1 -0
  3. configuration_chexficient.py +3 -1
README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
4
+
5
+ device = "cuda" if torch.cuda.is_available() else "cpu"
6
+
7
+ # ===== 加载模型 =====
8
+ # model = AutoModel.from_pretrained(
9
+ # "StanfordAIMI/CheXficient",
10
+ # trust_remote_code=True
11
+ # ).to(device)
12
+
13
+ model = AutoModel.from_pretrained(
14
+ "/mnt/d/torch/CheXficient/hf_model",
15
+ trust_remote_code=True
16
+ ).to(device)
17
+
18
+
19
+ # ===== 加载tokenizer =====
20
+ tokenizer = AutoTokenizer.from_pretrained(
21
+ "emilyalsentzer/Bio_ClinicalBERT"
22
+ )
23
+
24
+ # ===== 加载image processor =====
25
+ image_processor = AutoImageProcessor.from_pretrained(
26
+ "facebook/dinov2-base"
27
+ )
28
+
29
+ # ===== 准备数据 =====
30
+ image = Image.open("xray.jpg").convert("RGB")
31
+ text = ["pneumonia", "no acute cardiopulmonary abnormality"]
32
+
33
+ image_inputs = image_processor(images=image, return_tensors="pt").to(device)
34
+ text_inputs = tokenizer(text, padding=True, return_tensors="pt").to(device)
35
+
36
+ # ===== 推理 =====
37
+ with torch.no_grad():
38
+ outputs = model(
39
+ pixel_values=image_inputs["pixel_values"],
40
+ input_ids=text_inputs["input_ids"],
41
+ attention_mask=text_inputs["attention_mask"],
42
+ )
43
+
44
+ logits = outputs["logits_per_image"]
45
+ probs = logits.softmax(dim=-1)
46
+
47
+ print(probs)
config.json CHANGED
@@ -3,6 +3,7 @@
3
  "CheXficientModel"
4
  ],
5
  "image_size": 378,
 
6
  "model_type": "chexficient_clip",
7
  "projection_dim": 512,
8
  "text_model_name": "emilyalsentzer/Bio_ClinicalBERT",
 
3
  "CheXficientModel"
4
  ],
5
  "image_size": 378,
6
+ "max_bert_length": 256,
7
  "model_type": "chexficient_clip",
8
  "projection_dim": 512,
9
  "text_model_name": "emilyalsentzer/Bio_ClinicalBERT",
configuration_chexficient.py CHANGED
@@ -9,6 +9,7 @@ class CheXficientConfig(PretrainedConfig):
9
  text_model_name="emilyalsentzer/Bio_ClinicalBERT",
10
  projection_dim=512,
11
  image_size=378,
 
12
  **kwargs
13
  ):
14
  super().__init__(**kwargs)
@@ -16,4 +17,5 @@ class CheXficientConfig(PretrainedConfig):
16
  self.vision_model_name = vision_model_name
17
  self.text_model_name = text_model_name
18
  self.projection_dim = projection_dim
19
- self.image_size = image_size
 
 
9
  text_model_name="emilyalsentzer/Bio_ClinicalBERT",
10
  projection_dim=512,
11
  image_size=378,
12
+ max_bert_length=256,
13
  **kwargs
14
  ):
15
  super().__init__(**kwargs)
 
17
  self.vision_model_name = vision_model_name
18
  self.text_model_name = text_model_name
19
  self.projection_dim = projection_dim
20
+ self.image_size = image_size
21
+ self.max_bert_length = max_bert_length