microsoft
/

BiomedVLP-BioViL-T

Feature Extraction

Model card Files Files and versions

ozanoktay committed on Mar 16, 2023

Commit

a0adb7d

·

1 Parent(s): 371998e

Update README.md

Files changed (1) hide show

README.md +19 -12

README.md CHANGED Viewed

@@ -68,24 +68,31 @@ Here is how to use this model to extract radiological sentence embeddings and ob
 ```python
 import torch
 from transformers import AutoModel, AutoTokenizer
 # Load the model and tokenizer
 url = "microsoft/BiomedVLP-BioViL-T"
 tokenizer = AutoTokenizer.from_pretrained(url, trust_remote_code=True)
 model = AutoModel.from_pretrained(url, trust_remote_code=True)
-# Input text prompts (e.g., reference, synonym, contradiction)
-text_prompts = ["There is no pneumothorax or pleural effusion",
-                "No pleural effusion or pneumothorax is seen",
-                "The extent of the pleural effusion is constant."
-                "Interval enlargement of moderate pleural effusion"]
 # Tokenize and compute the sentence embeddings
-tokenizer_output = tokenizer.batch_encode_plus(batch_text_or_text_pairs=text_prompts,
-                                               add_special_tokens=True,
-                                               padding='longest',
-                                               return_tensors='pt')
-embeddings = model.get_projected_text_embeddings(input_ids=tokenizer_output.input_ids,
                                                  attention_mask=tokenizer_output.attention_mask)
-# Compute the cosine similarity of sentence embeddings obtained from input text prompts.
-sim = torch.mm(embeddings, embeddings.t())
 ```
 ## Data

 ```python
 import torch
 from transformers import AutoModel, AutoTokenizer
 # Load the model and tokenizer
 url = "microsoft/BiomedVLP-BioViL-T"
 tokenizer = AutoTokenizer.from_pretrained(url, trust_remote_code=True)
 model = AutoModel.from_pretrained(url, trust_remote_code=True)
+# Input text prompts describing findings.
+# The order of prompts is adjusted to capture the spectrum from absence of a finding to its temporal progression.
+text_prompts = ["No pleural effusion or pneumothorax is seen",
+                "There is no pneumothorax or pleural effusion",
+                "The extent of the pleural effusion is reduced.",
+                "The extent of the pleural effusion remains constant.",
+                "Interval enlargement of pleural effusion"]
 # Tokenize and compute the sentence embeddings
+with torch.no_grad():
+    tokenizer_output = tokenizer.batch_encode_plus(batch_text_or_text_pairs=text_prompts,
+                                                   add_special_tokens=True,
+                                                   padding='longest',
+                                                   return_tensors='pt')
+    embeddings = model.get_projected_text_embeddings(input_ids=tokenizer_output.input_ids,
                                                  attention_mask=tokenizer_output.attention_mask)
+    # Compute the cosine similarity of sentence embeddings obtained from input text prompts.
+    sim = torch.mm(embeddings, embeddings.t())
 ```
 ## Data