Update README.md
Browse files
README.md
CHANGED
|
@@ -68,24 +68,31 @@ Here is how to use this model to extract radiological sentence embeddings and ob
|
|
| 68 |
```python
|
| 69 |
import torch
|
| 70 |
from transformers import AutoModel, AutoTokenizer
|
|
|
|
| 71 |
# Load the model and tokenizer
|
| 72 |
url = "microsoft/BiomedVLP-BioViL-T"
|
| 73 |
tokenizer = AutoTokenizer.from_pretrained(url, trust_remote_code=True)
|
| 74 |
model = AutoModel.from_pretrained(url, trust_remote_code=True)
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
# Tokenize and compute the sentence embeddings
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
| 86 |
attention_mask=tokenizer_output.attention_mask)
|
| 87 |
-
|
| 88 |
-
|
|
|
|
| 89 |
```
|
| 90 |
|
| 91 |
## Data
|
|
|
|
| 68 |
```python
|
| 69 |
import torch
|
| 70 |
from transformers import AutoModel, AutoTokenizer
|
| 71 |
+
|
| 72 |
# Load the model and tokenizer
|
| 73 |
url = "microsoft/BiomedVLP-BioViL-T"
|
| 74 |
tokenizer = AutoTokenizer.from_pretrained(url, trust_remote_code=True)
|
| 75 |
model = AutoModel.from_pretrained(url, trust_remote_code=True)
|
| 76 |
+
|
| 77 |
+
# Input text prompts describing findings.
|
| 78 |
+
# The prompts are ordered to span the spectrum from absence of a finding to its temporal progression (reduced, constant, enlarged).
|
| 79 |
+
text_prompts = ["No pleural effusion or pneumothorax is seen",
|
| 80 |
+
"There is no pneumothorax or pleural effusion",
|
| 81 |
+
"The extent of the pleural effusion is reduced.",
|
| 82 |
+
"The extent of the pleural effusion remains constant.",
|
| 83 |
+
"Interval enlargement of pleural effusion"]
|
| 84 |
+
|
| 85 |
# Tokenize and compute the sentence embeddings
|
| 86 |
+
with torch.no_grad():
|
| 87 |
+
tokenizer_output = tokenizer.batch_encode_plus(batch_text_or_text_pairs=text_prompts,
|
| 88 |
+
add_special_tokens=True,
|
| 89 |
+
padding='longest',
|
| 90 |
+
return_tensors='pt')
|
| 91 |
+
embeddings = model.get_projected_text_embeddings(input_ids=tokenizer_output.input_ids,
|
| 92 |
attention_mask=tokenizer_output.attention_mask)
|
| 93 |
+
|
| 94 |
+
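# Compute the pairwise cosine similarity between the sentence embeddings of the input text prompts (the dot product equals the cosine similarity only when the embeddings are L2-normalised).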
|
| 95 |
+
sim = torch.mm(embeddings, embeddings.t())
|
| 96 |
```
|
| 97 |
|
| 98 |
## Data
|