RioJune
/

AG-KD

Zero-Shot Object Detection

image-text-to-text

Model card Files Files and versions

Fixed README.md

#4

by shantam00 - opened Aug 28, 2025

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

Files changed (1) hide show

README.md +6 -4

README.md CHANGED Viewed

@@ -31,20 +31,22 @@ Here's a basic example of how to use the model for abnormality grounding:
 ```python
 import torch
 from PIL import Image
-from transformers import AutoModel, AutoProcessor
 # Load model and processor
 model_id = "RioJune/AG-KD"
-model = AutoModel.from_pretrained(model_id, trust_remote_code=True)
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 # Example image (replace with your medical image path)
 # Ensure 'your_medical_image.png' exists in your directory or provide a full path.
 image = Image.open("path/to/your/medical_image.png").convert("RGB")
 # Example instruction for abnormality grounding
-# The model expects instructions to start with specific tokens like <OD> for object detection.
-instruction = "<OD> Please localize the lesion. "
 # Prepare inputs
 inputs = processor(images=image, text=instruction, return_tensors="pt")

 ```python
 import torch
 from PIL import Image
+from transformers import AutoModelForCausalLM, AutoProcessor
 # Load model and processor
 model_id = "RioJune/AG-KD"
+model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 # Example image (replace with your medical image path)
 # Ensure 'your_medical_image.png' exists in your directory or provide a full path.
 image = Image.open("path/to/your/medical_image.png").convert("RGB")
+# The model expects instructions to start with a task token such as <OD>, <CAPTION_TO_PHRASE_GROUNDING>, or <CAPTION>, depending on the task.
 # Example instruction for abnormality grounding
+target = "pulmonary fibrosis"
+definition = "Scarring of the lung tissue creating a dense fibrous appearance."
+instruction = f"<CAPTION_TO_PHRASE_GROUNDING>Locate the phrases in the caption: {target} means {definition}."
 # Prepare inputs
 inputs = processor(images=image, text=instruction, return_tensors="pt")