jichao committed on
Commit ·
4725c4d
1
Parent(s): 6d85953
README
Browse files
README.md
CHANGED
|
@@ -17,7 +17,7 @@ Model Card for Mars ViT Base Model
|
|
| 17 |
- Dataset: 2 million CTX images
|
| 18 |
|
| 19 |
## Usage Examples
|
| 20 |
-
### Using timm
|
| 21 |
|
| 22 |
First download checkpoint-1199.pth (backbone only)
|
| 23 |
|
|
@@ -34,6 +34,16 @@ model = timm.create_model(
|
|
| 34 |
)
|
| 35 |
|
| 36 |
model.eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
x = torch.randn(1, 1, 224, 224)
|
| 38 |
with torch.no_grad():
|
| 39 |
features = model.forward_features(x) # shape [1, tokens, embed_dim]
|
|
@@ -54,9 +64,12 @@ image_processor = AutoImageProcessor.from_pretrained("jfang/mars-vit-base-ctx2m"
|
|
| 54 |
from PIL import Image
|
| 55 |
image = Image.open("some_image.png").convert("L") # 1-channel
|
| 56 |
inputs = image_processor(image, return_tensors="pt")
|
|
|
|
|
|
|
| 57 |
outputs = model(**inputs)
|
| 58 |
```
|
| 59 |
-
|
|
|
|
| 60 |
|
| 61 |
### Limitations
|
| 62 |
The model is trained specifically on CTX images and may not generalize well to other types of images without further fine-tuning.
|
|
|
|
| 17 |
- Dataset: 2 million CTX images
|
| 18 |
|
| 19 |
## Usage Examples
|
| 20 |
+
### Using timm (recommended)
|
| 21 |
|
| 22 |
First download checkpoint-1199.pth (backbone only)
|
| 23 |
|
|
|
|
| 34 |
)
|
| 35 |
|
| 36 |
model.eval()
|
| 37 |
+
|
| 38 |
+
# For real images: convert to a single channel, resize to 224x224, and normalize.
|
| 39 |
+
|
| 40 |
+
# transform example:
|
| 41 |
+
# transform = transforms.Compose([
|
| 42 |
+
# transforms.ToTensor(),
|
| 43 |
+
# transforms.Resize((224, 224)),
|
| 44 |
+
# transforms.Grayscale(num_output_channels=1),
|
| 45 |
+
# transforms.Normalize(mean=[0.5], std=[0.5])
|
| 46 |
+
# ])
|
| 47 |
x = torch.randn(1, 1, 224, 224)
|
| 48 |
with torch.no_grad():
|
| 49 |
features = model.forward_features(x) # shape [1, tokens, embed_dim]
|
|
|
|
| 64 |
from PIL import Image
|
| 65 |
image = Image.open("some_image.png").convert("L") # 1-channel
|
| 66 |
inputs = image_processor(image, return_tensors="pt")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
outputs = model(**inputs)
|
| 70 |
```
|
| 71 |
+
## MAE reconstruction
|
| 72 |
+
The ./mae folder contains the full encoder-decoder MAE model and a notebook for visualization.
|
| 73 |
|
| 74 |
### Limitations
|
| 75 |
The model is trained specifically on CTX images and may not generalize well to other types of images without further fine-tuning.
|