Add Prost40M model weights and metadata
Browse files- LICENSE +3 -0
- README.md +71 -0
- config.json +43 -0
- model.safetensors +3 -0
- payload_manifest.json +34 -0
- preprocessor_config.json +21 -0
LICENSE
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
https://www.apache.org/licenses/LICENSE-2.0
|
README.md
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prost40M
|
| 2 |
+
|
| 3 |
+
**Prost40M** is a prostatectomy-specific foundation model pretrained with DINO on a large corpus of H&E prostatectomy slides.
|
| 4 |
+
It is designed as a strong feature extractor for computational pathology tasks where subtle prostate-specific morphology matters.
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
## Model at a Glance
|
| 8 |
+
|
| 9 |
+
| Field | Value |
|
| 10 |
+
| --- | --- |
|
| 11 |
+
| Model name | Prost40M |
|
| 12 |
+
| Backbone architecture | `vit_small` |
|
| 13 |
+
| Input size | `224 x 224` |
|
| 14 |
+
| Patch size | `14` |
|
| 15 |
+
| Embedding dimension | `384` |
|
| 16 |
+
| Released weights | Teacher backbone encoder |
|
| 17 |
+
| Domain | H&E prostatectomy histopathology |
|
| 18 |
+
|
| 19 |
+
## Quickstart
|
| 20 |
+
|
| 21 |
+
```python
|
| 22 |
+
import torch
|
| 23 |
+
import timm
|
| 24 |
+
from PIL import Image
|
| 25 |
+
from timm.data import resolve_data_config
|
| 26 |
+
from timm.data.transforms_factory import create_transform
|
| 27 |
+
|
| 28 |
+
model = timm.create_model("hf-hub:waticlems/Prost40M", pretrained=True)
|
| 29 |
+
model.eval()
|
| 30 |
+
|
| 31 |
+
transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model))
|
| 32 |
+
|
| 33 |
+
img = Image.open("tile.png").convert("RGB")
|
| 34 |
+
x = transform(img).unsqueeze(0)
|
| 35 |
+
with torch.inference_mode():
|
| 36 |
+
embedding = model(x) # shape: [1, 384]
|
| 37 |
+
print(embedding.shape)
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
## Motivation
|
| 41 |
+
|
| 42 |
+
Large pathology foundation models are typically trained on broad, multi-organ
|
| 43 |
+
data. Their generic features transfer well across many settings, but can be less
|
| 44 |
+
sensitive to fine-grained morphology of a specific organ. Prost40M was developed
|
| 45 |
+
to evaluate the value of organ-specific pretraining in prostate histopathology.
|
| 46 |
+
|
| 47 |
+
## Training Data
|
| 48 |
+
|
| 49 |
+
- Approximately 40 million image tiles at `0.50` microns per pixel
|
| 50 |
+
- 1888 H&E-stained prostatectomy slides
|
| 51 |
+
- 449 slides from 403 patients in the TCGA-PRAD cohort
|
| 52 |
+
- 1439 slides from 508 patients in the LEOPARD cohort
|
| 53 |
+
|
| 54 |
+
## Intended Use
|
| 55 |
+
|
| 56 |
+
- Tile-level feature extraction for downstream prostate histopathology tasks
|
| 57 |
+
|
| 58 |
+
## Limitations
|
| 59 |
+
|
| 60 |
+
- Performance can degrade under domain shift (scanner, stain protocol, center)
|
| 61 |
+
- Learned representations reflect dataset composition and preprocessing choices
|
| 62 |
+
|
| 63 |
+
## License
|
| 64 |
+
|
| 65 |
+
Apache-2.0
|
| 66 |
+
|
| 67 |
+
## Citation
|
| 68 |
+
|
| 69 |
+
If you use **Prost40M** in your research, please cite:
|
| 70 |
+
|
| 71 |
+
- _citation to be added soon_
|
config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architecture": "vit_small_patch16_224",
|
| 3 |
+
"model_args": {
|
| 4 |
+
"patch_size": 14,
|
| 5 |
+
"img_size": 224,
|
| 6 |
+
"num_classes": 0
|
| 7 |
+
},
|
| 8 |
+
"num_classes": 0,
|
| 9 |
+
"pretrained_cfg": {
|
| 10 |
+
"architecture": "vit_small_patch16_224",
|
| 11 |
+
"custom_load": false,
|
| 12 |
+
"input_size": [
|
| 13 |
+
3,
|
| 14 |
+
224,
|
| 15 |
+
224
|
| 16 |
+
],
|
| 17 |
+
"fixed_input_size": true,
|
| 18 |
+
"interpolation": "bicubic",
|
| 19 |
+
"crop_pct": 1.0,
|
| 20 |
+
"mean": [
|
| 21 |
+
0.485,
|
| 22 |
+
0.456,
|
| 23 |
+
0.406
|
| 24 |
+
],
|
| 25 |
+
"std": [
|
| 26 |
+
0.229,
|
| 27 |
+
0.224,
|
| 28 |
+
0.225
|
| 29 |
+
],
|
| 30 |
+
"first_conv": "patch_embed.proj",
|
| 31 |
+
"classifier": "head",
|
| 32 |
+
"num_features": 384,
|
| 33 |
+
"license": "apache-2.0",
|
| 34 |
+
"tags": [
|
| 35 |
+
"histopathology",
|
| 36 |
+
"self-supervised-learning",
|
| 37 |
+
"dino",
|
| 38 |
+
"vision-transformer",
|
| 39 |
+
"prostate",
|
| 40 |
+
"he-stain"
|
| 41 |
+
]
|
| 42 |
+
}
|
| 43 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dd486b73a30adbc6d8dc6419876d01ce19bf897ac3db1e86afa65323a0ab7f4
|
| 3 |
+
size 86492048
|
payload_manifest.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"repo_id": "waticlems/Prost40M",
|
| 3 |
+
"artifact_dir": "/data/pathology/projects/clement/models/hf/Prost40M",
|
| 4 |
+
"payload_dir": "/data/pathology/projects/clement/models/hf/Prost40M/payload",
|
| 5 |
+
"license": "apache-2.0",
|
| 6 |
+
"private": false,
|
| 7 |
+
"built_at_utc": "2026-02-20T15:09:01.002806+00:00",
|
| 8 |
+
"files": [
|
| 9 |
+
"LICENSE",
|
| 10 |
+
"README.md",
|
| 11 |
+
"config.json",
|
| 12 |
+
"model.safetensors",
|
| 13 |
+
"payload_manifest.json",
|
| 14 |
+
"preprocessor_config.json"
|
| 15 |
+
],
|
| 16 |
+
"timm_validation": {
|
| 17 |
+
"model_name": "vit_small_patch16_224",
|
| 18 |
+
"model_args": {
|
| 19 |
+
"patch_size": 14,
|
| 20 |
+
"img_size": 224,
|
| 21 |
+
"num_classes": 0
|
| 22 |
+
},
|
| 23 |
+
"input_shape": [
|
| 24 |
+
1,
|
| 25 |
+
3,
|
| 26 |
+
224,
|
| 27 |
+
224
|
| 28 |
+
],
|
| 29 |
+
"output_shape": [
|
| 30 |
+
1,
|
| 31 |
+
384
|
| 32 |
+
]
|
| 33 |
+
}
|
| 34 |
+
}
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_resize": false,
|
| 3 |
+
"do_center_crop": true,
|
| 4 |
+
"crop_size": {
|
| 5 |
+
"height": 224,
|
| 6 |
+
"width": 224
|
| 7 |
+
},
|
| 8 |
+
"do_rescale": false,
|
| 9 |
+
"do_normalize": true,
|
| 10 |
+
"image_mean": [
|
| 11 |
+
0.485,
|
| 12 |
+
0.456,
|
| 13 |
+
0.406
|
| 14 |
+
],
|
| 15 |
+
"image_std": [
|
| 16 |
+
0.229,
|
| 17 |
+
0.224,
|
| 18 |
+
0.225
|
| 19 |
+
],
|
| 20 |
+
"do_convert_rgb": true
|
| 21 |
+
}
|