---
license: cc-by-4.0
library_name: erdes
tags:
- ocular-ultrasound
- medical-imaging
- 3d-classification
- retinal-detachment
pipeline_tag: image-classification
---

# UNet3D — Normal vs. RD

Trained model weights for **retinal detachment classification (normal vs. RD)** using ocular ultrasound videos.

| Resource | Link |
|----------|------|
| Paper | [arXiv](https://arxiv.org/abs/2508.04735) |
| Dataset | [Hugging Face](https://huggingface.co/datasets/pcvlab/erdes) · [Zenodo](https://zenodo.org/records/18644370) |
| Checkpoints | [Zenodo](https://zenodo.org/records/18821031) |
| Code | [GitHub](https://github.com/OSUPCVLab/ERDES) |

## Model Details

| Property | Value |
|----------|-------|
| Architecture | 3D U-Net (f_maps=[64,128,256,512,768]) |
| Input modality | 3D ocular ultrasound video |
| Input shape | `[1, 96, 128, 128]` (C, D, H, W) |
| Pooling | Global Average Pooling |
| Output | Binary classification (sigmoid) |

## Labels

| Label | Class |
|-------|-------|
| 0 | Normal |
| 1 | Retinal Detachment |

## Usage

```bash
pip install git+https://github.com/OSUPCVLab/ERDES.git ultralytics
```

```python
import torch
import numpy as np
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from ultralytics import YOLO
from erdes.models.components.cls_model import Unet3DClassifier
from erdes.data.components.utils import resize

# --- 1. Load YOLO for ocular globe detection ---
yolo = YOLO(hf_hub_download("pcvlab/yolov8_ocular_ultrasound_globe_detection", "yolov8_ocular_ultrasound_globe_detection.pt"))

# --- 2. Crop your POCUS ultrasound video using YOLO (finds largest globe bbox across all frames) ---
def crop_video(video_path, model, conf=0.8):
    # First pass: find the largest bounding box across all frames
    area_max, cropping_bbox = 0, None
    for frame in model.predict(video_path, stream=True, verbose=False, conf=conf):
        if len(frame.boxes.xywhn):
            bbox = frame.boxes.xywhn[0].cpu().numpy()
            area = bbox[2] * bbox[3]
            if area > area_max:
                area_max, cropping_bbox = area, bbox

    if cropping_bbox is None:
        raise ValueError("YOLO could not detect ocular globe in video.")

    # Second pass: crop every frame with the largest bbox
    frames = []
    for frame in model.predict(video_path, stream=True, verbose=False, conf=conf):
        img = frame.orig_img  # [H, W, C] BGR
        h, w, _ = img.shape
        x_c, y_c, bw, bh = cropping_bbox
        x1, y1 = int((x_c - bw/2) * w), int((y_c - bh/2) * h)
        x2, y2 = int((x_c + bw/2) * w), int((y_c + bh/2) * h)
        frames.append(img[y1:y2, x1:x2])

    return np.stack(frames)  # [D, H, W, C]

frames = crop_video("your_video.mp4", yolo)  # [D, H, W, C]

# --- 3. Preprocess ---
video = torch.from_numpy(frames).float()  # [D, H, W, C]
video = video.permute(3, 0, 1, 2)  # [C, D, H, W]
if video.shape[0] == 3:
    video = video.mean(dim=0, keepdim=True)  # grayscale [1, D, H, W]
video = resize((96, 128, 128))(video) / 255.0  # pad + resize + normalize
video = video.unsqueeze(0)  # [1, 1, 96, 128, 128]

# --- 4. Load model and run inference ---
model = Unet3DClassifier(in_channels=1, num_classes=1, f_maps=[64, 128, 256, 512, 768], pooling="avg")
weights = load_file(hf_hub_download("pcvlab/unet3d_normal_vs_rd", "model.safetensors"))
model.load_state_dict(weights)
model.eval()

with torch.no_grad():
    logit = model(video)
    prob = torch.sigmoid(logit).item()
    pred = int(prob > 0.5)

labels = {'0': 'Normal', '1': 'Retinal Detachment'}
print(f"Prediction: {labels[str(pred)]} (confidence: {prob:.3f})")
```

## Citation

If you use this model, please cite the ERDES paper:

```bibtex
@misc{ozkut2026erdes,
      title={ERDES: A Benchmark Video Dataset for Retinal Detachment and Macular Status Classification in Ocular Ultrasound},
      author={Yasemin Ozkut and Pouyan Navard and Srikar Adhikari and Elaine Situ-LaCasse and Josie Acu{\~n}a and Adrienne Yarnish and Alper Yilmaz},
      year={2026},
      eprint={2508.04735},
      archivePrefix={arXiv},
      primaryClass={cs.CV},
      url={https://arxiv.org/abs/2508.04735}
}
```
|