"""
Skanner - Melanoma Classification Module (v2)
==============================================
Uses a binary melanoma/benign classifier (Hemgg/Melanoma-Cancer-Image-classification)
tuned to threshold 0.15 for screening-optimized sensitivity.
Benchmarked on ISIC 2024 (n=50 balanced sample):
- Sensitivity: 84% (catches 21 of 25 melanomas)
- Specificity: 56% (clears 14 of 25 benign)
- Threshold: 0.15 (tuned for screening use case)
Why threshold 0.15 instead of 0.50:
In melanoma screening, missing cancer is worse than a false alarm. A false
alarm sends someone to a dermatologist who clears them. A missed cancer
becomes an advanced tumor. We tune toward sensitivity.
Usage (CLI):
python classify.py path/to/lesion.jpg
Usage (Python):
from classify import SkannerClassifier
clf = SkannerClassifier()
result = clf.classify("lesion.jpg")
print(result["risk_level"], result["melanoma_probability"])
"""
from __future__ import annotations
import sys
from pathlib import Path
from typing import Union
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification
# Model + threshold determined by compare_models.py benchmark
DEFAULT_MODEL = "Hemgg/Melanoma-Cancer-Image-classification"
# Melanoma probabilities at or above this value are triaged as "High" risk.
MELANOMA_THRESHOLD = 0.15  # Screening-optimized (vs default 0.50)
# Melanoma probabilities below this value are triaged as "Low" risk; values
# from here up to (but not including) MELANOMA_THRESHOLD are "Moderate".
MODERATE_THRESHOLD = 0.08
def _detect_device() -> str:
    """Return the name of the best available torch device.

    Preference order is CUDA, then Apple-Silicon MPS, then CPU.

    Returns:
        One of ``"cuda"``, ``"mps"`` or ``"cpu"``.
    """
    if torch.cuda.is_available():
        return "cuda"
    # Not every PyTorch build ships the ``torch.backends.mps`` submodule, so
    # probe for it instead of assuming the attribute exists; a missing backend
    # simply means "no MPS" rather than an AttributeError at start-up.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"
class SkannerClassifier:
    """Binary melanoma/benign classifier tuned for screening.

    Loads an image-classification checkpoint and its image processor via
    ``transformers``, runs a single image through the model, and maps the
    probability of the melanoma-indicating class onto a three-tier risk level
    using the module-level thresholds.
    """

    # Substrings (lower-case) that mark a class label as melanoma-indicating.
    _MELANOMA_KEYWORDS = ("malignant", "melanoma")

    def __init__(self, model_name: str = DEFAULT_MODEL, device: str | None = None):
        """Load the processor and model and move the model to ``device``.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``.
            device: Torch device string. When ``None`` (or empty), the device
                is chosen by ``_detect_device()``.
        """
        self.device = device or _detect_device()
        print(f"[Skanner] Loading model '{model_name}' on {self.device}...")
        self.processor = AutoImageProcessor.from_pretrained(model_name)
        self.model = AutoModelForImageClassification.from_pretrained(model_name)
        self.model.to(self.device)
        # Inference only: switch off dropout / batch-norm updates.
        self.model.eval()
        self.id2label = self.model.config.id2label
        print(f"[Skanner] Ready. Classes: {list(self.id2label.values())}")

    def classify(self, image: Union[str, Path, Image.Image]) -> dict:
        """Run classification on a single image.

        Args:
            image: Path to an image file (``str`` or ``Path``) or an
                already-loaded ``PIL.Image.Image``. The image is converted to
                RGB before preprocessing.

        Returns:
            {
                "melanoma_probability": float,  # 0.0 - 1.0 (primary output)
                "risk_level": "Low"|"Moderate"|"High",
                "top_prediction": str,
                "top_confidence": float,
                "all_probabilities": {class_name: prob, ...},
                "threshold_used": float,
            }

        Raises:
            TypeError: If ``image`` is not a ``str``, ``Path`` or PIL image.
            ValueError: If none of the model's class labels contains
                "malignant" or "melanoma", so no melanoma probability can be
                derived.
        """
        # Accept either a path or a pre-loaded PIL image.
        if isinstance(image, (str, Path)):
            # The context manager releases the file handle as soon as the
            # pixel data has been copied into the converted image.
            with Image.open(image) as opened:
                image = opened.convert("RGB")
        elif isinstance(image, Image.Image):
            image = image.convert("RGB")
        else:
            raise TypeError(
                f"Expected str, Path, or PIL.Image; got {type(image).__name__}"
            )

        # Preprocess and run the model without tracking gradients.
        inputs = self.processor(images=image, return_tensors="pt").to(self.device)
        with torch.no_grad():
            logits = self.model(**inputs).logits
        # Softmax over the class axis; [0] selects the single image in the batch.
        probs = torch.nn.functional.softmax(logits, dim=-1)[0].cpu()

        # Per-class probabilities keyed by the model's label names.
        all_probs = {self.id2label[i]: float(p) for i, p in enumerate(probs)}

        # Find the melanoma-indicating probability (first matching label wins).
        # Hemgg model uses labels ['Benign', 'Malignant'] -> malignant == melanoma here.
        melanoma_prob = next(
            (
                prob
                for label, prob in all_probs.items()
                if any(key in label.lower() for key in self._MELANOMA_KEYWORDS)
            ),
            None,
        )
        if melanoma_prob is None:
            # Falling back to 0.0 would report every lesion as "Low" risk for a
            # model whose labels we cannot interpret; fail loudly instead.
            raise ValueError(
                "Model labels do not include a melanoma/malignant class: "
                f"{list(all_probs)}"
            )

        # Top prediction (for display).
        top_idx = int(torch.argmax(probs))

        return {
            "melanoma_probability": melanoma_prob,
            "risk_level": self._triage(melanoma_prob),
            "top_prediction": self.id2label[top_idx],
            "top_confidence": float(probs[top_idx]),
            "all_probabilities": all_probs,
            "threshold_used": MELANOMA_THRESHOLD,
        }

    @staticmethod
    def _triage(melanoma_prob: float) -> str:
        """Three-tier risk stratification, tuned for screening.

        High:     prob >= MELANOMA_THRESHOLD (flag for dermatologist referral)
        Moderate: prob >= MODERATE_THRESHOLD (monitor / follow-up recommended)
        Low:      prob <  MODERATE_THRESHOLD (routine self-monitoring)
        """
        if melanoma_prob >= MELANOMA_THRESHOLD:
            return "High"
        if melanoma_prob >= MODERATE_THRESHOLD:
            return "Moderate"
        return "Low"
def _print_result(result: dict) -> None:
    """Write a human-readable report for one classification result to stdout.

    Expects the keys ``melanoma_probability``, ``risk_level``,
    ``threshold_used`` and ``all_probabilities``. Classes are listed from
    highest to lowest probability, each with a bar of up to 30 block
    characters proportional to its probability.
    """
    rule = "=" * 60

    # Banner
    print()
    print(rule)
    print(" SKANNER CLASSIFICATION RESULT")
    print(rule)

    # Headline numbers
    melanoma_pct = f"{result['melanoma_probability']:.1%}"
    print(f" Melanoma probability: {melanoma_pct}")
    risk = result["risk_level"]
    print(f" Risk level: {risk}")
    threshold = f"{result['threshold_used']:.2f}"
    print(f" Threshold used: {threshold} (screening-tuned)")
    print()

    # Per-class table, most probable class first
    print(" Class breakdown:")
    ranked = sorted(
        result["all_probabilities"].items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for name, share in ranked:
        blocks = int(share * 30)
        print(f" {name:<24s} {share:6.1%} {'█' * blocks}")

    # Footer and disclaimer
    for line in (
        rule,
        "",
        " REMINDER: This is a screening tool, NOT a medical diagnosis.",
        " Always consult a qualified dermatologist.",
        "",
    ):
        print(line)
def main():
    """Command-line entry point: classify one image and print the report.

    Reads the image path from ``sys.argv[1]``. Prints a usage message and
    exits with status 1 when no path is given; prints an error and exits with
    status 1 when the path is not an existing regular file. Otherwise builds a
    ``SkannerClassifier`` with default settings, classifies the image and
    prints the result.
    """
    if len(sys.argv) < 2:
        print("Usage: python classify.py <image_path>")
        print("Example: python classify.py ISIC_2024_Permissive_Training_Input/ISIC_9855202.jpg")
        sys.exit(1)

    image_path = Path(sys.argv[1])
    # is_file() rather than exists(): a directory "exists" but cannot be opened
    # as an image, and would otherwise fail later with a raw traceback.
    if not image_path.is_file():
        print(f"Error: file not found: {image_path}")
        sys.exit(1)

    classifier = SkannerClassifier()
    result = classifier.classify(image_path)
    _print_result(result)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|