Spaces:

fvdbfvis
/

derm-fastapi-backend

Running

App Files Files Community

Daniel Huynh committed 19 days ago

Commit

cb92718

0 Parent(s):

Deploy FastAPI derm backend to Hugging Face Spaces

Browse files

Files changed (19) hide show

.dockerignore +5 -0
.gitattributes +1 -0
.gitignore +24 -0
Dockerfile +17 -0
README.md +125 -0
app/__init__.py +0 -0
app/config.py +26 -0
app/main.py +71 -0
app/models/__init__.py +0 -0
app/models/mlp_head.py +68 -0
app/schemas.py +15 -0
app/services/__init__.py +0 -0
app/services/derm_backbone.py +54 -0
app/services/predictor.py +89 -0
app/services/preprocessing.py +51 -0
class_names.json +25 -0
derm_foundation_mlp_head.pt +3 -0
requirements.txt +9 -0
scripts/test_request.py +15 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.env
+.git/
+__pycache__/
+*.pyc
+.DS_Store

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.pt filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,24 @@

+# Secrets
+.env
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+# Virtual environments
+venv/
+.venv/
+env/
+# Jupyter
+.ipynb_checkpoints/
+# Mac / Windows
+.DS_Store
+Thumbs.db
+# Hugging Face / model cache
+.cache/
+huggingface/

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+FROM python:3.13.13-slim
+# Hugging Face Docker Spaces run the container as UID 1000. Create a matching
+# non-root user with a writable home directory so that pip's user site and the
+# Hugging Face cache used by snapshot_download (~/.cache/huggingface) are
+# writable at build time and at runtime.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
+WORKDIR /app
+# Install dependencies before copying the source so this layer stays cached
+# when only application code changes.
+COPY --chown=user requirements.txt .
+RUN pip install --upgrade pip
+# PIP_NO_CACHE_DIR=1 above already disables the pip cache.
+RUN pip install -r requirements.txt
+COPY --chown=user . .
+EXPOSE 7860
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,125 @@

+---
+title: Basic Docker SDK Space
+emoji: 🐳
+colorFrom: purple
+colorTo: gray
+sdk: docker
+app_port: 7860
+---
+# Derm Foundation FastAPI Two-Stage Classifier
+This project deploys a two-stage inference pipeline:
+```text
+image -> Google Derm Foundation SavedModel -> embedding -> PyTorch MLP head -> class probabilities
+```
+The preprocessing follows the notebook pipeline:
+```text
+RGB -> resize 448x448 -> PNG bytes -> tf.train.Example with key image/encoded
+```
+## Project structure
+```text
+derm_fastapi_project/
+  app/
+    main.py                         # FastAPI app and endpoints
+    config.py                       # Environment settings
+    schemas.py                      # API response models
+    services/
+      preprocessing.py              # image -> serialized tf.train.Example
+      derm_backbone.py              # Derm Foundation wrapper
+      predictor.py                  # two-stage sequential forward pass
+    models/
+      mlp_head.py                   # load model_state_dict from .pt checkpoint
+  scripts/
+    test_request.py                 # local API test client
+  requirements.txt
+  class_names.json                  # replace with your real class order
+  .env.example
+```
+## Setup
+Create a virtual environment, then install dependencies:
+```bash
+pip install -r requirements.txt
+```
+Put your PyTorch checkpoint in the project root:
+```text
+derm_foundation_mlp_head.pt
+```
+The checkpoint should contain:
+```python
+{
+    "model_state_dict": mlp_head.state_dict(),
+    # optional but recommended:
+    "class_names": [...]
+}
+```
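+The API always reads class labels from `class_names.json` in the project root (a `class_names` entry stored in the checkpoint is not used at inference time), so edit `class_names.json` so the order exactly matches your training label order.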
+## Hugging Face token
+Do not put your token in the source code.
+Use an environment variable:
+```bash
+export HF_TOKEN="hf_your_token_here"
+```
+You must already have access to `google/derm-foundation` on Hugging Face.
+## Run the API
+```bash
+uvicorn app.main:app --host 0.0.0.0 --port 8000
+```
+Test in browser:
+```text
+http://127.0.0.1:8000/docs
+```
+Test with Python:
+```bash
+python scripts/test_request.py path/to/image.jpg
+```
+## API endpoint
+### POST `/predict`
+Input: multipart image upload named `file`.
+Output:
+```json
+{
+  "predicted_index": 0,
+  "predicted_class": "class_0",
+  "confidence": 0.91,
+  "probabilities": [
+    {"index": 0, "class_name": "class_0", "probability": 0.91},
+    {"index": 1, "class_name": "class_1", "probability": 0.02}
+  ]
+}
+```
+## Important note about the MLP head
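+`app/models/mlp_head.py` defines `DermFoundationMLPHead`, a fixed architecture (three `Linear -> ReLU -> Dropout(0.6)` stages with hidden sizes 512, 256 and 128, followed by a final `Linear` layer), and loads `model_state_dict` into it with `strict=True`.
+Only the input dimension and the number of classes are read from the checkpoint (from the first and last Linear weights); everything else must match the architecture above.
+If your original head used a different activation, BatchNorm, different hidden sizes, or a more complex custom architecture, replace `DermFoundationMLPHead` with the exact same class used during training.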

app/__init__.py ADDED Viewed

File without changes

app/config.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from dataclasses import dataclass
+import os
+from pathlib import Path
+from typing import Optional
+@dataclass(frozen=True)
+class Settings:
+    """Immutable application settings sourced from environment variables.
+
+    Every default below is an ``os.getenv`` expression evaluated in the class
+    body, so the environment is read once, when this module is imported.
+    Changing an environment variable afterwards does not affect ``settings``.
+    """
+    # Hugging Face / Derm Foundation
+    derm_model_id: str = os.getenv("DERM_MODEL_ID", "google/derm-foundation")
+    # None when HF_TOKEN is not set.
+    hf_token: Optional[str] = os.getenv("HF_TOKEN")
+    # True only when HF_LOCAL_FILES_ONLY equals "true" (case-insensitive).
+    local_files_only: bool = os.getenv("HF_LOCAL_FILES_ONLY", "false").lower() == "true"
+    # Model artifacts
+    # The default is a relative path, so it is resolved against the process
+    # working directory when the file is opened.
+    head_checkpoint_path: Path = Path(os.getenv("HEAD_CHECKPOINT_PATH", "derm_foundation_mlp_head.pt"))
+    # NOTE(review): app/main.py does not pass this value to the predictor;
+    # app/services/predictor.py locates class_names.json on its own.
+    class_names_path: Path = Path(os.getenv("CLASS_NAMES_PATH", "class_names.json"))
+    # Inference
+    # int() raises ValueError at import time if DERM_IMAGE_SIZE is not numeric.
+    image_size: int = int(os.getenv("DERM_IMAGE_SIZE", "448"))
+    # "auto" is resolved by the predictor (CUDA if available, otherwise CPU).
+    device: str = os.getenv("TORCH_DEVICE", "auto")
+    # API
+    # Comma-separated list of allowed origins; split in app/main.py.
+    cors_origins: str = os.getenv("CORS_ORIGINS", "*")
+# Single shared settings instance imported by the rest of the application.
+settings = Settings()

app/main.py ADDED Viewed

	@@ -0,0 +1,71 @@

+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from app.config import settings
+from app.schemas import PredictionResponse
+# ASGI application object served by uvicorn as "app.main:app".
+app = FastAPI(
+    title="Derm Foundation Classifier API",
+    description="Derm Foundation embedding backbone + PyTorch MLP head.",
+    version="1.0.0",
+)
+# settings.cors_origins is a comma-separated string; each entry is stripped of
+# surrounding whitespace before being handed to the CORS middleware.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[origin.strip() for origin in settings.cors_origins.split(",")],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Placeholder for the predictor; get_predictor() fills it in on first use.
+app.state.predictor = None
+def get_predictor():
+    """Return the shared TwoStageDermPredictor, creating it on first call.
+
+    The instance is cached on ``app.state.predictor`` and reused afterwards.
+    No lock guards the creation.
+    """
+    if app.state.predictor is None:
+        print("Loading TwoStageDermPredictor...", flush=True)
+        # Imported here rather than at module level, so app.services.predictor
+        # (which imports torch and, via the backbone, tensorflow) is only
+        # loaded when a predictor is actually needed.
+        from app.services.predictor import TwoStageDermPredictor
+        app.state.predictor = TwoStageDermPredictor(
+            derm_model_id=settings.derm_model_id,
+            head_checkpoint_path=str(settings.head_checkpoint_path),
+            hf_token=settings.hf_token,
+            local_files_only=settings.local_files_only,
+            image_size=settings.image_size,
+            device_name=settings.device,
+        )
+        print("TwoStageDermPredictor loaded.", flush=True)
+    return app.state.predictor
+# Landing endpoint: returns a fixed message and does not touch the predictor.
+@app.get("/")
+def root():
+    return {"message": "Derm Foundation API is running"}
+# Health endpoint: always answers {"status": "ok"}; it does not check whether
+# the predictor has been loaded.
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+@app.post("/predict", response_model=PredictionResponse)
+async def predict(file: UploadFile = File(...)):
+    if file.content_type is not None and not file.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="Uploaded file must be an image.")
+    image_bytes = await file.read()
+    if not image_bytes:
+        raise HTTPException(status_code=400, detail="Uploaded image is empty.")
+    try:
+        predictor = get_predictor()
+        return predictor.predict(image_bytes)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc

app/models/__init__.py ADDED Viewed

File without changes

app/models/mlp_head.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import torch
+import torch.nn as nn
+# Dropout probability used after every hidden layer of the head.
+DROPOUT = 0.6
+class DermFoundationMLPHead(nn.Sequential):
+    """
+    Exact MLP head used after Derm Foundation embeddings.
+    Architecture:
+        Linear(input_dim, 512) -> ReLU -> Dropout(0.6)
+        Linear(512, 256)      -> ReLU -> Dropout(0.6)
+        Linear(256, 128)      -> ReLU -> Dropout(0.6)
+        Linear(128, num_classes)
+    The modules are passed positionally to nn.Sequential, so the Linear layers
+    sit at positions 0, 3, 6 and 9. build_mlp_head_from_checkpoint relies on
+    this when it reads "0.weight" and "9.weight" from the state dict.
+    """
+    def __init__(self, input_dim: int, num_classes: int):
+        super().__init__(
+            nn.Linear(input_dim, 512),
+            nn.ReLU(),
+            nn.Dropout(DROPOUT),
+            nn.Linear(512, 256),
+            nn.ReLU(),
+            nn.Dropout(DROPOUT),
+            nn.Linear(256, 128),
+            nn.ReLU(),
+            nn.Dropout(DROPOUT),
+            nn.Linear(128, num_classes),
+        )
+def build_mlp_head_from_checkpoint(
+    checkpoint_path: str,
+    device: torch.device,
+) -> tuple[nn.Module, dict]:
+    """
+    Load derm_foundation_mlp_head.pt.
+    Expected checkpoint format:
+        {
+            "model_state_dict": model.state_dict(),
+            ...
+        }
+    """
+    checkpoint = torch.load(
+        checkpoint_path,
+        map_location=device,
+    )
+    state_dict = checkpoint["model_state_dict"]
+    input_dim = int(state_dict["0.weight"].shape[1])
+    num_classes = int(state_dict["9.weight"].shape[0])
+    head = DermFoundationMLPHead(
+        input_dim=input_dim,
+        num_classes=num_classes,
+    ).to(device)
+    head.load_state_dict(state_dict, strict=True)
+    head.eval()
+    return head, checkpoint

app/schemas.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from pydantic import BaseModel
+from typing import List
+# One entry of the per-class probability list returned by POST /predict.
+class ClassProbability(BaseModel):
+    # Position of the class in the model output.
+    index: int
+    # Label looked up for that index.
+    class_name: str
+    # Softmax probability assigned to the class.
+    probability: float
+# Response body of POST /predict (used as the endpoint's response_model).
+class PredictionResponse(BaseModel):
+    # Index and label of the highest-probability class.
+    predicted_index: int
+    predicted_class: str
+    # Probability of the predicted class.
+    confidence: float
+    # One ClassProbability per model output, in index order.
+    probabilities: List[ClassProbability]

app/services/__init__.py ADDED Viewed

File without changes

app/services/derm_backbone.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from pathlib import Path
+from typing import Tuple
+import numpy as np
+import tensorflow as tf
+from huggingface_hub import snapshot_download
+from app.services.preprocessing import image_bytes_to_tf_string_tensor
+class DermFoundationBackbone:
+    """
+    Thin wrapper around the Google Derm Foundation SavedModel.
+    It converts image bytes into the model's serialized tf.Example input
+    and returns the 6144-d embedding.
+    """
+    def __init__(
+        self,
+        repo_id: str = "google/derm-foundation",
+        token: str | None = None,
+        local_files_only: bool = False,
+        image_size: int = 448,
+    ) -> None:
+        """
+        Resolve the model repository to a local directory and load it.
+        repo_id, token and local_files_only are forwarded to snapshot_download;
+        image_size is the side length of the square the image is resized to.
+        """
+        self.repo_id = repo_id
+        # Stored as a (size, size) pair and passed as img_size to preprocessing.
+        self.image_size: Tuple[int, int] = (image_size, image_size)
+        model_path = snapshot_download(
+            repo_id=repo_id,
+            token=token,
+            local_files_only=local_files_only,
+        )
+        self.model_path = Path(model_path)
+        self.model = tf.saved_model.load(str(self.model_path))
+        # All inference goes through the SavedModel's "serving_default" signature.
+        self.infer = self.model.signatures["serving_default"]
+    def image_to_embedding(self, image_bytes: bytes) -> np.ndarray:
+        """
+        Return embedding with shape [1, embedding_dim].
+        Derm Foundation normally returns key: "embedding".
+        Raises KeyError when the model output has no "embedding" entry.
+        """
+        tf_inputs = image_bytes_to_tf_string_tensor(image_bytes, img_size=self.image_size)
+        # Your notebook used infer(inputs=tf_inputs). Keep that first.
+        # If the keyword call raises TypeError, retry with a positional argument.
+        # Any TypeError raised during the first call triggers this retry.
+        try:
+            output = self.infer(inputs=tf_inputs)
+        except TypeError:
+            output = self.infer(tf_inputs)
+        if "embedding" not in output:
+            available = ", ".join(output.keys())
+            raise KeyError(f"Expected output key 'embedding'. Available keys: {available}")
+        # Convert the tensor to a float32 NumPy array for the PyTorch head.
+        return output["embedding"].numpy().astype("float32")

app/services/predictor.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import json
+from pathlib import Path
+import torch
+from app.models.mlp_head import build_mlp_head_from_checkpoint
+from app.services.derm_backbone import DermFoundationBackbone
+def load_class_names() -> dict[int, str]:
+    """
+    Load the index -> label mapping from class_names.json.
+    The file is looked up two directories above this module (the project
+    root for app/services/predictor.py), independent of the working directory.
+    JSON object keys are strings, so each key is converted to int.
+    """
+    project_root = Path(__file__).resolve().parents[2]
+    class_names_path = project_root / "class_names.json"
+    with open(class_names_path, "r", encoding="utf-8") as f:
+        raw_class_names = json.load(f)
+    return {int(index): name for index, name in raw_class_names.items()}
+class TwoStageDermPredictor:
+    """
+    Stage 1: Derm Foundation image -> embedding.
+    Stage 2: PyTorch MLP head embedding -> class probabilities.
+    """
+    def __init__(
+        self,
+        derm_model_id: str,
+        head_checkpoint_path: str,
+        hf_token: str | None = None,
+        local_files_only: bool = False,
+        image_size: int = 448,
+        device_name: str = "auto",
+    ) -> None:
+        if device_name == "auto":
+            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        else:
+            self.device = torch.device(device_name)
+        self.class_names = load_class_names()
+        self.backbone = DermFoundationBackbone(
+            repo_id=derm_model_id,
+            token=hf_token,
+            local_files_only=local_files_only,
+            image_size=image_size,
+        )
+        self.head, _ = build_mlp_head_from_checkpoint(
+            checkpoint_path=head_checkpoint_path,
+            device=self.device,
+        )
+        output_dim = self.head[-1].out_features
+        if output_dim != len(self.class_names):
+            raise ValueError(
+                f"MLP output dimension is {output_dim}, "
+                f"but class_names.json contains {len(self.class_names)} classes."
+            )
+    def predict(self, image_bytes: bytes) -> dict:
+        embedding_np = self.backbone.image_to_embedding(image_bytes)
+        embedding = torch.from_numpy(embedding_np).float().to(self.device)
+        with torch.no_grad():
+            logits = self.head(embedding)
+            probs = torch.softmax(logits, dim=1)[0].cpu()
+        pred_idx = int(torch.argmax(probs).item())
+        confidence = float(probs[pred_idx].item())
+        print(self.class_names)
+        probabilities = [
+            {
+                "index": i,
+                "class_name": self.class_names[i],
+                "probability": float(prob),
+            }
+            for i, prob in enumerate(probs.tolist())
+        ]
+        return {
+            "predicted_index": pred_idx,
+            "predicted_class": self.class_names[pred_idx],
+            "confidence": confidence,
+            "probabilities": probabilities,
+        }

app/services/preprocessing.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import io
+from typing import Tuple
+import tensorflow as tf
+from PIL import Image
+# Default (width, height) the image is resized to before encoding.
+DERM_FOUNDATION_INPUT_SIZE = (448, 448)
+def pil_to_serialized_example(
+    img: Image.Image,
+    img_size: Tuple[int, int] = DERM_FOUNDATION_INPUT_SIZE,
+) -> bytes:
+    """
+    Convert one PIL image into the serialized tf.train.Example format
+    expected by Google Derm Foundation.
+    Pipeline:
+    RGB -> resize -> PNG bytes -> tf.train.Example with key image/encoded
+    Returns the Example serialized to bytes.
+    """
+    img = img.convert("RGB")
+    img = img.resize(img_size, resample=Image.BILINEAR)
+    # Encode the resized image as PNG into an in-memory buffer.
+    buffer = io.BytesIO()
+    img.save(buffer, format="PNG")
+    image_bytes = buffer.getvalue()
+    # Wrap the PNG bytes in a single-feature Example under "image/encoded".
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                "image/encoded": tf.train.Feature(
+                    bytes_list=tf.train.BytesList(value=[image_bytes])
+                )
+            }
+        )
+    )
+    return example.SerializeToString()
+def image_bytes_to_tf_string_tensor(
+    image_bytes: bytes,
+    img_size: Tuple[int, int] = DERM_FOUNDATION_INPUT_SIZE,
+) -> tf.Tensor:
+    """
+    Convert uploaded image bytes into a batch of one tf.string input.
+    The bytes are opened with PIL, serialized by pil_to_serialized_example,
+    and wrapped in a one-element tf.string tensor.
+    """
+    # The image is only needed while building the serialized Example.
+    with Image.open(io.BytesIO(image_bytes)) as img:
+        serialized = pil_to_serialized_example(img, img_size=img_size)
+    return tf.constant([serialized], dtype=tf.string)

class_names.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+    "0": "Acne and Rosacea Photos",
+    "1": "Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions",
+    "2": "Atopic Dermatitis Photos",
+    "3": "Bullous Disease Photos",
+    "4": "Cellulitis Impetigo and other Bacterial Infections",
+    "5": "Eczema Photos",
+    "6": "Exanthems and Drug Eruptions",
+    "7": "Hair Loss Photos Alopecia and other Hair Diseases",
+    "8": "Herpes HPV and other STDs Photos",
+    "9": "Light Diseases and Disorders of Pigmentation",
+    "10": "Lupus and other Connective Tissue diseases",
+    "11": "Melanoma Skin Cancer Nevi and Moles",
+    "12": "Nail Fungus and other Nail Disease",
+    "13": "Poison Ivy Photos and other Contact Dermatitis",
+    "14": "Psoriasis pictures Lichen Planus and related diseases",
+    "15": "Scabies Lyme Disease and other Infestations and Bites",
+    "16": "Seborrheic Keratoses and other Benign Tumors",
+    "17": "Systemic Disease",
+    "18": "Tinea Ringworm Candidiasis and other Fungal Infections",
+    "19": "Urticaria Hives",
+    "20": "Vascular Tumors",
+    "21": "Vasculitis Photos",
+    "22": "Warts Molluscum and other Viral Infections"
+}

derm_foundation_mlp_head.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3800ab71987278a5d6885e97742be350e4f063e4f19cff20126385be0ab8c25c
+size 13257851

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+fastapi
+uvicorn[standard]
+python-multipart
+numpy
+tensorflow-cpu
+huggingface_hub==0.36.2
+Pillow
+torch
+requests

scripts/test_request.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import sys
+import requests
+# Local smoke test: upload one image to POST /predict and print the reply.
+if len(sys.argv) != 2:
+    raise SystemExit("Usage: python scripts/test_request.py path/to/image.jpg")
+image_path = sys.argv[1]
+# Port 8000 matches the local uvicorn command in the README; the Docker image
+# serves on port 7860 instead.
+url = "http://127.0.0.1:8000/predict"
+with open(image_path, "rb") as f:
+    # "file" is the multipart field name the /predict endpoint expects.
+    response = requests.post(url, files={"file": f})
+print(response.status_code)
+# NOTE(review): presumably raises if the body is not JSON - confirm.
+print(response.json())