cim10-backend / model.py
Louis_Mlr
Initial deploy: GLiNER-BioMed NER backend
3c0333d
"""
Medical NER model built on GLiNER-BioMed.
Detects medical concepts in clinical text in a zero-shot setting.
"""
import logging
import os
from typing import Any, Dict, List, Optional

from gliner import GLiNER
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Entity labels to detect.
# GLiNER is zero-shot, so this list can be edited freely.
DEFAULT_LABELS = [
    "Maladie",
    "Symptome",
    "Medicament",
    "Procedure medicale",
    "Partie du corps",
    "Examen de laboratoire",
]

# Minimum confidence threshold (0.0 - 1.0); read from NER_THRESHOLD when set.
DEFAULT_THRESHOLD = float(os.getenv("NER_THRESHOLD", "0.4"))

# Model to load (available variants: small, base, large); read from
# GLINER_MODEL when set.
MODEL_NAME = os.getenv("GLINER_MODEL", "Ihor/gliner-biomed-small-v1.0")
class MedicalNERModel:
    """Wrapper around GLiNER-BioMed for detecting medical entities in text.

    Attributes set by ``__init__``:
        model: the GLiNER model loaded from ``MODEL_NAME``.
        labels: entity labels used when ``predict`` is called without labels.
        threshold: score threshold used when ``predict`` is called without one.
    """

    def __init__(self):
        """Load the model named by ``MODEL_NAME`` and resolve the defaults."""
        logger.info("Chargement de %s ...", MODEL_NAME)
        self.model = GLiNER.from_pretrained(MODEL_NAME)
        self.labels = self._load_labels()
        self.threshold = DEFAULT_THRESHOLD
        logger.info("Modele charge. Labels: %s | Seuil: %.2f", self.labels, self.threshold)

    @staticmethod
    def _load_labels() -> List[str]:
        """Return the entity labels to use by default.

        Reads the comma-separated ``NER_LABELS`` environment variable,
        stripping whitespace and dropping empty items. When the variable is
        unset, empty, or contains only separators/whitespace, a copy of
        ``DEFAULT_LABELS`` is returned instead.

        Returns:
            A new list of label strings (never the ``DEFAULT_LABELS`` object
            itself, so callers may mutate it safely).
        """
        raw = os.environ.get("NER_LABELS", "")
        parsed = [item.strip() for item in raw.split(",") if item.strip()]
        if parsed:
            return parsed
        # Copy so that mutating an instance's labels cannot alter the
        # module-level default shared by every other instance.
        return list(DEFAULT_LABELS)

    def predict(
        self,
        text: str,
        labels: Optional[List[str]] = None,
        threshold: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """Detect medical entities in ``text``.

        Args:
            text: Text to analyse. Empty or whitespace-only text yields an
                empty result without calling the model.
            labels: Entity labels to look for. ``None`` or an empty list
                falls back to ``self.labels``.
            threshold: Score threshold passed to the model. ``None`` falls
                back to ``self.threshold``; ``0.0`` is honoured as given.

        Returns:
            A list of dicts with the keys ``start``, ``end``, ``text``,
            ``label`` and ``score``, copied from the entities reported by the
            model; ``score`` is converted to a plain ``float``.
        """
        if not text or not text.strip():
            return []

        use_labels = labels or self.labels
        # Explicit None check: a threshold of 0.0 is falsy but still valid.
        use_threshold = threshold if threshold is not None else self.threshold

        entities = self.model.predict_entities(
            text, use_labels, threshold=use_threshold
        )
        return [
            {
                "start": ent["start"],
                "end": ent["end"],
                "text": ent["text"],
                "label": ent["label"],
                "score": float(ent["score"]),
            }
            for ent in entities
        ]