Spaces:
Sleeping
Sleeping
| """ | |
| Modele NER medical base sur GLiNER-BioMed. | |
| Detecte les concepts medicaux dans du texte clinique en zero-shot. | |
| """ | |
| import os | |
| import logging | |
| from typing import List, Dict, Any | |
| from gliner import GLiNER | |
| logger = logging.getLogger(__name__) | |
| # Labels d entites a detecter. | |
| # GLiNER est zero-shot : vous pouvez modifier cette liste librement. | |
| DEFAULT_LABELS = [ | |
| "Maladie", | |
| "Symptome", | |
| "Medicament", | |
| "Procedure medicale", | |
| "Partie du corps", | |
| "Examen de laboratoire", | |
| ] | |
| # Seuil de confiance minimum (0.0 - 1.0) | |
| DEFAULT_THRESHOLD = float(os.environ.get("NER_THRESHOLD", "0.4")) | |
| # Modele a charger (variants disponibles : small, base, large) | |
| MODEL_NAME = os.environ.get( | |
| "GLINER_MODEL", "Ihor/gliner-biomed-small-v1.0" | |
| ) | |
| class MedicalNERModel: | |
| """Wrapper autour de GLiNER-BioMed pour la detection d entites medicales.""" | |
| def __init__(self): | |
| logger.info("Chargement de %s ...", MODEL_NAME) | |
| self.model = GLiNER.from_pretrained(MODEL_NAME) | |
| self.labels = self._load_labels() | |
| self.threshold = DEFAULT_THRESHOLD | |
| logger.info("Modele charge. Labels: %s | Seuil: %.2f", self.labels, self.threshold) | |
| def _load_labels() -> List[str]: | |
| env_labels = os.environ.get("NER_LABELS", "") | |
| if env_labels: | |
| return [lb.strip() for lb in env_labels.split(",") if lb.strip()] | |
| return DEFAULT_LABELS | |
| def predict( | |
| self, | |
| text: str, | |
| labels: List[str] = None, | |
| threshold: float = None, | |
| ) -> List[Dict[str, Any]]: | |
| """ | |
| Detecte les entites medicales dans le texte. | |
| Returns: | |
| Liste de dicts avec keys: start, end, text, label, score. | |
| """ | |
| if not text or not text.strip(): | |
| return [] | |
| use_labels = labels or self.labels | |
| use_threshold = threshold if threshold is not None else self.threshold | |
| entities = self.model.predict_entities( | |
| text, use_labels, threshold=use_threshold | |
| ) | |
| return [ | |
| { | |
| "start": ent["start"], | |
| "end": ent["end"], | |
| "text": ent["text"], | |
| "label": ent["label"], | |
| "score": float(ent["score"]), | |
| } | |
| for ent in entities | |
| ] | |