Buckets:
from __future__ import annotations

from pathlib import Path
from threading import Lock

from sentence_transformers import SentenceTransformer
| INTENT_PHRASES = { | |
| "greeting": ["hi", "hello", "hey", "good morning", "good evening", "how are you"], | |
| "emergency": [ | |
| "emergency", | |
| "call ambulance", | |
| "heart attack", | |
| "can't breathe", | |
| "dying", | |
| "severe chest pain", | |
| "unconscious", | |
| "not breathing", | |
| ], | |
| "educational": [ | |
| "what is", | |
| "what causes", | |
| "how does", | |
| "explain", | |
| "tell me about", | |
| "symptoms of", | |
| "treatment for", | |
| "prevention of", | |
| ], | |
| "assessment": [ | |
| "i have", | |
| "i feel", | |
| "i am experiencing", | |
| "my chest", | |
| "i've been", | |
| "i noticed", | |
| "i'm suffering", | |
| "i got diagnosed", | |
| ], | |
| } | |
| DISEASE_PHRASES = { | |
| "heart": [ | |
| "chest pain", | |
| "heart attack", | |
| "cardiac", | |
| "palpitations", | |
| "angina", | |
| "heart disease", | |
| "coronary", | |
| "arrhythmia", | |
| ], | |
| "diabetes": ["blood sugar", "diabetes", "insulin", "glucose", "diabetic"], | |
| "asthma": ["asthma", "can't breathe", "inhaler", "wheezing", "shortness of breath"], | |
| "liver": ["liver pain", "jaundice", "hepatitis", "liver disease"], | |
| "kidney": ["kidney pain", "kidney stone", "renal", "dialysis"], | |
| "mental_health": ["depression", "anxiety", "mental health", "stress", "panic attack"], | |
| "cancer": ["cancer", "tumor", "chemotherapy", "malignant"], | |
| } | |
class NLPService:
    """Thread-safe singleton that routes user text to an intent and a disease
    topic via sentence-embedding cosine similarity, with literal substring
    matches against the canonical phrase lists acting as high-precision
    overrides.
    """

    _instance: "NLPService | None" = None
    _instance_lock = Lock()  # guards singleton creation
    _model_lock = Lock()  # guards lazy model loading
    _model_name = "sentence-transformers/all-MiniLM-L6-v2"
    # Prefer a vendored copy of the model when present; otherwise the
    # SentenceTransformer constructor resolves the hub name.
    _local_model_dir = (
        Path(__file__).resolve().parents[2] / "models" / "sentence-transformers__all-MiniLM-L6-v2"
    )

    def __new__(cls) -> "NLPService":
        # Double-checked locking: cheap unlocked read first, re-check under
        # the lock so concurrent first calls still share one instance.
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self) -> None:
        # __init__ runs on every NLPService() call because __new__ returns
        # the shared instance; only bootstrap once.
        if self._initialized:
            return
        self._model: "SentenceTransformer | None" = None
        self._intent_embeddings: dict[str, list[list[float]]] = {}
        self._disease_embeddings: dict[str, list[list[float]]] = {}
        self._bootstrap()
        self._initialized = True

    def _bootstrap(self) -> None:
        """Pre-compute reference embeddings for all canonical phrases."""
        self._intent_embeddings = self._encode_phrase_groups(INTENT_PHRASES)
        self._disease_embeddings = self._encode_phrase_groups(DISEASE_PHRASES)

    def _load_model(self) -> "SentenceTransformer":
        """Lazily load the embedding model (double-checked locking)."""
        if self._model is None:
            with self._model_lock:
                if self._model is None:
                    model_source = (
                        str(self._local_model_dir)
                        if self._local_model_dir.exists()
                        else self._model_name
                    )
                    self._model = SentenceTransformer(model_source)
                    print(f"NLP routing model loaded: {self._model_name}")
        return self._model

    def _encode(self, texts: list[str]) -> list[list[float]]:
        """Encode *texts* into L2-normalized embedding vectors."""
        model = self._load_model()
        embeddings = model.encode(
            texts,
            normalize_embeddings=True,  # unit vectors: dot product == cosine
            show_progress_bar=False,
        )
        return embeddings.tolist()

    def _encode_phrase_groups(
        self, phrase_groups: dict[str, list[str]]
    ) -> dict[str, list[list[float]]]:
        """Encode every phrase in one batch, then regroup embeddings by label."""
        labels: list[str] = []
        phrases: list[str] = []
        for label, canonical_phrases in phrase_groups.items():
            for phrase in canonical_phrases:
                labels.append(label)
                phrases.append(phrase)
        encoded_phrases = self._encode(phrases)
        grouped_embeddings: dict[str, list[list[float]]] = {label: [] for label in phrase_groups}
        for label, embedding in zip(labels, encoded_phrases):
            grouped_embeddings[label].append(embedding)
        return grouped_embeddings

    @staticmethod
    def _cosine_similarity(vector_a: list[float], vector_b: list[float]) -> float:
        """Dot product of two vectors; equals cosine similarity because the
        model emits normalized embeddings.

        FIX: was missing @staticmethod, so calling it as
        ``self._cosine_similarity(a, b)`` passed the instance as ``vector_a``
        and raised TypeError on every scoring call.
        """
        return float(sum(value_a * value_b for value_a, value_b in zip(vector_a, vector_b)))

    def _best_match(
        self,
        text: str,
        reference_embeddings: dict[str, list[list[float]]],
    ) -> tuple[str, float]:
        """Return the (label, score) pair with the highest similarity."""
        label_scores = self._label_scores(text, reference_embeddings)
        best_label, best_score = max(label_scores.items(), key=lambda item: item[1])
        return best_label, best_score

    def _label_scores(
        self,
        text: str,
        reference_embeddings: dict[str, list[list[float]]],
    ) -> dict[str, float]:
        """Score *text* against each label; a label's score is its best phrase."""
        query_embedding = self._encode([text or ""])[0]
        label_scores: dict[str, float] = {}
        for label, embeddings in reference_embeddings.items():
            label_scores[label] = max(
                self._cosine_similarity(query_embedding, embedding) for embedding in embeddings
            )
        return label_scores

    @staticmethod
    def _has_phrase(text: str, phrases: list[str]) -> bool:
        """True if any phrase is a substring of the lowercased text.

        FIX: was missing @staticmethod, so calling it as
        ``self._has_phrase(text, phrases)`` raised TypeError.
        """
        normalized_text = (text or "").lower()
        return any(phrase in normalized_text for phrase in phrases)

    def classify_intent(self, text: str) -> dict:
        """Classify *text* into one of the INTENT_PHRASES labels.

        Returns a dict with ``intent``, ``confidence``, and
        ``needs_llm_tiebreaker``.  A literal phrase hit short-circuits the
        embedding scores with a confidence floor; otherwise the best embedding
        score wins, and mid-range confidence flags an LLM tiebreaker.
        """
        label_scores = self._label_scores(text, self._intent_embeddings)
        normalized_text = (text or "").lower().strip()
        # Literal-phrase overrides, checked in priority order (emergency
        # first); each carries a confidence floor so a keyword hit is never
        # reported as low-confidence.
        for intent, confidence_floor in (
            ("emergency", 0.90),
            ("greeting", 0.90),
            ("assessment", 0.85),
            ("educational", 0.85),
        ):
            if self._has_phrase(normalized_text, INTENT_PHRASES[intent]):
                return {
                    "intent": intent,
                    "confidence": max(label_scores[intent], confidence_floor),
                    "needs_llm_tiebreaker": False,
                }
        intent, confidence = max(label_scores.items(), key=lambda item: item[1])
        return {
            "intent": intent,
            "confidence": confidence,
            "needs_llm_tiebreaker": 0.30 <= confidence <= 0.65,
        }

    def detect_disease(self, text: str) -> dict:
        """Map *text* to the closest DISEASE_PHRASES topic.

        Falls back to ``{"disease_id": "general", "confidence": 0.0}`` when
        no topic scores at least 0.30.
        """
        disease_id, confidence = self._best_match(text, self._disease_embeddings)
        if confidence < 0.30:
            return {"disease_id": "general", "confidence": 0.0}
        return {"disease_id": disease_id, "confidence": confidence}

    def process(self, text: str) -> dict:
        """Run intent classification and disease detection in one call."""
        intent_result = self.classify_intent(text)
        disease_result = self.detect_disease(text)
        return {
            "intent": intent_result["intent"],
            "intent_confidence": intent_result["confidence"],
            "disease_id": disease_result["disease_id"],
            "disease_confidence": disease_result["confidence"],
            "needs_llm_tiebreaker": intent_result["needs_llm_tiebreaker"],
        }
Xet Storage Details
- Size:
- 7.77 kB
- Xet hash:
- d5c1744613e5c5a7eaefb1d9b6f19f97029300c19478843673124530c6fa70be
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.