meet4150/alive_pine / app /nlp /nlp_service.py
meet4150's picture
download
raw
7.77 kB
from __future__ import annotations

import logging
from pathlib import Path
from threading import Lock

from sentence_transformers import SentenceTransformer
# Canonical trigger phrases per user intent.
# Used two ways by NLPService: as exact-substring triggers (which
# short-circuit classification with a floor confidence) and as the corpus
# encoded into reference embeddings for similarity scoring.
INTENT_PHRASES = {
    "greeting": ["hi", "hello", "hey", "good morning", "good evening", "how are you"],
    "emergency": [
        "emergency",
        "call ambulance",
        "heart attack",
        "can't breathe",
        "dying",
        "severe chest pain",
        "unconscious",
        "not breathing",
    ],
    "educational": [
        "what is",
        "what causes",
        "how does",
        "explain",
        "tell me about",
        "symptoms of",
        "treatment for",
        "prevention of",
    ],
    "assessment": [
        "i have",
        "i feel",
        "i am experiencing",
        "my chest",
        "i've been",
        "i noticed",
        "i'm suffering",
        "i got diagnosed",
    ],
}
# Canonical phrases per disease topic, encoded into reference embeddings
# by NLPService and matched against user text by cosine similarity.
# Keys are internal disease identifiers returned by detect_disease().
DISEASE_PHRASES = {
    "heart": [
        "chest pain",
        "heart attack",
        "cardiac",
        "palpitations",
        "angina",
        "heart disease",
        "coronary",
        "arrhythmia",
    ],
    "diabetes": ["blood sugar", "diabetes", "insulin", "glucose", "diabetic"],
    "asthma": ["asthma", "can't breathe", "inhaler", "wheezing", "shortness of breath"],
    "liver": ["liver pain", "jaundice", "hepatitis", "liver disease"],
    "kidney": ["kidney pain", "kidney stone", "renal", "dialysis"],
    "mental_health": ["depression", "anxiety", "mental health", "stress", "panic attack"],
    "cancer": ["cancer", "tumor", "chemotherapy", "malignant"],
}
class NLPService:
    """Singleton routing service for intent and disease-topic detection.

    Encodes the canonical phrases in ``INTENT_PHRASES`` / ``DISEASE_PHRASES``
    once with a sentence-transformer model, then classifies incoming text by
    cosine similarity against those reference embeddings. Exact substring
    matches of canonical phrases short-circuit the similarity scores with a
    minimum confidence floor.
    """

    _instance: "NLPService | None" = None
    _instance_lock = Lock()
    _model_lock = Lock()
    _model_name = "sentence-transformers/all-MiniLM-L6-v2"
    # Prefer a vendored local copy of the model; fall back to downloading
    # by name if the directory is absent.
    _local_model_dir = (
        Path(__file__).resolve().parents[2] / "models" / "sentence-transformers__all-MiniLM-L6-v2"
    )

    # Exact-phrase routing priority for classify_intent. Order matters:
    # emergency must win over everything, and first-person assessment
    # phrases ("i have ...") are checked before generic educational
    # openers ("what is ..."). Each entry is
    # (intent label, confidence floor applied when a phrase matches).
    _INTENT_PRIORITY: tuple[tuple[str, float], ...] = (
        ("emergency", 0.90),
        ("greeting", 0.90),
        ("assessment", 0.85),
        ("educational", 0.85),
    )

    def __new__(cls) -> "NLPService":
        # Double-checked locking so concurrent first constructions still
        # yield exactly one instance.
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self) -> None:
        # __init__ runs on every NLPService() call against the same
        # singleton; only bootstrap the embeddings once.
        if self._initialized:
            return
        self._model: SentenceTransformer | None = None
        self._intent_embeddings: dict[str, list[list[float]]] = {}
        self._disease_embeddings: dict[str, list[list[float]]] = {}
        self._bootstrap()
        self._initialized = True

    def _bootstrap(self) -> None:
        """Precompute reference embeddings for all canonical phrase groups."""
        self._intent_embeddings = self._encode_phrase_groups(INTENT_PHRASES)
        self._disease_embeddings = self._encode_phrase_groups(DISEASE_PHRASES)

    def _load_model(self) -> SentenceTransformer:
        """Lazily load the sentence-transformer model (thread-safe).

        Uses double-checked locking so the model is loaded at most once
        even under concurrent first use.
        """
        if self._model is None:
            with self._model_lock:
                if self._model is None:
                    model_source = (
                        str(self._local_model_dir)
                        if self._local_model_dir.exists()
                        else self._model_name
                    )
                    self._model = SentenceTransformer(model_source)
                    # Log via logging rather than print so deployments can
                    # route/silence this message.
                    logging.getLogger(__name__).info(
                        "NLP routing model loaded: %s", self._model_name
                    )
        return self._model

    def _encode(self, texts: list[str]) -> list[list[float]]:
        """Encode texts into L2-normalized embedding vectors.

        Normalization means a plain dot product equals cosine similarity
        downstream (see _cosine_similarity).
        """
        model = self._load_model()
        embeddings = model.encode(
            texts,
            normalize_embeddings=True,
            show_progress_bar=False,
        )
        return embeddings.tolist()

    def _encode_phrase_groups(
        self, phrase_groups: dict[str, list[str]]
    ) -> dict[str, list[list[float]]]:
        """Encode every phrase in a single batch, then regroup by label.

        Batching all phrases into one encode() call avoids per-group model
        invocations; the parallel ``labels`` list lets us fan the results
        back out to their groups.
        """
        labels: list[str] = []
        phrases: list[str] = []
        for label, canonical_phrases in phrase_groups.items():
            for phrase in canonical_phrases:
                labels.append(label)
                phrases.append(phrase)
        encoded_phrases = self._encode(phrases)
        grouped_embeddings: dict[str, list[list[float]]] = {label: [] for label in phrase_groups}
        for label, embedding in zip(labels, encoded_phrases):
            grouped_embeddings[label].append(embedding)
        return grouped_embeddings

    @staticmethod
    def _cosine_similarity(vector_a: list[float], vector_b: list[float]) -> float:
        """Dot product of two vectors.

        Inputs are already unit-normalized (see _encode), so the dot
        product is exactly the cosine similarity.
        """
        return float(sum(value_a * value_b for value_a, value_b in zip(vector_a, vector_b)))

    def _best_match(
        self,
        text: str,
        reference_embeddings: dict[str, list[list[float]]],
    ) -> tuple[str, float]:
        """Return the (label, score) with the highest similarity to text."""
        label_scores = self._label_scores(text, reference_embeddings)
        best_label, best_score = max(label_scores.items(), key=lambda item: item[1])
        return best_label, best_score

    def _label_scores(
        self,
        text: str,
        reference_embeddings: dict[str, list[list[float]]],
    ) -> dict[str, float]:
        """Score text against each label: max similarity over its phrases."""
        query_embedding = self._encode([text or ""])[0]
        label_scores: dict[str, float] = {}
        for label, embeddings in reference_embeddings.items():
            label_scores[label] = max(
                self._cosine_similarity(query_embedding, embedding) for embedding in embeddings
            )
        return label_scores

    @staticmethod
    def _has_phrase(text: str, phrases: list[str]) -> bool:
        """True if any canonical phrase occurs as a substring of text."""
        normalized_text = (text or "").lower()
        return any(phrase in normalized_text for phrase in phrases)

    def classify_intent(self, text: str) -> dict:
        """Classify user intent.

        Returns a dict with keys ``intent``, ``confidence`` and
        ``needs_llm_tiebreaker``. Exact canonical-phrase hits win in
        priority order with a floored confidence; otherwise the best
        embedding-similarity label is returned, flagging mid-range
        (ambiguous) scores for an LLM tiebreaker.
        """
        label_scores = self._label_scores(text, self._intent_embeddings)
        normalized_text = (text or "").lower().strip()
        # Exact-phrase routing, in fixed priority order (see _INTENT_PRIORITY).
        for intent, confidence_floor in self._INTENT_PRIORITY:
            if self._has_phrase(normalized_text, INTENT_PHRASES[intent]):
                return {
                    "intent": intent,
                    "confidence": max(label_scores[intent], confidence_floor),
                    "needs_llm_tiebreaker": False,
                }
        # No exact phrase matched: fall back to best embedding similarity.
        intent, confidence = max(label_scores.items(), key=lambda item: item[1])
        return {
            "intent": intent,
            "confidence": confidence,
            # Mid-range similarity is ambiguous; defer to an LLM tiebreaker.
            "needs_llm_tiebreaker": 0.30 <= confidence <= 0.65,
        }

    def detect_disease(self, text: str) -> dict:
        """Detect the most likely disease topic for text.

        Returns ``{"disease_id": ..., "confidence": ...}``; falls back to
        ``"general"`` with zero confidence below the 0.30 threshold.
        """
        disease_id, confidence = self._best_match(text, self._disease_embeddings)
        if confidence < 0.30:
            return {"disease_id": "general", "confidence": 0.0}
        return {"disease_id": disease_id, "confidence": confidence}

    def process(self, text: str) -> dict:
        """Run intent classification and disease detection on text.

        Returns a flat dict combining both results for downstream routing.
        """
        intent_result = self.classify_intent(text)
        disease_result = self.detect_disease(text)
        return {
            "intent": intent_result["intent"],
            "intent_confidence": intent_result["confidence"],
            "disease_id": disease_result["disease_id"],
            "disease_confidence": disease_result["confidence"],
            "needs_llm_tiebreaker": intent_result["needs_llm_tiebreaker"],
        }

Xet Storage Details

Size:
7.77 kB
·
Xet hash:
d5c1744613e5c5a7eaefb1d9b6f19f97029300c19478843673124530c6fa70be

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.