Upgrade NLP to sentence-transformers all-mpnet-base-v2
Browse files- Backend/gal_fallback.py +12 -38
- Dockerfile +2 -2
- requirements.txt +1 -3
Backend/gal_fallback.py
CHANGED
|
@@ -3,11 +3,11 @@ NLP-based fallback responder for the GAL AI chat.
|
|
| 3 |
|
| 4 |
Three-layer hybrid architecture:
|
| 5 |
Layer 1 — Rule Engine: regex matches compiler error messages → structured explanations
|
| 6 |
-
Layer 2 — Retriever:
|
| 7 |
Layer 3 — Default: help menu when nothing matches
|
| 8 |
|
| 9 |
Plus: synonym expansion, greeting detection, follow-up context, multi-topic blending.
|
| 10 |
-
|
| 11 |
All heavy imports are deferred so the server binds its port immediately.
|
| 12 |
"""
|
| 13 |
|
|
@@ -2222,11 +2222,10 @@ root() {
|
|
| 2222 |
|
| 2223 |
|
| 2224 |
# ───────────────────────────────────────────────────────────────────────
|
| 2225 |
-
#
|
| 2226 |
# ───────────────────────────────────────────────────────────────────────
|
| 2227 |
|
| 2228 |
-
|
| 2229 |
-
_tokenizer = None
|
| 2230 |
_phrase_embeddings = None
|
| 2231 |
_phrase_topic_idx = []
|
| 2232 |
_responses = []
|
|
@@ -2289,44 +2288,19 @@ _GREETING_PATTERNS = [
|
|
| 2289 |
|
| 2290 |
|
| 2291 |
def _encode(texts):
|
| 2292 |
-
"""
|
| 2293 |
-
|
| 2294 |
-
encodings = _tokenizer.encode_batch(texts)
|
| 2295 |
-
ids = np.array([e.ids for e in encodings], dtype=np.int64)
|
| 2296 |
-
mask = np.array([e.attention_mask for e in encodings], dtype=np.int64)
|
| 2297 |
-
ttype = np.zeros_like(ids)
|
| 2298 |
-
out = _session.run(
|
| 2299 |
-
None,
|
| 2300 |
-
{"input_ids": ids, "attention_mask": mask, "token_type_ids": ttype},
|
| 2301 |
-
)
|
| 2302 |
-
tok_emb = out[0] # (batch, seq_len, 384)
|
| 2303 |
-
mask_exp = mask[:, :, np.newaxis].astype(np.float32)
|
| 2304 |
-
pooled = np.sum(tok_emb * mask_exp, axis=1) / np.clip(
|
| 2305 |
-
mask_exp.sum(axis=1), 1e-9, None
|
| 2306 |
-
)
|
| 2307 |
-
norms = np.clip(np.linalg.norm(pooled, axis=1, keepdims=True), 1e-9, None)
|
| 2308 |
-
return pooled / norms
|
| 2309 |
|
| 2310 |
|
| 2311 |
def _ensure_model():
|
| 2312 |
-
"""
|
| 2313 |
-
global
|
| 2314 |
-
if
|
| 2315 |
return
|
| 2316 |
|
| 2317 |
-
|
| 2318 |
-
|
| 2319 |
-
|
| 2320 |
-
import onnxruntime as ort
|
| 2321 |
-
|
| 2322 |
-
repo = "Xenova/all-MiniLM-L6-v2"
|
| 2323 |
-
tok_path = hf_hub_download(repo, "tokenizer.json")
|
| 2324 |
-
model_path = hf_hub_download(repo, "onnx/model.onnx")
|
| 2325 |
-
|
| 2326 |
-
_tokenizer = Tokenizer.from_file(tok_path)
|
| 2327 |
-
_tokenizer.enable_padding()
|
| 2328 |
-
_tokenizer.enable_truncation(max_length=128)
|
| 2329 |
-
_session = ort.InferenceSession(model_path)
|
| 2330 |
|
| 2331 |
_phrase_topic_idx = []
|
| 2332 |
_responses = []
|
|
|
|
| 3 |
|
| 4 |
Three-layer hybrid architecture:
|
| 5 |
Layer 1 — Rule Engine: regex matches compiler error messages → structured explanations
|
| 6 |
+
Layer 2 — Retriever: sentence-transformers (all-mpnet-base-v2) semantic search over 50+ KB topics
|
| 7 |
Layer 3 — Default: help menu when nothing matches
|
| 8 |
|
| 9 |
Plus: synonym expansion, greeting detection, follow-up context, multi-topic blending.
|
| 10 |
+
Uses the #1 ranked sentence embedding model for best semantic matching accuracy.
|
| 11 |
All heavy imports are deferred so the server binds its port immediately.
|
| 12 |
"""
|
| 13 |
|
|
|
|
| 2222 |
|
| 2223 |
|
| 2224 |
# ───────────────────────────────────────────────────────────────────────
|
| 2225 |
+
# Sentence-Transformers (all-mpnet-base-v2) — lazy-loaded on first query
|
| 2226 |
# ───────────────────────────────────────────────────────────────────────
|
| 2227 |
|
| 2228 |
+
_st_model = None
|
|
|
|
| 2229 |
_phrase_embeddings = None
|
| 2230 |
_phrase_topic_idx = []
|
| 2231 |
_responses = []
|
|
|
|
| 2288 |
|
| 2289 |
|
| 2290 |
def _encode(texts):
|
| 2291 |
+
"""Encode texts using sentence-transformers (returns L2-normalised embeddings)."""
|
| 2292 |
+
return _st_model.encode(texts, normalize_embeddings=True, show_progress_bar=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2293 |
|
| 2294 |
|
| 2295 |
def _ensure_model():
|
| 2296 |
+
"""Load sentence-transformers model and encode training phrases on first call."""
|
| 2297 |
+
global _st_model, _phrase_embeddings, _phrase_topic_idx, _responses
|
| 2298 |
+
if _st_model is not None:
|
| 2299 |
return
|
| 2300 |
|
| 2301 |
+
from sentence_transformers import SentenceTransformer
|
| 2302 |
+
|
| 2303 |
+
_st_model = SentenceTransformer("all-mpnet-base-v2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2304 |
|
| 2305 |
_phrase_topic_idx = []
|
| 2306 |
_responses = []
|
Dockerfile
CHANGED
|
@@ -6,8 +6,8 @@ WORKDIR /app
|
|
| 6 |
COPY requirements.txt .
|
| 7 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 8 |
|
| 9 |
-
# Pre-download the
|
| 10 |
-
RUN python -c "from
|
| 11 |
|
| 12 |
# Copy the entire project
|
| 13 |
COPY . .
|
|
|
|
| 6 |
COPY requirements.txt .
|
| 7 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 8 |
|
| 9 |
+
# Pre-download the sentence-transformers model so first request is fast
|
| 10 |
+
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')"
|
| 11 |
|
| 12 |
# Copy the entire project
|
| 13 |
COPY . .
|
requirements.txt
CHANGED
|
@@ -3,7 +3,5 @@ flask-socketio==5.3.6
|
|
| 3 |
flask-cors==4.0.0
|
| 4 |
eventlet
|
| 5 |
google-genai
|
| 6 |
-
|
| 7 |
-
tokenizers
|
| 8 |
-
huggingface-hub
|
| 9 |
numpy
|
|
|
|
| 3 |
flask-cors==4.0.0
|
| 4 |
eventlet
|
| 5 |
google-genai
|
| 6 |
+
sentence-transformers
|
|
|
|
|
|
|
| 7 |
numpy
|