Clarkoer committed on
Commit
f05d609
·
1 Parent(s): 445a94e

Upgrade NLP to sentence-transformers all-mpnet-base-v2

Browse files
Files changed (3) hide show
  1. Backend/gal_fallback.py +12 -38
  2. Dockerfile +2 -2
  3. requirements.txt +1 -3
Backend/gal_fallback.py CHANGED
@@ -3,11 +3,11 @@ NLP-based fallback responder for the GAL AI chat.
3
 
4
  Three-layer hybrid architecture:
5
  Layer 1 — Rule Engine: regex matches compiler error messages → structured explanations
6
- Layer 2 — Retriever: ONNX MiniLM semantic search over 50+ knowledge-base topics
7
  Layer 3 — Default: help menu when nothing matches
8
 
9
  Plus: synonym expansion, greeting detection, follow-up context, multi-topic blending.
10
- Lightweight (~30 MB RAM) — fits on Render free tier.
11
  All heavy imports are deferred so the server binds its port immediately.
12
  """
13
 
@@ -2222,11 +2222,10 @@ root() {
2222
 
2223
 
2224
  # ═══════════════════════════════════════════════════════════════════════
2225
- # ONNX MiniLM — lazy-loaded on first query
2226
  # ═══════════════════════════════════════════════════════════════════════
2227
 
2228
- _session = None
2229
- _tokenizer = None
2230
  _phrase_embeddings = None
2231
  _phrase_topic_idx = []
2232
  _responses = []
@@ -2289,44 +2288,19 @@ _GREETING_PATTERNS = [
2289
 
2290
 
2291
  def _encode(texts):
2292
- """Tokenise + run ONNX inference + mean-pool + L2-normalise."""
2293
- import numpy as np
2294
- encodings = _tokenizer.encode_batch(texts)
2295
- ids = np.array([e.ids for e in encodings], dtype=np.int64)
2296
- mask = np.array([e.attention_mask for e in encodings], dtype=np.int64)
2297
- ttype = np.zeros_like(ids)
2298
- out = _session.run(
2299
- None,
2300
- {"input_ids": ids, "attention_mask": mask, "token_type_ids": ttype},
2301
- )
2302
- tok_emb = out[0] # (batch, seq_len, 384)
2303
- mask_exp = mask[:, :, np.newaxis].astype(np.float32)
2304
- pooled = np.sum(tok_emb * mask_exp, axis=1) / np.clip(
2305
- mask_exp.sum(axis=1), 1e-9, None
2306
- )
2307
- norms = np.clip(np.linalg.norm(pooled, axis=1, keepdims=True), 1e-9, None)
2308
- return pooled / norms
2309
 
2310
 
2311
  def _ensure_model():
2312
- """Download ONNX model + tokenizer and encode training phrases on first call."""
2313
- global _session, _tokenizer, _phrase_embeddings, _phrase_topic_idx, _responses
2314
- if _session is not None:
2315
  return
2316
 
2317
- import numpy as np
2318
- from huggingface_hub import hf_hub_download
2319
- from tokenizers import Tokenizer
2320
- import onnxruntime as ort
2321
-
2322
- repo = "Xenova/all-MiniLM-L6-v2"
2323
- tok_path = hf_hub_download(repo, "tokenizer.json")
2324
- model_path = hf_hub_download(repo, "onnx/model.onnx")
2325
-
2326
- _tokenizer = Tokenizer.from_file(tok_path)
2327
- _tokenizer.enable_padding()
2328
- _tokenizer.enable_truncation(max_length=128)
2329
- _session = ort.InferenceSession(model_path)
2330
 
2331
  _phrase_topic_idx = []
2332
  _responses = []
 
3
 
4
  Three-layer hybrid architecture:
5
  Layer 1 — Rule Engine: regex matches compiler error messages → structured explanations
6
+ Layer 2 — Retriever: sentence-transformers (all-mpnet-base-v2) semantic search over 50+ KB topics
7
  Layer 3 — Default: help menu when nothing matches
8
 
9
  Plus: synonym expansion, greeting detection, follow-up context, multi-topic blending.
10
+ Uses the #1 ranked sentence embedding model for best semantic matching accuracy.
11
  All heavy imports are deferred so the server binds its port immediately.
12
  """
13
 
 
2222
 
2223
 
2224
  # ═══════════════════════════════════════════════════════════════════════
2225
+ # Sentence-Transformers (all-mpnet-base-v2) — lazy-loaded on first query
2226
  # ═══════════════════════════════════════════════════════════════════════
2227
 
2228
+ _st_model = None
 
2229
  _phrase_embeddings = None
2230
  _phrase_topic_idx = []
2231
  _responses = []
 
2288
 
2289
 
2290
  def _encode(texts):
2291
+ """Encode texts using sentence-transformers (returns L2-normalised embeddings)."""
2292
+ return _st_model.encode(texts, normalize_embeddings=True, show_progress_bar=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2293
 
2294
 
2295
  def _ensure_model():
2296
+ """Load sentence-transformers model and encode training phrases on first call."""
2297
+ global _st_model, _phrase_embeddings, _phrase_topic_idx, _responses
2298
+ if _st_model is not None:
2299
  return
2300
 
2301
+ from sentence_transformers import SentenceTransformer
2302
+
2303
+ _st_model = SentenceTransformer("all-mpnet-base-v2")
 
 
 
 
 
 
 
 
 
 
2304
 
2305
  _phrase_topic_idx = []
2306
  _responses = []
Dockerfile CHANGED
@@ -6,8 +6,8 @@ WORKDIR /app
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
8
 
9
- # Pre-download the ONNX model so first request is fast
10
- RUN python -c "from huggingface_hub import hf_hub_download; hf_hub_download('Xenova/all-MiniLM-L6-v2','tokenizer.json'); hf_hub_download('Xenova/all-MiniLM-L6-v2','onnx/model.onnx')"
11
 
12
  # Copy the entire project
13
  COPY . .
 
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
8
 
9
+ # Pre-download the sentence-transformers model so first request is fast
10
+ RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')"
11
 
12
  # Copy the entire project
13
  COPY . .
requirements.txt CHANGED
@@ -3,7 +3,5 @@ flask-socketio==5.3.6
3
  flask-cors==4.0.0
4
  eventlet
5
  google-genai
6
- onnxruntime
7
- tokenizers
8
- huggingface-hub
9
  numpy
 
3
  flask-cors==4.0.0
4
  eventlet
5
  google-genai
6
+ sentence-transformers
 
 
7
  numpy