mbochniak01 Claude Sonnet 4.6 committed on
Commit
a42a9e0
·
1 Parent(s): 1ea03d4

Load Vectara model via transformers pipeline, not CrossEncoder

Browse files

CrossEncoder conflicts with HHEMv2's custom architecture on newer
transformers (tied weights resolution mismatch). Vectara's own docs
recommend transformers.pipeline("text-classification").

Pipeline output: {"label": "Factually Consistent", "score": float}
Normalise: if label is inconsistent, faithfulness = 1 - score.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. Dockerfile +3 -2
  2. backend/grader.py +21 -9
Dockerfile CHANGED
@@ -12,9 +12,10 @@ RUN pip install --no-cache-dir -r requirements.txt
12
 
13
  # Pre-download models so first request isn't slow on HF Spaces
14
  RUN python -c "\
15
- from sentence_transformers import SentenceTransformer, CrossEncoder; \
 
16
  SentenceTransformer('all-MiniLM-L6-v2'); \
17
- CrossEncoder('vectara/hallucination_evaluation_model', trust_remote_code=True)"
18
 
19
  COPY knowledge/ ./knowledge/
20
  COPY backend/ ./backend/
 
12
 
13
  # Pre-download models so first request isn't slow on HF Spaces
14
  RUN python -c "\
15
+ from sentence_transformers import SentenceTransformer; \
16
+ from transformers import pipeline; \
17
  SentenceTransformer('all-MiniLM-L6-v2'); \
18
+ pipeline('text-classification', model='vectara/hallucination_evaluation_model', trust_remote_code=True)"
19
 
20
  COPY knowledge/ ./knowledge/
21
  COPY backend/ ./backend/
backend/grader.py CHANGED
@@ -5,16 +5,18 @@ Metrics:
5
  pii_leakage — regex scan for PII patterns in response
6
  token_budget — response within allowed token ceiling
7
  answer_relevancy — cosine similarity between query and response embeddings
8
- faithfulness — NLI cross-encoder: is response grounded in retrieved context?
9
  chain_terminology — deterministic: client-specific terms used (via RosettaStone)
10
  """
11
 
12
  import re
13
  import logging
14
  from dataclasses import dataclass, field
 
15
 
16
- from sentence_transformers import SentenceTransformer, CrossEncoder
17
  from sklearn.metrics.pairwise import cosine_similarity
 
18
 
19
  from config import EMBEDDER_MODEL
20
  from rosetta import check_terminology
@@ -22,7 +24,7 @@ from rosetta import check_terminology
22
  log = logging.getLogger(__name__)
23
 
24
  _embedder: SentenceTransformer | None = None
25
- _nli_model: CrossEncoder | None = None
26
 
27
  NLI_MODEL = "vectara/hallucination_evaluation_model"
28
 
@@ -35,11 +37,17 @@ def get_embedder() -> SentenceTransformer:
35
  return _embedder
36
 
37
 
38
- def get_nli_model() -> CrossEncoder:
39
- """Return the shared NLI cross-encoder, loading it on first call."""
40
  global _nli_model
41
  if _nli_model is None:
42
- _nli_model = CrossEncoder(NLI_MODEL, trust_remote_code=True)
 
 
 
 
 
 
43
  return _nli_model
44
 
45
 
@@ -153,9 +161,13 @@ def grade_faithfulness(response: str, context: str) -> GradeResult:
153
  if not raw_chunks:
154
  return GradeResult(metric="faithfulness", passed=False, score=0.0, detail="No context")
155
  chunks = [_strip_chunk_title(c) for c in raw_chunks]
156
- # Vectara model: predict((document, response)) → faithfulness probability in [0, 1]
157
- pairs = [(chunk, response) for chunk in chunks]
158
- scores = model.predict(pairs)
 
 
 
 
159
  score = float(max(scores))
160
  passed = score >= FAITHFULNESS_THRESHOLD
161
  return GradeResult(
 
5
  pii_leakage — regex scan for PII patterns in response
6
  token_budget — response within allowed token ceiling
7
  answer_relevancy — cosine similarity between query and response embeddings
8
+ faithfulness — Vectara HHEM v2: RAG faithfulness probability per (doc, response) pair
9
  chain_terminology — deterministic: client-specific terms used (via RosettaStone)
10
  """
11
 
12
  import re
13
  import logging
14
  from dataclasses import dataclass, field
15
+ from typing import Any
16
 
17
+ from sentence_transformers import SentenceTransformer
18
  from sklearn.metrics.pairwise import cosine_similarity
19
+ from transformers import pipeline as hf_pipeline
20
 
21
  from config import EMBEDDER_MODEL
22
  from rosetta import check_terminology
 
24
  log = logging.getLogger(__name__)
25
 
26
  _embedder: SentenceTransformer | None = None
27
+ _nli_model: Any = None
28
 
29
  NLI_MODEL = "vectara/hallucination_evaluation_model"
30
 
 
37
  return _embedder
38
 
39
 
40
+ def get_nli_model() -> Any:
41
+ """Return the shared Vectara faithfulness pipeline, loading it on first call."""
42
  global _nli_model
43
  if _nli_model is None:
44
+ _nli_model = hf_pipeline(
45
+ "text-classification",
46
+ model=NLI_MODEL,
47
+ trust_remote_code=True,
48
+ truncation=True,
49
+ max_length=512,
50
+ )
51
  return _nli_model
52
 
53
 
 
161
  if not raw_chunks:
162
  return GradeResult(metric="faithfulness", passed=False, score=0.0, detail="No context")
163
  chunks = [_strip_chunk_title(c) for c in raw_chunks]
164
+ # Vectara HHEM v2: text-classification pipeline, label "Factually Consistent" = faithful
165
+ pairs = [{"text": chunk, "text_pair": response} for chunk in chunks]
166
+ results = model(pairs)
167
+ scores = [
168
+ r["score"] if "consistent" in r["label"].lower() else 1.0 - r["score"]
169
+ for r in results
170
+ ]
171
  score = float(max(scores))
172
  passed = score >= FAITHFULNESS_THRESHOLD
173
  return GradeResult(