mbochniak01 Claude Sonnet 4.6 committed on
Commit ·
69c362c
1
Parent(s): 86cfc1b
Use T5Tokenizer directly for Vectara HHEM v2
Browse files
AutoTokenizer can't resolve HHEMv2Config (custom class, not registered).
HHEM v2 is T5-small based — T5Tokenizer loads without auto-detection.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- Dockerfile +2 -2
- backend/grader.py +2 -2
Dockerfile
CHANGED
|
@@ -13,9 +13,9 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 13 |
# Pre-download models so first request isn't slow on HF Spaces
|
| 14 |
RUN python -c "\
|
| 15 |
from sentence_transformers import SentenceTransformer; \
|
| 16 |
-
from transformers import
|
| 17 |
SentenceTransformer('all-MiniLM-L6-v2'); \
|
| 18 |
-
tok =
|
| 19 |
pipeline('text-classification', model='vectara/hallucination_evaluation_model', tokenizer=tok, trust_remote_code=True)"
|
| 20 |
|
| 21 |
COPY knowledge/ ./knowledge/
|
|
|
|
| 13 |
# Pre-download models so first request isn't slow on HF Spaces
|
| 14 |
RUN python -c "\
|
| 15 |
from sentence_transformers import SentenceTransformer; \
|
| 16 |
+
from transformers import T5Tokenizer, pipeline; \
|
| 17 |
SentenceTransformer('all-MiniLM-L6-v2'); \
|
| 18 |
+
tok = T5Tokenizer.from_pretrained('vectara/hallucination_evaluation_model'); \
|
| 19 |
pipeline('text-classification', model='vectara/hallucination_evaluation_model', tokenizer=tok, trust_remote_code=True)"
|
| 20 |
|
| 21 |
COPY knowledge/ ./knowledge/
|
backend/grader.py
CHANGED
|
@@ -16,7 +16,7 @@ from typing import Any
|
|
| 16 |
|
| 17 |
from sentence_transformers import SentenceTransformer
|
| 18 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 19 |
-
from transformers import
|
| 20 |
|
| 21 |
from config import EMBEDDER_MODEL
|
| 22 |
from rosetta import check_terminology
|
|
@@ -41,7 +41,7 @@ def get_nli_model() -> Any:
|
|
| 41 |
"""Return the shared Vectara faithfulness pipeline, loading it on first call."""
|
| 42 |
global _nli_model
|
| 43 |
if _nli_model is None:
|
| 44 |
-
tokenizer =
|
| 45 |
_nli_model = hf_pipeline(
|
| 46 |
"text-classification",
|
| 47 |
model=NLI_MODEL,
|
|
|
|
| 16 |
|
| 17 |
from sentence_transformers import SentenceTransformer
|
| 18 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 19 |
+
from transformers import T5Tokenizer, pipeline as hf_pipeline
|
| 20 |
|
| 21 |
from config import EMBEDDER_MODEL
|
| 22 |
from rosetta import check_terminology
|
|
|
|
| 41 |
"""Return the shared Vectara faithfulness pipeline, loading it on first call."""
|
| 42 |
global _nli_model
|
| 43 |
if _nli_model is None:
|
| 44 |
+
tokenizer = T5Tokenizer.from_pretrained(NLI_MODEL)
|
| 45 |
_nli_model = hf_pipeline(
|
| 46 |
"text-classification",
|
| 47 |
model=NLI_MODEL,
|