Spaces:
Sleeping
Sleeping
alexchilton Copilot committed on
Commit ·
dee0c43
1
Parent(s): 580fab1
Fix ML model loading: add sentencepiece + use_fast=False for XLMRoberta tokenizer
Browse files
The tokenizer was failing with 'Converting from Tiktoken failed' because
the sentencepiece package was missing from requirements. Also added use_fast=False
to force the slow tokenizer path as a safety fallback.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
backend/app/api/health.py
CHANGED
|
@@ -78,7 +78,7 @@ async def model_diagnostics():
|
|
| 78 |
try:
|
| 79 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 80 |
t0 = time.time()
|
| 81 |
-
tok = AutoTokenizer.from_pretrained(settings.sentiment_model, cache_dir=settings.model_cache_dir)
|
| 82 |
model = AutoModelForSequenceClassification.from_pretrained(settings.sentiment_model, cache_dir=settings.model_cache_dir)
|
| 83 |
model.eval()
|
| 84 |
elapsed = round(time.time() - t0, 2)
|
|
|
|
| 78 |
try:
|
| 79 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 80 |
t0 = time.time()
|
| 81 |
+
tok = AutoTokenizer.from_pretrained(settings.sentiment_model, cache_dir=settings.model_cache_dir, use_fast=False)
|
| 82 |
model = AutoModelForSequenceClassification.from_pretrained(settings.sentiment_model, cache_dir=settings.model_cache_dir)
|
| 83 |
model.eval()
|
| 84 |
elapsed = round(time.time() - t0, 2)
|
backend/app/services/sentiment.py
CHANGED
|
@@ -42,6 +42,7 @@ def _load_model():
|
|
| 42 |
_tokenizer = AutoTokenizer.from_pretrained(
|
| 43 |
model_name,
|
| 44 |
cache_dir=settings.model_cache_dir,
|
|
|
|
| 45 |
)
|
| 46 |
logger.info("tokenizer_loaded", model=model_name, elapsed=round(time.time() - t0, 2))
|
| 47 |
|
|
|
|
| 42 |
_tokenizer = AutoTokenizer.from_pretrained(
|
| 43 |
model_name,
|
| 44 |
cache_dir=settings.model_cache_dir,
|
| 45 |
+
use_fast=False,
|
| 46 |
)
|
| 47 |
logger.info("tokenizer_loaded", model=model_name, elapsed=round(time.time() - t0, 2))
|
| 48 |
|
backend/requirements.txt
CHANGED
|
@@ -52,3 +52,5 @@ httpx==0.28.1
|
|
| 52 |
# Utilities
|
| 53 |
python-dotenv==1.0.1
|
| 54 |
tenacity==9.0.0
|
|
|
|
|
|
|
|
|
| 52 |
# Utilities
|
| 53 |
python-dotenv==1.0.1
|
| 54 |
tenacity==9.0.0
|
| 55 |
+
sentencepiece==0.2.0
|
| 56 |
+
protobuf==5.29.3
|