File size: 1,511 Bytes
eb628b0 84e87c0 8708b02 755e60c 9d37bf0 8708b02 9d37bf0 84e87c0 9d37bf0 84e87c0 eb628b0 755e60c 8708b02 84e87c0 9d37bf0 8708b02 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# handler.py
import os

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
class EndpointHandler:
    """Inference-endpoint handler wrapping a seq2seq text2text pipeline.

    Loads the tokenizer and model from ``model_dir`` — or, when the repo was
    uploaded into a single nested checkpoint folder, from that subfolder —
    and serves generation requests via ``__call__``.
    """

    def __init__(self, model_dir: str):
        # Some repos upload the checkpoint into a subfolder. If exactly one
        # subdirectory of model_dir contains a config.json, assume that is
        # the real checkpoint folder; otherwise use model_dir as-is.
        candidates = [
            d for d in os.listdir(model_dir)
            if os.path.isdir(os.path.join(model_dir, d))
            and os.path.exists(os.path.join(model_dir, d, "config.json"))
        ]
        real_dir = (
            os.path.join(model_dir, candidates[0])
            if len(candidates) == 1
            else model_dir
        )

        # Load from the folder that actually holds the fine-tuned files.
        self.tokenizer = AutoTokenizer.from_pretrained(real_dir)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(real_dir)

        # FIX: device was hard-coded to GPU 0, which crashes on CPU-only
        # hosts despite the "GPU if available" intent. Use GPU 0 when CUDA
        # is available, else CPU (-1 in the pipeline API).
        device = 0 if torch.cuda.is_available() else -1

        self.generator = pipeline(
            "text2text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=device,
            # Defaults applied to every call; per-request "parameters"
            # passed to __call__ override them.
            max_new_tokens=500,
            # FIX: temperature is ignored (with a warning) unless sampling
            # is enabled — do_sample=True makes the 0.7 default effective.
            do_sample=True,
            temperature=0.7,
        )

    def __call__(self, payload: dict) -> list:
        """Run generation for one request.

        Args:
            payload: dict with an ``"inputs"`` string (defaults to ``""``)
                and an optional ``"parameters"`` dict of per-call
                generation overrides.

        Returns:
            The pipeline's output list (e.g. ``[{"generated_text": ...}]``).
        """
        text = payload.get("inputs", "")
        params = payload.get("parameters", {})
        # Per-call parameters override the pipeline defaults set in __init__.
        return self.generator(text, **params)
|