File size: 1,511 Bytes
eb628b0
84e87c0
8708b02
755e60c
 
 
9d37bf0
 
 
 
 
 
 
 
 
 
 
 
8708b02
9d37bf0
 
 
 
84e87c0
 
9d37bf0
 
 
 
 
84e87c0
eb628b0
755e60c
8708b02
84e87c0
9d37bf0
8708b02
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# handler.py
import os

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

class EndpointHandler:
    """Inference-endpoint handler for a seq2seq (text2text) model.

    Loads the tokenizer/model from ``model_dir`` (or a single nested
    checkpoint subfolder, see ``__init__``) and serves generation
    requests of the form ``{"inputs": str, "parameters": dict}``.
    """

    def __init__(self, model_dir: str):
        """Load the model and build the generation pipeline.

        Some repos upload the checkpoint into a subfolder instead of the
        repo root. If exactly one subdirectory of ``model_dir`` contains
        a ``config.json``, assume that subdirectory is the real
        checkpoint folder; otherwise load from ``model_dir`` itself.
        """
        candidates = [
            d for d in os.listdir(model_dir)
            if os.path.isdir(os.path.join(model_dir, d))
            and os.path.exists(os.path.join(model_dir, d, "config.json"))
        ]
        real_dir = (
            os.path.join(model_dir, candidates[0])
            if len(candidates) == 1
            else model_dir
        )

        # Load from the folder that actually has the fine-tuned files.
        self.tokenizer = AutoTokenizer.from_pretrained(real_dir)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(real_dir)

        # Use GPU 0 when available, otherwise fall back to CPU (-1).
        # The original hard-coded device=0, which crashes on CPU-only hosts.
        device = 0 if torch.cuda.is_available() else -1

        self.generator = pipeline(
            "text2text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=device,
            max_new_tokens=500,   # default cap applied to every call
            # temperature only has an effect when sampling is enabled;
            # without do_sample=True, greedy decoding silently ignores it.
            do_sample=True,
            temperature=0.7,
        )

    def __call__(self, payload: dict) -> list:
        """Run generation for one request.

        Parameters
        ----------
        payload : dict
            ``{"inputs": <text>, "parameters": <per-call generation
            overrides>}``; both keys are optional (missing ``inputs``
            defaults to ``""``, missing ``parameters`` to ``{}``).

        Returns
        -------
        list
            The pipeline's output, e.g. ``[{"generated_text": ...}]``.
        """
        text = payload.get("inputs", "")
        params = payload.get("parameters", {})
        # Per-call parameters override the defaults set on the pipeline.
        return self.generator(text, **params)