chaima01 commited on
Commit
9d37bf0
·
verified ·
1 Parent(s): 8708b02

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +22 -8
handler.py CHANGED
@@ -4,20 +4,34 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
 
5
class EndpointHandler:
    """Inference-endpoint handler: loads a seq2seq model from the repo root
    and serves it through a transformers text2text-generation pipeline."""

    def __init__(self, model_dir: str):
        """Load tokenizer + model from ``model_dir`` (the repo root).

        BUG FIX: the original did ``.to("cuda")`` and ``device=0``
        unconditionally, which crashes on CPU-only hosts. We now fall back
        to CPU when no GPU is present.
        """
        import torch  # local import: only needed for device detection

        tokenizer = AutoTokenizer.from_pretrained(model_dir)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)

        # pipeline device convention: 0 = first GPU, -1 = CPU
        device = 0 if torch.cuda.is_available() else -1
        if device == 0:
            model = model.to("cuda")

        # build a text2text pipeline on the selected device
        self.generator = pipeline(
            "text2text-generation",
            model=model,
            tokenizer=tokenizer,
            device=device,
        )

    def __call__(self, payload: dict) -> list:
        """Handle a request payload of the form
        ``{"inputs": "...", "parameters": {...}}`` and return the pipeline's
        output list. Missing keys default to empty text / no parameters."""
        text = payload.get("inputs", "")
        params = payload.get("parameters", {})
        return self.generator(text, **params)
 
4
 
5
class EndpointHandler:
    """Inference-endpoint handler that tolerates checkpoints uploaded into a
    single subfolder of the repo and serves them via a text2text pipeline."""

    def __init__(self, model_dir: str):
        """Locate the real checkpoint folder under ``model_dir`` and build
        the generation pipeline.

        BUG FIX: the original used ``os.listdir``/``os.path`` but never
        imported ``os`` (the file only imports from ``transformers``),
        raising ``NameError`` at startup. Also, the comment promised
        "GPU if available" while ``device=0`` was hard-coded — we now
        actually check availability.
        """
        import os      # fix: required below, was never imported at file level
        import torch   # used only for GPU detection

        # Some repos upload into a subfolder. If exactly one subdirectory
        # of model_dir contains a config.json, assume that's the real
        # checkpoint folder; otherwise load from model_dir itself.
        candidates = [
            d for d in os.listdir(model_dir)
            if os.path.isdir(os.path.join(model_dir, d))
            and os.path.exists(os.path.join(model_dir, d, "config.json"))
        ]
        if len(candidates) == 1:
            real_dir = os.path.join(model_dir, candidates[0])
        else:
            real_dir = model_dir

        # load from the folder that actually has the fine-tuned files
        self.tokenizer = AutoTokenizer.from_pretrained(real_dir)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(real_dir)

        # build the pipeline on GPU if available, else CPU (-1)
        self.generator = pipeline(
            "text2text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=0 if torch.cuda.is_available() else -1,
            max_new_tokens=500,  # default generation budget for every call
            # NOTE(review): temperature only takes effect when do_sample=True
            # is also set (per call or here) — confirm intended sampling mode.
            temperature=0.7,
        )

    def __call__(self, payload: dict) -> list:
        """Handle ``{"inputs": "...", "parameters": {...}}``; per-call
        parameters override the pipeline defaults set in ``__init__``."""
        text = payload.get("inputs", "")
        params = payload.get("parameters", {})
        # forwards whatever per-call overrides the caller passes
        return self.generator(text, **params)