chaima01 committed · verified
Commit 8708b02 · 1 Parent(s): 84e87c0

Update handler.py

Files changed (1): handler.py +12 -17
handler.py CHANGED
@@ -1,28 +1,23 @@
  # handler.py
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
  import os
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

  class EndpointHandler:
      def __init__(self, model_dir: str):
-         # load tokenizer & model from the same folder where handler.py lives
-         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
-         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
-         # build a HF pipeline; device_map="auto" will pick GPU if available
+         # model_dir is already the repo root
+         tokenizer = AutoTokenizer.from_pretrained(model_dir)
+         model = AutoModelForSeq2SeqLM.from_pretrained(model_dir).to("cuda")
+
+         # build a text2text pipeline on GPU (device=0)
          self.generator = pipeline(
              "text2text-generation",
-             model=self.model,
-             tokenizer=self.tokenizer,
-             device=0  # set to -1 if you want CPU only
+             model=model,
+             tokenizer=tokenizer,
+             device=0
          )

      def __call__(self, payload: dict) -> list:
-         """
-         Expects a JSON payload like:
-           {"inputs": "<your question here>", "parameters": {"max_new_tokens": 200}}
-         Returns the raw list of dicts that HF pipeline emits.
-         """
-         text = payload.get("inputs", "")
+         # receive {"inputs": "...", "parameters": {...}}
+         text = payload.get("inputs", "")
          params = payload.get("parameters", {})
-         # run generation
-         outputs = self.generator(text, **params)
-         return outputs
+         return self.generator(text, **params)
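
For anyone wiring this up, a minimal local smoke test of the updated handler could look like the sketch below. The model directory path and the example question are illustrative assumptions, not part of the commit; the payload shape comes from the docstring removed in this diff. Note that `.to("cuda")` together with `device=0` pins the handler to a GPU, so this will raise on a CPU-only machine.

# smoke_test.py -- illustrative sketch, not part of the commit
from handler import EndpointHandler

# On an Inference Endpoint the platform passes the repo root as model_dir;
# "." is an assumed stand-in for a local checkout containing the weights.
handler = EndpointHandler(model_dir=".")

# Payload shape taken from the old docstring:
# {"inputs": "<your question here>", "parameters": {"max_new_tokens": 200}}
payload = {
    "inputs": "What is the capital of France?",  # assumed example input
    "parameters": {"max_new_tokens": 200},
}

# The text2text-generation pipeline returns a list of dicts,
# e.g. [{"generated_text": "..."}]
print(handler(payload))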