from typing import Any, Dict

from transformers import AutoTokenizer

class EndpointHandler():
    """Inference endpoint handler that tokenizes input text with a pretrained tokenizer."""

    def __init__(self, path: str = ""):
        # NOTE(review): `path` is part of the standard endpoint-handler interface but is
        # unused here — the tokenizer is always loaded from the fixed hub repository.
        self.tokenizer = AutoTokenizer.from_pretrained("otmanheddouch/shakespear-tokenizer")

    def __call__(self, data: str) -> Dict[str, Any]:
        """
        Tokenize the input text.

        Args:
            data: text input to tokenize.

        Returns:
            dict with the token strings under "tokens" and their ids under "ids".
        """
        # Bug fixes: tokenize() was called with no argument, encode() referenced an
        # undefined name `example`, and the returned dict literal was malformed
        # ({"tokens:"tokens, ...}). All three now operate on `data` with valid syntax.
        tokens = self.tokenizer.tokenize(data)
        tokens_ids = self.tokenizer.encode(data)

        return {"tokens": tokens, "ids": tokens_ids}