| from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline | |
def model_fn(model_dir):
    """
    Build the tokenizer and the sequence-classification model from ``model_dir``.

    The tokenizer is loaded first, then the model; ``model_dir`` is handed
    unchanged to both ``from_pretrained`` calls.

    :param model_dir: location passed to ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForSequenceClassification.from_pretrained``
    :return: ``(model, tokenizer)`` tuple, in that order
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_dir)
    classifier = AutoModelForSequenceClassification.from_pretrained(model_dir)
    # Order matters to the consumer: model first, tokenizer second.
    return classifier, loaded_tokenizer
# Pipelines that have already been built, keyed by the identities of the
# (model, tokenizer) pair they wrap. Each entry also stores the model and
# tokenizer themselves so they stay alive and their ids cannot be recycled
# for other objects. Entries are never evicted; this is intended for a
# process that serves a small, fixed set of models.
_PIPELINE_CACHE = {}


def _get_pipeline(model, tokenizer):
    """Return the text-classification pipeline for this pair, building it only once."""
    # id() is used instead of hashing so the objects need not be hashable.
    cache_key = (id(model), id(tokenizer))
    entry = _PIPELINE_CACHE.get(cache_key)
    if entry is None:
        # NOTE(review): newer transformers releases deprecate
        # ``return_all_scores`` in favour of ``top_k=None``; switching may
        # change the nesting/order of the result, so the original flag is
        # kept — confirm against the pinned transformers version.
        classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
                              truncation=True, max_length=512, return_all_scores=True)
        entry = (model, tokenizer, classifier)
        _PIPELINE_CACHE[cache_key] = entry
    return entry[2]


def predict_fn(data, model_and_tokenizer):
    """
    Classify the text found under ``data['inputs']``.

    The text is first cut to its first 512 tokens (no special tokens added),
    decoded back to a string, and then passed to a text-classification
    pipeline configured with ``truncation=True``, ``max_length=512`` and
    ``return_all_scores=True``. The pipeline is created on the first call for
    a given model/tokenizer pair and reused afterwards instead of being
    rebuilt for every request.

    :param data: object indexed with ``'inputs'`` to obtain the text to
        classify (presumably a dict holding a single string — verify against
        the caller)
    :param model_and_tokenizer: ``(model, tokenizer)`` pair, in that order
    :return: the value returned by the text-classification pipeline
    """
    # Unpack the model and tokenizer.
    model, tokenizer = model_and_tokenizer
    bert_pipe = _get_pipeline(model, tokenizer)

    # Tokenize the input and keep only the first 512 tokens before passing it on.
    tokens = tokenizer.encode(data['inputs'], add_special_tokens=False, max_length=512, truncation=True)
    input_data = tokenizer.decode(tokens)
    return bert_pipe(input_data)