| from ast import literal_eval | |
| import torch | |
| from transformers import pipeline | |
| from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoModelForSequenceClassification | |
| from transformers import BertForSequenceClassification, BertTokenizer, BertConfig | |
| from math import exp | |
| from . import label | |
class Model(object):
    """Thin wrapper around a Hugging Face tokenizer/model pair.

    ``load_model`` fills in ``tokenizer`` and ``model`` (plus ``config`` for
    the "emotion" task); ``predict`` then runs one of the supported tasks:
    "emotion" and "sentiment" (sequence classification) or "ner" (token
    classification).
    """

    # Tasks for which predict() returns a single label/score pair.
    _CLASSIFICATION_TASKS = ("emotion", "sentiment")

    def __init__(self) -> None:
        # Checkpoint used by load_model() when no model_name is passed.
        self.model_name = "indolem/indobert-base-uncased"
        self.tokenizer = None
        self.model = None
        self.config = None

    def load_model(self, model_name: str = None, tasks: str = None):
        """Load the tokenizer and model matching ``tasks``.

        Args:
            model_name: Name or path handed to ``from_pretrained``. When
                ``None``, ``self.model_name`` is used instead.
            tasks: "emotion" loads ``BertConfig``, ``BertTokenizer`` and
                ``BertForSequenceClassification``; "ner" loads ``AutoTokenizer``
                and ``AutoModelForTokenClassification``; any other value loads
                ``AutoTokenizer`` and ``AutoModelForSequenceClassification``.
        """
        if model_name is None:
            # Fall back to the instance default instead of passing None to
            # from_pretrained().
            model_name = self.model_name
        print(model_name)

        if tasks == "emotion":
            self.config = BertConfig.from_pretrained(model_name)
            self.tokenizer = BertTokenizer.from_pretrained(model_name)
            self.model = BertForSequenceClassification.from_pretrained(
                model_name, config=self.config
            )
            return

        # Non-emotion tasks leave self.config as it was.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tasks == "ner":
            self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        else:
            self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

    def predict(self, sentences, tasks: str = None):
        """Run inference for ``tasks`` on ``sentences``.

        ``load_model`` must have been called first for the "emotion",
        "sentiment" and "ner" tasks.

        Args:
            sentences: Input text. The classification path calls ``.item()``
                on the argmax result, so it only works when the batch holds
                exactly one sentence. NOTE(review): the "ner" path indexes
                each pipeline output as a dict, which presumably also implies
                a single string input -- confirm against callers.
            tasks: "emotion", "sentiment" or "ner".

        Returns:
            For "emotion"/"sentiment": ``{"label": ..., "score": ...}``.
            For "ner": a list of dicts with the keys "entity", "score",
            "index", "word", "start" and "end".
            For any other value: the empty string, without running a model.
        """
        if tasks in self._CLASSIFICATION_TASKS:
            return self._classify(sentences, tasks)
        if tasks == "ner":
            return self._recognize_entities(sentences)
        # Unknown task: nothing to compute, so do not tokenize or build a
        # pipeline whose output would be thrown away.
        return ""

    def _classify(self, sentences, tasks):
        """Return the predicted label and its score for a classification task."""
        encoded_input = self.tokenizer(
            sentences,
            return_tensors="pt",
            padding=True,
            truncation=True,
        )
        with torch.no_grad():
            outputs = self.model(**encoded_input)
            predicted_class = torch.argmax(outputs.logits, dim=1).item()
            logit = float(outputs.logits[0][predicted_class])

        # NOTE(review): the score is the logistic sigmoid of the winning
        # logit, not a softmax over all classes, so scores of different
        # classes do not sum to 1. Kept as-is to preserve returned values.
        exp_logit = exp(logit)
        score = exp_logit / (1 + exp_logit)
        return {"label": label[tasks][predicted_class], "score": score}

    def _recognize_entities(self, sentences):
        """Run the token-classification pipeline and normalise its output."""
        # The pipeline tokenizes the text itself, so no separate encoding
        # step is needed here.
        recognizer = pipeline(
            "token-classification",
            model=self.model,
            tokenizer=self.tokenizer,
        )
        with torch.no_grad():
            outputs = recognizer(sentences)

        # Cast numeric fields to built-in float/int.
        return [
            {
                "entity": output["entity"],
                "score": float(output["score"]),
                "index": int(output["index"]),
                "word": output["word"],
                "start": int(output["start"]),
                "end": int(output["end"]),
            }
            for output in outputs
        ]