File size: 2,452 Bytes
b417154
782700e
5f575ac
6530227
b417154
 
 
 
 
 
5f575ac
b417154
 
 
 
782700e
5f575ac
b417154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f575ac
e4aa4ec
 
 
b417154
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from typing import List, Dict, Any
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

class EndpointHandler():
    """Inference handler that extracts aspect / opinion / sentiment triplets.

    Each input text is wrapped in a fixed instruction prompt, fed through a
    seq2seq model, and the generated ``aspect:opinion:sentiment`` triplets are
    rendered as human-readable lines.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer from ``path``.

        Args:
            path: Location handed to ``from_pretrained`` for both the model
                and the tokenizer.
        """
        self.model = AutoModelForSeq2SeqLM.from_pretrained(path)
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # Prompt prefix: task definition plus two worked examples. The text is
        # part of the model input and must not be reformatted.
        self.bos_instruction = "Definition: The output will be the aspects (both implicit and explicit) the corresponding opinion/describing terms and the sentiment polarity (positive, negative, neutral) of the opinion term . In cases where there are no aspects the output should be noaspectterm:none:none.\n        Positive example 1-\n        input: Faan 's got a great concept but a little rough on the delivery .\n        output: delivery:rough:positive\n        Positive example 2- \n        input: I just wonder how you can have such a delicious meal for such little money .\n        output: meal:delicious:positive, money:little:positive\n        Now complete the following example-\n        input: "
        # Prompt suffix appended after the user text.
        self.eos_instruct = ' \noutput:'
        # Optional separator between the user text and the suffix (empty here).
        self.delim_instruct = ''

    def preprocess(self, inputs: List[str]):
        """Wrap each text in the instruction prompt and tokenize the batch.

        Args:
            inputs: The texts to analyse. A single bare string is also
                accepted and treated as a one-element batch.

        Returns:
            Whatever the tokenizer returns for the list of prompts (called
            with ``return_tensors="pt"``, truncation and padding enabled).
        """
        # A bare string would otherwise be iterated character by character,
        # producing one prompt per character.
        if isinstance(inputs, str):
            inputs = [inputs]
        processed_inputs = [
            self.bos_instruction + text + self.delim_instruct + self.eos_instruct
            for text in inputs
        ]
        return self.tokenizer(processed_inputs, return_tensors="pt", truncation=True, padding=True)

    def postprocess(self, outputs) -> str:
        """Decode the first generated sequence and format its triplets.

        Only ``outputs[0]`` is decoded, so for a multi-text batch only the
        first text's result is reported.

        Args:
            outputs: Generated sequences; the first one is passed to the
                tokenizer's ``decode``.

        Returns:
            One ``Aspect: ... - Opinion: ... - Sentiment: ...`` line per
            triplet, joined by newlines; an empty string when no well-formed
            triplet is found.
        """
        output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        lines = []
        for triplet in output.split(','):
            triplet = triplet.strip()
            if not triplet:
                continue
            aos = triplet.split(':')  # aos = aspect - opinion - sentiment
            # Generated text can be truncated or malformed; skip triplets that
            # lack one of the three fields instead of raising IndexError.
            if len(aos) < 3:
                continue
            aspect, opinion, sentiment = aos[:3]
            lines.append(f"Aspect: {aspect} - Opinion: {opinion} - Sentiment: {sentiment}")
        return "\n".join(lines).strip()

    def __call__(self, data: Dict[str, Any]) -> str:
        """Run the full pipeline for one request payload.

        Args:
            data: Request payload. The texts are read from the ``"inputs"``
                key (falling back to ``data`` itself when the key is absent).
                A ``"parameters"`` entry is removed from the payload but is
                not used by generation.

        Returns:
            The formatted triplets for the first input text.
        """
        inputs = data.pop('inputs', data)
        # Kept for its effect on the payload; the popped value is not used.
        data.pop("parameters", None)
        model_inputs = self.preprocess(inputs)
        # The batch is padded, so pass the attention mask explicitly rather
        # than relying on generate() to work out which positions are padding.
        model_outputs = self.model.generate(
            input_ids=model_inputs.input_ids,
            attention_mask=model_inputs.attention_mask,
        )
        return self.postprocess(model_outputs)