File size: 6,092 Bytes
0589f45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import sys
sys.path.append('../')
from fastapi import FastAPI, Request, Query
from flask import Flask, request, jsonify
import joblib
import gensim
import spacy
import pickle
import pandas as pd
import torch
import uvicorn
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# FastAPI application object; the route decorators in this module register on it.
app = FastAPI()

# word2vec vectors in binary format, loaded once at import time and queried
# by get_word_vector().
path = '../data/GoogleNews-vectors-negative300.bin.gz.gz'
w2v = gensim.models.KeyedVectors.load_word2vec_format(path, binary=True)

# Check that the spaCy model is installed and download it when it is not.
# spacy.load raises OSError for a model that cannot be found; catching only
# that keeps unrelated failures (and Ctrl-C) from being silently turned into
# a download attempt, which the previous bare ``except:`` did.
try:
    spacy.load('en_core_web_lg')
except OSError:
    spacy.cli.download('en_core_web_lg')

def get_word_vector(word):
    """
    Look up a single word in the module-level word2vec vectors.

    Args:
        word(str): a single word

    Returns:
        The entry stored in ``w2v`` for ``word``, or None when ``word`` is
        not contained in ``w2v``.
    """
    return w2v[word] if word in w2v else None


@app.get('/pipeline')
def pipeline(text: str = ""):
    """
    Endpoint to process text through a pipeline loaded from disk.

    FastAPI binds the ``text`` query parameter directly to the ``text``
    argument, so the value is used as received; a missing parameter simply
    arrives as the empty-string default.

    Args:
        text (str): The input text to be processed. Default is an empty
                    string, which is handed to the pipeline unchanged.

    Returns:
        The value returned by ``transform(text)`` of the object stored in
        ``../data/pipeline.joblib``.
    """
    # No fallback to ``flask.request`` here: inside a FastAPI handler there
    # is no Flask request context, so touching ``request.args`` raises
    # RuntimeError instead of returning the query parameter.
    path = '../data/pipeline.joblib'
    # Distinct local name so the loaded object does not shadow this function.
    fitted_pipeline = joblib.load(path)

    return fitted_pipeline.transform(text)


@app.get('/w2v')
def word2vec(tokens: str = ""):
    """
    Endpoint to get the summed word vector for a list of tokens.

    FastAPI binds the ``tokens`` query parameter directly to the ``tokens``
    argument. The value is a comma-separated string of tokens, optionally
    wrapped in square brackets (for example a stringified list).

    Args:
        tokens (str): A comma-separated string of tokens. Default is an
                      empty string.

    Returns:
        list: The element-wise sum of the word vectors of all tokens that
              are present in the word2vec vocabulary, or an empty list when
              none of the tokens is known.
    """
    # No fallback to ``flask.request`` here: it raises RuntimeError inside a
    # FastAPI handler because no Flask request context exists.
    raw_tokens = str(tokens).strip('[').strip(']').split(',')

    array_vectors = []
    for raw_token in raw_tokens:
        # Vocabulary keys never carry surrounding whitespace, so "a, b"
        # must not look up " b".
        candidate = raw_token.strip()
        vector = get_word_vector(candidate)
        if vector is None:
            # A stringified list arrives as "'a', 'b'"; retry without the
            # surrounding quote characters before giving up on the token.
            vector = get_word_vector(candidate.strip('\'"'))
        if vector is not None:
            array_vectors.append(vector.tolist())

    # Without this guard the sum below had nothing to size itself from and
    # the original code failed with IndexError on array_vectors[0].
    if not array_vectors:
        return []

    # zip(*...) walks the vectors component by component; sum() starts at 0
    # and adds in list order, matching the original accumulation.
    return [sum(components) for components in zip(*array_vectors)]


@app.get('/model')
def model(vectors: str = ""):
    """
    Endpoint to return the predicted value based on the word vector.

    FastAPI binds the ``vectors`` query parameter directly to the
    ``vectors`` argument. The value is a comma-separated list of numbers,
    optionally wrapped in square brackets.

    Args:
        vectors (str): a list with the word vector components

    Returns:
        dict: key "predictions" holding the first prediction returned by
              the model stored in ``../data/model.pkl``; or key "error"
              when no vector components were provided.

    Raises:
        ValueError: if a component cannot be converted to ``float``.
    """
    # No fallback to ``flask.request`` here: it raises RuntimeError inside a
    # FastAPI handler because no Flask request context exists.
    vectors_str = str(vectors).replace(" ", '').strip('[').strip(']')
    if not vectors_str:
        # float('') would raise; report the missing input instead.
        return {"error": "No vectors provided"}

    # One single-row column per component, named by its position, plus the
    # constant "id" column that the original code also fed to the model.
    features = {
        str(index): [float(component)]
        for index, component in enumerate(vectors_str.split(','))
    }
    features['id'] = [0]

    path = '../data/model.pkl'
    # NOTE: pickle.load executes arbitrary code from the file; this is only
    # safe as long as model.pkl comes from a trusted source.
    with open(path, 'rb') as file:
        classifier = pickle.load(file)

    results = classifier.predict(pd.DataFrame(features))

    # Convert numpy scalars to native Python values so the response can be
    # JSON-encoded; flask.jsonify cannot be used outside a Flask app context.
    prediction = results[0]
    if hasattr(prediction, "item"):
        prediction = prediction.item()
    return {"predictions": prediction}

# Load the pickled (model, tokenizer) pair used by classify_sentiment().
model_path = "../data/BERT_model_and_tokenizer.pkl"

# NOTE: pickle.load executes arbitrary code from the file; only safe while the
# pickle comes from a trusted source.
# NOTE(review): this rebinds the module-level name `model`, which until here
# referred to the `/model` endpoint function defined above. The route is
# already registered, but the name now points at the unpickled object.
with open(model_path, 'rb') as f:
    model, tokenizer = pickle.load(f)

# presumably a torch module, so eval() selects inference behaviour — confirm
model.eval()  

def classify_sentiment(text):
    """
    Classify a text with the module-level model and tokenizer.

    Args:
        text: the text handed to the tokenizer

    Returns:
        str: "Negative" when the highest-scoring logit index is 0,
             otherwise "Non-negative".
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Gradients are not needed for a prediction.
    with torch.no_grad():
        logits = model(**encoded).logits
        label_id = logits.argmax(-1).item()

    return "Negative" if label_id == 0 else "Non-negative"

@app.get('/prescribe')
def prescribe(text: str = Query(...)):
    """
    Endpoint to classify a text and build an HTML banner for the result.

    Args:
        text (str): the text to classify (required query parameter)

    Returns:
        dict: ``{"error": "No text provided"}`` when ``text`` is empty.
        tuple: otherwise ``(label, html)``, where ``label`` is the string
               returned by ``classify_sentiment`` and ``html`` is a red
               warning banner for "Negative" or a green banner for any
               other label.
    """
    if text == "":
        return {"error": "No text provided"}

    negative_banner = "<div style='display: flex; justify-content: center; text-align: center;; height: 100%;'><span style='color:red; font-size: 30px;'> &#9888; ATTENTION &#9888; <br> The sentence has been classified as negative. <br> Please review it carefully! </span></div>"
    other_banner = "<div style='display: flex; justify-content: center; text-align: center;; height: 100%;'><span style='color:green; font-size: 30px;'>All good! &#11088; <br> The sentence is considered non-negative.</span></div>"

    label = classify_sentiment(text)
    banner = negative_banner if label == "Negative" else other_banner

    return label, banner


# Gradio UI: a text box whose content is classified by prescribe() whenever
# it changes, with the result shown as a label plus an HTML banner.
with gr.Blocks() as io:
    # Page title and subtitle.
    gr.Markdown("<h1 style='text-align: center;'>Emotion</h1>")
    gr.Markdown("<h3 style='text-align: center;'>BERT - Sentiment Classifier</h3>")

    with gr.Row():
        text_input = gr.Textbox(label="Enter text to classify its sentiment here:")

    with gr.Column():
        sentiment_label = gr.Label(label="The text is...")
        html_output = gr.HTML()

    # prescribe() is reused as the callback; its two return values feed the
    # two output components in order.
    # NOTE(review): for empty text prescribe() returns a single dict rather
    # than two values — confirm how Gradio handles clearing the text box.
    text_input.change(fn=prescribe, inputs=text_input, outputs=[sentiment_label, html_output])

# Serve the Gradio UI from the same FastAPI app under /interface.
app = gr.mount_gradio_app(app, io, path="/interface")

# Start the ASGI server only when this file is executed as a script.
if __name__ == '__main__':
    # uvicorn.run() no longer accepts a ``debug`` keyword (passing it raises
    # TypeError on current releases); verbose logging is the supported way
    # to get extra diagnostics during development.
    uvicorn.run(app, log_level="debug")