import sys
sys.path.append('../')
from fastapi import FastAPI, Request, Query
from flask import Flask, request, jsonify
import joblib
import gensim
import spacy
import pickle
import pandas as pd
import torch
import uvicorn
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
app = FastAPI()

# Word2vec embeddings are loaded once at import time so every request reuses them.
path = '../data/GoogleNews-vectors-negative300.bin.gz.gz'
w2v = gensim.models.KeyedVectors.load_word2vec_format(path, binary=True)

# Ensure the spaCy model is installed; spacy.load raises OSError when the
# package is missing. The original bare `except:` also swallowed
# KeyboardInterrupt/SystemExit, which could mask a real failure.
try:
    spacy.load('en_core_web_lg')
except OSError:
    spacy.cli.download('en_core_web_lg')
def get_word_vector(word):
    """
    Look up the embedding of a single word in the loaded word2vec model.

    Args:
        word (str): a single word token.

    Returns:
        The vector components for ``word`` when it is present in the
        model's vocabulary, otherwise ``None``.
    """
    # EAFP: gensim's KeyedVectors raises KeyError for out-of-vocabulary keys.
    try:
        return w2v[word]
    except KeyError:
        return None
@app.get('/pipeline')
def pipeline(text: str = Query("")):
    """
    Process text through the pre-trained preprocessing pipeline.

    FastAPI binds the ``text`` query parameter directly, so the previous
    fallback to Flask's ``request.args`` could never succeed: Flask's
    request proxy has no context under FastAPI and raised a RuntimeError
    whenever ``text`` was empty. Empty input is now reported explicitly.

    Args:
        text (str): The input text to be processed, supplied as the
            ``text`` query parameter. Default is an empty string.

    Returns:
        The tokens produced by the pipeline's ``transform`` for ``text``,
        or a JSON error object when no text was provided.
    """
    if text == "":
        # Consistent with /prescribe: report missing input instead of crashing.
        return {"error": "No text provided"}
    pipeline_path = '../data/pipeline.joblib'
    # Local name chosen so it does not shadow this endpoint function.
    text_pipeline = joblib.load(pipeline_path)
    return text_pipeline.transform(text)
@app.get('/w2v')
def word2vec(tokens: str = Query("")):
    """
    Sum the word vectors of a comma-separated list of tokens.

    The previous Flask ``request.args`` fallback raised a RuntimeError
    under FastAPI, and the summation crashed with IndexError when none of
    the tokens were in the vocabulary; both paths now return explicit
    results.

    Args:
        tokens (str): A comma-separated string of tokens, optionally
            wrapped in square brackets, supplied as the ``tokens`` query
            parameter.

    Returns:
        list: The component-wise sum of the vectors of all known tokens,
        or a JSON error object when no token is in the vocabulary.
    """
    token_list = str(tokens).strip('[').strip(']').split(',')
    vectors = []
    for token in token_list:
        vec = get_word_vector(token)
        if vec is not None:
            vectors.append(vec.tolist())
    if not vectors:
        # Previously: IndexError on vectors[0] when nothing was found.
        return {"error": "No known tokens provided"}
    # Component-wise sum across all collected vectors.
    return [sum(components) for components in zip(*vectors)]
@app.get('/model')
def model(vectors: str = Query("")):
    """
    Predict a sentiment class from a comma-separated list of vector components.

    Two Flask calls were removed: ``request.args`` (no Flask context under
    FastAPI, raised RuntimeError) and ``jsonify`` (same problem) — FastAPI
    serialises a plain dict itself.

    Args:
        vectors (str): Comma-separated vector components, optionally
            wrapped in square brackets, supplied as the ``vectors`` query
            parameter.

    Returns:
        dict: key "predictions" containing 1, 0 or -1, or an error object
        when no vector was provided.
    """
    if vectors == "":
        return {"error": "No vectors provided"}
    vectors_str = str(vectors).replace(" ", '').strip('[').strip(']')
    components = vectors_str.split(',')
    # One single-row column per component, matching the model's expected layout.
    frame = {str(i): [float(value)] for i, value in enumerate(components)}
    frame['id'] = [0]
    model_path = '../data/model.pkl'
    # NOTE(security): unpickling executes arbitrary code; only load trusted files.
    with open(model_path, 'rb') as file:
        estimator = pickle.load(file)
    results = estimator.predict(pd.DataFrame(frame))
    # int() unwraps numpy scalar types so the response is JSON-serialisable.
    return {"predictions": int(results[0])}
# Load the fine-tuned BERT model and its tokenizer once at import time so
# every request reuses the same objects.
# NOTE(security): unpickling executes arbitrary code; only load trusted files.
model_path = "../data/BERT_model_and_tokenizer.pkl"
with open(model_path, 'rb') as f:
    model, tokenizer = pickle.load(f)
# Switch to inference mode (disables dropout etc.).
model.eval()
def classify_sentiment(text):
    """
    Classify the sentiment of *text* with the pre-loaded BERT model.

    Args:
        text (str): the sentence to score.

    Returns:
        str: "Negative" when the argmax class is 0, otherwise "Non-negative".
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    # No gradients are needed for pure inference.
    with torch.no_grad():
        logits = model(**encoded).logits
    label_id = logits.argmax(-1).item()
    return "Negative" if label_id == 0 else "Non-negative"
@app.get('/prescribe')
def prescribe(text: str = Query(...)):
    """
    Classify a text's sentiment and build an HTML banner for the result.

    Args:
        text (str): the text to classify, supplied as the required
            ``text`` query parameter.

    Returns:
        tuple: ``(label, html)`` where ``label`` is "Negative" or
        "Non-negative" and ``html`` is a styled banner describing the
        outcome. Empty input yields a placeholder label and an empty
        banner.
    """
    if text == "":
        # Must return TWO values: the gradio change handler is wired to two
        # outputs (label + HTML); the old single error dict broke the UI.
        return "No text provided", ""
    predictions = classify_sentiment(text)
    if predictions == "Negative":
        visual = "<div style='display: flex; justify-content: center; text-align: center;; height: 100%;'><span style='color:red; font-size: 30px;'> ⚠ ATTENTION ⚠ <br> The sentence has been classified as negative. <br> Please review it carefully! </span></div>"
    else:
        visual = "<div style='display: flex; justify-content: center; text-align: center;; height: 100%;'><span style='color:green; font-size: 30px;'>All good! ⭐ <br> The sentence is considered non-negative.</span></div>"
    return predictions, visual
# Gradio UI: a single textbox whose content is classified live through
# ``prescribe`` and rendered as a label plus an HTML banner.
with gr.Blocks() as io:
    gr.Markdown("<h1 style='text-align: center;'>Emotion</h1>")
    gr.Markdown("<h3 style='text-align: center;'>BERT - Sentiment Classifier</h3>")
    with gr.Row():
        text_input = gr.Textbox(label="Enter text to classify its sentiment here:")
        with gr.Column():
            sentiment_label = gr.Label(label="The text is...")
            html_output = gr.HTML()
    # Every change of the textbox triggers a classification; the two outputs
    # receive the (label, html) pair returned by ``prescribe``.
    text_input.change(fn=prescribe, inputs=text_input, outputs=[sentiment_label, html_output])
# Serve the gradio interface from the FastAPI app under /interface.
app = gr.mount_gradio_app(app, io, path="/interface")
if __name__ == '__main__':
    # ``debug`` is Flask's keyword, not uvicorn's: uvicorn.run() rejected it
    # with a TypeError. Use uvicorn's own options (e.g. reload=True) if a
    # development mode is needed.
    uvicorn.run(app)