|
|
import streamlit as st |
|
|
from scipy.special import softmax |
|
|
import numpy as np |
|
|
from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
|
import os |
|
|
from pytorch_transformers import BertForTokenClassification |
|
|
import torch |
|
|
from transformers import BertConfig, BertForSequenceClassification, BertTokenizer |
|
|
# Maximum token sequence length fed to the model; inputs are truncated/padded to this.
max_len = 60

# Directory with the fine-tuned model artifacts (config.json, pytorch_model.bin, vocab).
bert_out_address = 'model/'

# Load the saved model configuration from disk.
config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

# Prefer GPU when available; fall back to CPU otherwise.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# BUGFIX: the original code first built a BertForSequenceClassification(config),
# loaded pytorch_model.bin into it with load_state_dict, and then immediately
# discarded that model by re-binding `model` to the token-classification model
# below.  That load was dead code (and could crash on a classifier-head shape
# mismatch between the 2-label sequence head and the saved 5-label token head),
# so it has been removed.  `from_pretrained` reads the same config.json and
# pytorch_model.bin itself.
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)

# Tokenizer carrying the vocabulary the model was trained with.
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Inference only: put the model in eval mode (disables dropout etc.).
model.eval()
|
|
def predict(test_query):
    """Extract aspect tokens from *test_query*.

    The query is WordPiece-tokenized, wrapped in [CLS]/[SEP], padded to
    ``max_len``, and run through the token-classification model.  Every
    non-padding token the model tags as "ASP" is collected and returned.

    Parameters
    ----------
    test_query : str
        Raw input text.

    Returns
    -------
    list[str]
        WordPiece tokens predicted to be aspect terms (possibly empty).
    """
    # Build the token sequence: [CLS] + wordpieces + [SEP], capped at max_len.
    temp_token = ['[CLS]']
    temp_token.extend(tokenizer.tokenize(test_query))
    if len(temp_token) > max_len - 1:
        # Leave room for the trailing [SEP] marker.
        temp_token = temp_token[:max_len - 1]
    temp_token.append('[SEP]')
    tokenized_texts = [temp_token]

    # Convert tokens to ids and pad to a fixed length (padding id is 0).
    input_ids = pad_sequences(
        [tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
        maxlen=max_len, dtype="long", truncating="post", padding="post")

    # 1 for real tokens, 0 for padding positions.
    attention_masks = [[int(i > 0) for i in ii] for ii in input_ids]

    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    input_ids = input_ids.to(device)
    attention_masks = attention_masks.to(device)

    with torch.no_grad():
        # BUGFIX: the attention mask was computed but the model was called with
        # attention_mask=None, so it attended to padding tokens; pass it now.
        outputs = model(input_ids, token_type_ids=None,
                        attention_mask=attention_masks)
    logits = outputs[0]

    predict_results = logits.detach().cpu().numpy()
    # Per-token class probabilities for the single input sequence.
    # BUGFIX: the original called softmax without an axis, normalizing over the
    # whole (seq_len, num_labels) array; axis=-1 gives proper per-token
    # distributions.  The argmax below is unaffected by this change.
    result_arrays_soft = softmax(predict_results[0], axis=-1)

    # Model label ids -> human-readable tags.
    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
    result_list = np.argmax(result_arrays_soft, axis=-1)

    # Collect every non-padding token tagged as an aspect.
    asp = []
    for i, mark in enumerate(attention_masks[0]):
        if mark > 0 and tag2name[result_list[i]] == "ASP":
            asp.append(temp_token[i])
    return asp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Streamlit UI -----------------------------------------------------------
st.title("Aspect Prediction App")

# Free-text input box; the empty default means prediction only runs once the
# user has typed something.
# BUGFIX: fixed the user-facing typo "Prection" -> "Prediction".
user_input = st.text_area("Enter the text for Aspect Prediction:", "")

if user_input:
    with st.spinner("Analyzing..."):
        result = predict(user_input)
        # Render the predicted aspect tokens explicitly rather than relying on
        # Streamlit's "magic" bare-expression rendering.
        st.write(result)
|
|
|
|
|
|
|
|
|
|
|
|