# ASPWords / app.py — Streamlit app for BERT-based aspect (ASP) token tagging.
# (Hugging Face Spaces page residue converted to a comment: author "AshenR",
#  commit 3e2a410 "Update app.py" — the raw lines were not valid Python.)
import streamlit as st
from scipy.special import softmax
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pytorch_transformers import BertForTokenClassification
import torch
from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
# Maximum sequence length in tokens, including the [CLS]/[SEP] specials.
max_len = 60

# Directory containing the fine-tuned model artifacts (config, weights, vocab).
bert_out_address = 'model/'

# Load the configuration file (kept for inspection/compatibility; from_pretrained
# below reads the same config on its own).
config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

# Prefer GPU when available; predict() moves tensors to this device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the fine-tuned token-classification head (5 tags: O/ASP/X/[CLS]/[SEP])
# together with its weights. NOTE: the original code first built a
# BertForSequenceClassification and loaded pytorch_model.bin into it, then
# immediately discarded it by reassigning `model` here — that dead load is
# removed. from_pretrained() loads the weights itself.
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)

# Load the matching tokenizer (vocab lives in the same directory).
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Evaluation mode: disables dropout etc. for deterministic inference.
model.eval()
def predict(test_query):
    """Run the token-classification model on *test_query* and return the list
    of wordpiece tokens tagged 'ASP' (aspect terms).

    Parameters
    ----------
    test_query : str
        Raw user text to analyze.

    Returns
    -------
    list[str]
        Wordpiece tokens (as produced by the BERT tokenizer, so subwords may
        carry '##' prefixes) whose predicted tag is 'ASP'.
    """
    # Tokenize and wrap with BERT special tokens, truncating so the sequence
    # (including [SEP]) never exceeds max_len.
    tokens = ['[CLS]'] + tokenizer.tokenize(test_query)
    if len(tokens) > max_len - 1:
        tokens = tokens[:max_len - 1]
    tokens.append('[SEP]')

    # Convert to ids and right-pad to the fixed length expected by the model.
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(tokens)],
                              maxlen=max_len, dtype="long",
                              truncating="post", padding="post")
    # 1 for real tokens, 0 for padding (BERT's [PAD] id is 0).
    attention_masks = [[int(tok_id > 0) for tok_id in seq] for seq in input_ids]

    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)  # no-op if already there
    input_ids = input_ids.to(device)
    attention_masks = attention_masks.to(device)

    with torch.no_grad():
        # BUG FIX: the original computed the attention mask but passed
        # attention_mask=None, so the model attended to padding positions.
        outputs = model(input_ids, token_type_ids=None,
                        attention_mask=attention_masks)
    logits = outputs[0]  # shape: (1, max_len, num_labels)

    # BUG FIX: the original called softmax() without an axis, normalizing over
    # the flattened array. argmax was unaffected, but axis=-1 gives per-token
    # probability distributions as intended.
    probabilities = softmax(logits.detach().cpu().numpy()[0], axis=-1)
    predicted_tags = np.argmax(probabilities, axis=-1)

    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}

    # Collect tokens predicted as aspects, skipping padding positions.
    asp = []
    for i, mark in enumerate(attention_masks[0].tolist()):
        if mark > 0 and tag2name[int(predicted_tags[i])] == "ASP":
            asp.append(tokens[i])
    return asp
# --- Streamlit UI ------------------------------------------------------------
st.title("Aspect Prediction App")

# Free-form text input from the user. (Typo fix: "Prection" -> "Prediction".)
user_input = st.text_area("Enter the text for Aspect Prediction:", "")

# Only run the model once the user has typed something.
if user_input:
    with st.spinner("Analyzing..."):
        result = predict(user_input)
    # Render the result explicitly instead of relying on Streamlit's "magic"
    # display of bare expressions (the original had a bare `result` line).
    if result:
        st.write("**Aspects**:", result)
    else:
        st.write("No aspects detected.")