import os

import numpy as np
import streamlit as st
import torch
from scipy.special import softmax
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import BertForTokenClassification, BertTokenizer

# Maximum sequence length in word-piece tokens, including [CLS] and [SEP]
max_len = 60

# Directory containing the fine-tuned model (config.json, pytorch_model.bin, vocab.txt)
bert_out_address = 'model/'

# Run on GPU if one is available, otherwise fall back to CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the fine-tuned token-classification model and its tokenizer
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Move the model to the chosen device and set it to evaluation mode
model.to(device)
model.eval()

# Mapping from label ids to the tag names used during fine-tuning
tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}


def predict(test_query):
    """Return the word-piece tokens of test_query that the model tags as aspects ('ASP')."""
    # Word-piece tokenize the query and wrap it in [CLS] ... [SEP]
    temp_token = ['[CLS]'] + tokenizer.tokenize(test_query)

    # Trim the tokens so there is room for the trailing [SEP]
    if len(temp_token) > max_len - 1:
        temp_token = temp_token[:max_len - 1]
    temp_token.append('[SEP]')
    tokenized_texts = [temp_token]

    # Convert tokens to ids and pad/truncate to max_len
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                              maxlen=max_len, dtype="long", truncating="post", padding="post")

    # Attention mask: 1 for real tokens, 0 for padding
    attention_masks = [[int(i > 0) for i in ii] for ii in input_ids]

    # Move input tensors to the same device as the model
    input_ids = torch.tensor(input_ids).to(device)
    attention_masks = torch.tensor(attention_masks).to(device)

    # Forward pass without gradient tracking
    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_masks)
        logits = outputs[0]

    # Per-token tag probabilities for the single input sequence
    predict_results = logits.detach().cpu().numpy()
    result_array = softmax(predict_results[0], axis=-1)

    # Pick the most likely tag for each token
    result_list = np.argmax(result_array, axis=-1)

    # Collect the tokens predicted as aspects, skipping padding positions
    asp = []
    for i, mark in enumerate(attention_masks[0].cpu().numpy()):
        if mark > 0 and tag2name[result_list[i]] == "ASP":
            asp.append(temp_token[i])
    return asp


# Title for the Streamlit app
st.title("Aspect Prediction App")

# Text input
user_input = st.text_area("Enter the text for Aspect Prediction:", "")

# Run aspect extraction when there is input text
if user_input:
    with st.spinner("Analyzing..."):
        result = predict(user_input)
    st.write(f"**Aspects**: {result}")
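
# A quick way to exercise predict() outside the Streamlit UI (illustrative
# sketch: the sample sentence is made up, and the returned aspect tokens
# depend entirely on the fine-tuned weights under model/):
#
#   aspects = predict("The battery lasts long but the screen is too dim")
#   print(aspects)
#
# To serve the UI, launch this script with the Streamlit CLI (assuming it is
# saved as app.py):
#
#   streamlit run app.py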