# ASPWords / app.py — Streamlit app for BERT-based aspect (ASP) token tagging.
# (Hugging Face Spaces page residue converted to a comment: author "AshenR",
#  commit 3e2a410 "Update app.py" — the raw lines were not valid Python.)
import streamlit as st
from scipy.special import softmax
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pytorch_transformers import BertForTokenClassification
import torch
from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
# Maximum sequence length in tokens, including the [CLS]/[SEP] specials.
max_len = 60

# Directory containing the fine-tuned model artifacts (config, weights, vocab).
bert_out_address = 'model/'

# Load the configuration file (kept for inspection/compatibility; from_pretrained
# below reads the same config on its own).
config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

# Prefer GPU when available; predict() moves tensors to this device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the fine-tuned token-classification head (5 tags: O/ASP/X/[CLS]/[SEP])
# together with its weights. NOTE: the original code first built a
# BertForSequenceClassification and loaded pytorch_model.bin into it, then
# immediately discarded it by reassigning `model` here — that dead load is
# removed. from_pretrained() loads the weights itself.
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)

# Load the matching tokenizer (vocab lives in the same directory).
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Evaluation mode: disables dropout etc. for deterministic inference.
model.eval()
def predict(test_query):
    """Run the token-classification model on *test_query* and return the list
    of wordpiece tokens tagged 'ASP' (aspect terms).

    Parameters
    ----------
    test_query : str
        Raw user text to analyze.

    Returns
    -------
    list[str]
        Wordpiece tokens (as produced by the BERT tokenizer, so subwords may
        carry '##' prefixes) whose predicted tag is 'ASP'.
    """
    # Tokenize and wrap with BERT special tokens, truncating so the sequence
    # (including [SEP]) never exceeds max_len.
    tokens = ['[CLS]'] + tokenizer.tokenize(test_query)
    if len(tokens) > max_len - 1:
        tokens = tokens[:max_len - 1]
    tokens.append('[SEP]')

    # Convert to ids and right-pad to the fixed length expected by the model.
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(tokens)],
                              maxlen=max_len, dtype="long",
                              truncating="post", padding="post")
    # 1 for real tokens, 0 for padding (BERT's [PAD] id is 0).
    attention_masks = [[int(tok_id > 0) for tok_id in seq] for seq in input_ids]

    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)  # no-op if already there
    input_ids = input_ids.to(device)
    attention_masks = attention_masks.to(device)

    with torch.no_grad():
        # BUG FIX: the original computed the attention mask but passed
        # attention_mask=None, so the model attended to padding positions.
        outputs = model(input_ids, token_type_ids=None,
                        attention_mask=attention_masks)
    logits = outputs[0]  # shape: (1, max_len, num_labels)

    # BUG FIX: the original called softmax() without an axis, normalizing over
    # the flattened array. argmax was unaffected, but axis=-1 gives per-token
    # probability distributions as intended.
    probabilities = softmax(logits.detach().cpu().numpy()[0], axis=-1)
    predicted_tags = np.argmax(probabilities, axis=-1)

    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}

    # Collect tokens predicted as aspects, skipping padding positions.
    asp = []
    for i, mark in enumerate(attention_masks[0].tolist()):
        if mark > 0 and tag2name[int(predicted_tags[i])] == "ASP":
            asp.append(tokens[i])
    return asp
# --- Streamlit UI ------------------------------------------------------------
st.title("Aspect Prediction App")

# Free-form text input from the user. (Typo fix: "Prection" -> "Prediction".)
user_input = st.text_area("Enter the text for Aspect Prediction:", "")

# Only run the model once the user has typed something.
if user_input:
    with st.spinner("Analyzing..."):
        result = predict(user_input)
    # Render the result explicitly instead of relying on Streamlit's "magic"
    # display of bare expressions (the original had a bare `result` line).
    if result:
        st.write("**Aspects**:", result)
    else:
        st.write("No aspects detected.")