import numpy as np
import streamlit as st
import torch
from scipy.special import softmax
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import BertForTokenClassification, BertTokenizer
max_len = 60

# Directory where the fine-tuned model was saved
# (expected to contain config.json, pytorch_model.bin and the tokenizer's vocab.txt)
bert_out_address = 'model/'

# Run on GPU if one is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the fine-tuned token-classification model and its tokenizer
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Move the model to the chosen device and put it in inference mode
model.to(device)
model.eval()




def predict(test_query):
    """Return the sub-word tokens of `test_query` that the model tags as aspects (ASP)."""
    # Tokenize the query and wrap it in the BERT special tokens
    temp_token = ['[CLS]'] + tokenizer.tokenize(test_query)
    # Trim to leave room for the trailing [SEP]
    if len(temp_token) > max_len - 1:
        temp_token = temp_token[:max_len - 1]
    temp_token.append('[SEP]')
    tokenized_texts = [temp_token]

    # Convert tokens to ids and pad every sequence to a fixed length
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                              maxlen=max_len, dtype="long", truncating="post", padding="post")
    # Attention mask: 1 for real tokens, 0 for padding
    attention_masks = [[int(i > 0) for i in ii] for ii in input_ids]

    # Move the input tensors to the same device as the model
    input_ids = torch.tensor(input_ids).to(device)
    attention_masks = torch.tensor(attention_masks).to(device)

    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_masks)
        logits = outputs[0]

    # logits holds one score per tag for every token position
    predict_results = logits.detach().cpu().numpy()
    result_arrays_soft = softmax(predict_results[0], axis=-1)
    result_list = np.argmax(result_arrays_soft, axis=-1)

    # Tag scheme used during fine-tuning
    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}

    # Collect the tokens predicted as aspects, skipping padding positions
    asp = []
    for i, mark in enumerate(attention_masks[0].cpu().numpy()):
        if mark > 0 and tag2name[result_list[i]] == "ASP":
            asp.append(temp_token[i])
    return asp
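# A minimal sanity-check sketch for predict() outside the Streamlit UI (the sentence is a
# hypothetical example; the exact tokens returned depend on the fine-tuned weights in model/):
#
#   aspects = predict("The battery lasts long but the screen is dim")
#   print(aspects)  # sub-word tokens the model tagged as ASP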

















# Title for the Streamlit app
st.title("Aspect Prediction App")

# Text input
user_input = st.text_area("Enter the text for Aspect Prediction:", "")

# Run aspect extraction once the user has entered some text
if user_input:
    with st.spinner("Analyzing..."):
        result = predict(user_input)
    st.write(f"**Aspects**: {result}")
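# To serve the app locally (assuming this script is saved as app.py and the imports above
# are installed), run:
#   streamlit run app.py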