AshenR committed on
Commit
4bfc56c
·
verified ·
1 Parent(s): f948662

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py CHANGED
@@ -1,5 +1,103 @@
1
  import streamlit as st
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  # Title for the Streamlit app
4
  st.title("Sentiment Analysis App")
5
 
 
1
import os

import streamlit as st
import torch
from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
6
+
7
# Directory holding the saved model files: config.json, pytorch_model.bin
# and whatever files the tokenizer loads from the same location.
bert_out_address = '/models/'

# Build the model architecture from the saved configuration file.
config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

# NOTE(review): predict() indexes the logits per token (one tag per word
# piece), which is the output shape of a token-classification head.
# BertForSequenceClassification yields one logit vector per sequence --
# confirm which head the checkpoint was actually trained with.
model = BertForSequenceClassification(config)

# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only host; predict() moves the model to CUDA when it is available.
# torch.load unpickles the file, so only load checkpoints you trust.
model.load_state_dict(
    torch.load(
        os.path.join(bert_out_address, "pytorch_model.bin"),
        map_location="cpu",
    )
)

# Load the tokenizer from the same directory as the model.
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Inference only: switch off dropout and other training-time behaviour.
model.eval()
22
+
23
+
24
+
25
+
26
def predict(test_query):
    """Return the tokens of ``test_query`` that the model tags as ``"ASP"``.

    The query is split into word pieces, wrapped in ``[CLS]`` / ``[SEP]``,
    trimmed and zero-padded to ``max_len`` ids, and passed through the
    module-level ``model``. Every non-padding position whose best-scoring
    tag id maps to ``"ASP"`` in ``tag2name`` contributes its token.

    Relies on the module-level names ``tokenizer``, ``model``, ``max_len``
    and ``tag2name``.
    NOTE(review): ``max_len`` and ``tag2name`` are not defined anywhere in
    the visible file; they must exist before this function is called.

    Args:
        test_query: Raw input text.

    Returns:
        A tuple ``(asp, test_query)``: the list of token strings tagged
        ``"ASP"`` (in input order) and the unchanged input text.

    Raises:
        ValueError: If the model does not return per-token logits.
    """
    # [CLS] + word pieces, trimmed so the closing [SEP] still fits in max_len.
    tokens = ['[CLS]', *tokenizer.tokenize(test_query)][:max_len - 1]
    tokens.append('[SEP]')

    # Right-pad with id 0 up to max_len. This replaces the keras
    # pad_sequences call, which was never imported in this file.
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    token_ids = token_ids + [0] * (max_len - len(token_ids))

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)  # Move model to GPU if available

    input_ids = torch.tensor([token_ids], dtype=torch.long, device=device)
    # Id 0 is treated as padding; everything else is a real token.
    attention_mask = (input_ids > 0).long()

    with torch.no_grad():
        # Pass the real mask so padding positions are not attended to
        # (the mask used to be computed and then replaced by None here).
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask)
    logits = outputs[0]

    if logits.dim() != 3:
        raise ValueError(
            "predict() needs per-token logits of shape (batch, seq_len, tags); "
            "got shape %s" % (tuple(logits.shape),)
        )

    # Softmax is monotonic, so argmax over the raw logits picks the same tag.
    tag_ids = logits[0].argmax(dim=-1).tolist()
    keep_flags = attention_mask[0].tolist()

    # zip stops at len(tokens); the remaining positions are padding anyway.
    asp = [
        token
        for token, tag_id, keep in zip(tokens, tag_ids, keep_flags)
        if keep and tag2name[tag_id] == "ASP"
    ]
    return asp, test_query
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
  # Title for the Streamlit app
102
  st.title("Sentiment Analysis App")
103