AshenR committed on
Commit
ebaac8a
·
verified ·
1 Parent(s): 2ea3090

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -84
app.py CHANGED
@@ -78,88 +78,88 @@ def predict_sentiment(sentence, target_word, max_len=128):
78
 
79
  return predicted_sentiment
80
 
81
- # from scipy.special import softmax
82
- # import numpy as np
83
- # from tensorflow.keras.preprocessing.sequence import pad_sequences
84
- # import os
85
- # from pytorch_transformers import BertForTokenClassification
86
- # import torch
87
- # from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
88
- # max_len = 60
89
- # # Define the directory where the model is saved
90
- # bert_out_address = 'model/'
91
-
92
- # # Load the configuration file
93
- # config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))
94
- # device = 'cuda' if torch.cuda.is_available() else 'cpu'
95
-
96
- # # Load the pre-trained model's weights for sequence classification
97
- # model = BertForSequenceClassification(config)
98
- # # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin")))
99
-
100
- # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"), map_location=torch.device(device)))
101
- # model = BertForTokenClassification.from_pretrained(bert_out_address,num_labels=5)
102
-
103
- # # Load the tokenizer
104
- # tokenizer = BertTokenizer.from_pretrained(bert_out_address)
105
-
106
- # # Set the model to evaluation mode (if you're not going to train it further)
107
- # model.eval()
108
-
109
-
110
-
111
-
112
- # def predict(test_query):
113
- # import torch
114
- # tokenized_texts = []
115
- # temp_token = []
116
- # # Add [CLS] at the front
117
- # temp_token.append('[CLS]')
118
- # token_list = tokenizer.tokenize(test_query)
119
- # for m,token in enumerate(token_list):
120
- # temp_token.append(token)
121
- # # Trim the token to fit the length requirement
122
- # if len(temp_token) > max_len-1:
123
- # temp_token= temp_token[:max_len-1]
124
- # # Add [SEP] at the end
125
- # temp_token.append('[SEP]')
126
- # tokenized_texts.append(temp_token)
127
- # input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
128
- # maxlen=max_len, dtype="long", truncating="post", padding="post")
129
- # attention_masks = [[int(i>0) for i in ii] for ii in input_ids]
130
- # segment_ids = [[0] * len(input_id) for input_id in input_ids]
131
- # input_ids = torch.tensor(input_ids)
132
- # attention_masks = torch.tensor(attention_masks)
133
- # segment_ids = torch.tensor(segment_ids)
134
-
135
- # # Assuming you have defined your model and input_ids somewhere before this
136
- # device = 'cuda' if torch.cuda.is_available() else 'cpu'
137
- # model.to(device) # Move model to GPU if available
138
-
139
- # # Move input tensors to the same device as the model
140
- # input_ids = input_ids.to(device)
141
-
142
- # with torch.no_grad():
143
- # outputs = model(input_ids, token_type_ids=None, attention_mask=None)
144
- # logits = outputs[0] # Ensure this is on the same device
145
-
146
- # # Make logits into numpy type predict result
147
- # # The predict result contain each token's all tags predict result
148
- # predict_results = logits.detach().cpu().numpy()
149
-
150
- # from scipy.special import softmax
151
- # result_arrays_soft = softmax(predict_results[0])
152
- # tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
153
- # result_array = result_arrays_soft
154
- # result_list = np.argmax(result_array,axis=-1)
155
- # asp = []
156
- # for i, mark in enumerate(attention_masks[0]):
157
- # if mark>0:
158
- # if tag2name[result_list[i]] == "ASP":
159
- # # print("Token:%s"%(temp_token[i]))
160
- # asp.append(temp_token[i])
161
- # # print("Predict_Tag:%s"%(tag2name[result_list[i]]))
162
- # return asp
163
 
164
 
165
  # Title for the Streamlit app
@@ -167,7 +167,6 @@ st.title("Sentiment Analysis App")
167
 
168
  # Text input
169
  user_input = st.text_area("Enter the text for sentiment analysis:", "")
170
- user_input2 = st.text_area("Enter the word for sentiment analysis:", "")
171
 
172
 
173
  # Check if there is input text
@@ -175,7 +174,7 @@ outs = []
175
  if user_input:
176
  # Perform sentiment analysis
177
  with st.spinner("Analyzing..."):
178
- result = predict_sentiment(user_input,user_input2)
179
  # for i in result:
180
  # i, predict_sentiment(user_input,i.strip())
181
 
 
78
 
79
  return predicted_sentiment
80
 
81
+ from scipy.special import softmax
82
+ import numpy as np
83
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
84
+ import os
85
+ from pytorch_transformers import BertForTokenClassification
86
+ import torch
87
+ from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
88
+ max_len = 60
89
+ # Define the directory where the model is saved
90
+ bert_out_address = 'model/'
91
+
92
+ # Load the configuration file
93
+ config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))
94
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
95
+
96
+ # Load the pre-trained model's weights for sequence classification
97
+ model = BertForSequenceClassification(config)
98
+ # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin")))
99
+
100
+ model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"), map_location=torch.device(device)))
101
+ model = BertForTokenClassification.from_pretrained(bert_out_address,num_labels=5)
102
+
103
+ # Load the tokenizer
104
+ tokenizer = BertTokenizer.from_pretrained(bert_out_address)
105
+
106
+ # Set the model to evaluation mode (if you're not going to train it further)
107
+ model.eval()
108
+
109
+
110
+
111
+
112
+ def predict(test_query):
113
+ import torch
114
+ tokenized_texts = []
115
+ temp_token = []
116
+ # Add [CLS] at the front
117
+ temp_token.append('[CLS]')
118
+ token_list = tokenizer.tokenize(test_query)
119
+ for m,token in enumerate(token_list):
120
+ temp_token.append(token)
121
+ # Trim the token to fit the length requirement
122
+ if len(temp_token) > max_len-1:
123
+ temp_token= temp_token[:max_len-1]
124
+ # Add [SEP] at the end
125
+ temp_token.append('[SEP]')
126
+ tokenized_texts.append(temp_token)
127
+ input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
128
+ maxlen=max_len, dtype="long", truncating="post", padding="post")
129
+ attention_masks = [[int(i>0) for i in ii] for ii in input_ids]
130
+ segment_ids = [[0] * len(input_id) for input_id in input_ids]
131
+ input_ids = torch.tensor(input_ids)
132
+ attention_masks = torch.tensor(attention_masks)
133
+ segment_ids = torch.tensor(segment_ids)
134
+
135
+ # Assuming you have defined your model and input_ids somewhere before this
136
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
137
+ model.to(device) # Move model to GPU if available
138
+
139
+ # Move input tensors to the same device as the model
140
+ input_ids = input_ids.to(device)
141
+
142
+ with torch.no_grad():
143
+ outputs = model(input_ids, token_type_ids=None, attention_mask=None)
144
+ logits = outputs[0] # Ensure this is on the same device
145
+
146
+ # Make logits into numpy type predict result
147
+ # The predict result contain each token's all tags predict result
148
+ predict_results = logits.detach().cpu().numpy()
149
+
150
+ from scipy.special import softmax
151
+ result_arrays_soft = softmax(predict_results[0])
152
+ tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
153
+ result_array = result_arrays_soft
154
+ result_list = np.argmax(result_array,axis=-1)
155
+ asp = []
156
+ for i, mark in enumerate(attention_masks[0]):
157
+ if mark>0:
158
+ if tag2name[result_list[i]] == "ASP":
159
+ # print("Token:%s"%(temp_token[i]))
160
+ asp.append(temp_token[i])
161
+ # print("Predict_Tag:%s"%(tag2name[result_list[i]]))
162
+ return asp
163
 
164
 
165
  # Title for the Streamlit app
 
167
 
168
  # Text input
169
  user_input = st.text_area("Enter the text for sentiment analysis:", "")
 
170
 
171
 
172
  # Check if there is input text
 
174
  if user_input:
175
  # Perform sentiment analysis
176
  with st.spinner("Analyzing..."):
177
+ result = predict(user_input)
178
  # for i in result:
179
  # i, predict_sentiment(user_input,i.strip())
180