Spaces:

AshenR
/

ASPPrediction

Sleeping

App Files Files Community

AshenR committed on Oct 16, 2024

Commit

ebaac8a

verified ·

1 Parent(s): 2ea3090

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -84

app.py CHANGED Viewed

@@ -78,88 +78,88 @@ def predict_sentiment(sentence, target_word, max_len=128):
     return predicted_sentiment
-# from scipy.special import softmax
-# import numpy as np
-# from tensorflow.keras.preprocessing.sequence import pad_sequences
-# import os
-# from pytorch_transformers import BertForTokenClassification
-# import torch
-# from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
-# max_len = 60
-# # Define the directory where the model is saved
-# bert_out_address = 'model/'
-# # Load the configuration file
-# config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))
-# device = 'cuda' if torch.cuda.is_available() else 'cpu'
-# # Load the pre-trained model's weights for sequence classification
-# model = BertForSequenceClassification(config)
-# # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin")))
-# model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"), map_location=torch.device(device)))
-# model = BertForTokenClassification.from_pretrained(bert_out_address,num_labels=5)
-# # Load the tokenizer
-# tokenizer = BertTokenizer.from_pretrained(bert_out_address)
-# # Set the model to evaluation mode (if you're not going to train it further)
-# model.eval()
-# def predict(test_query):
-#     import torch
-#     tokenized_texts = []
-#     temp_token = []
-#     # Add [CLS] at the front
-#     temp_token.append('[CLS]')
-#     token_list = tokenizer.tokenize(test_query)
-#     for m,token in enumerate(token_list):
-#         temp_token.append(token)
-#     # Trim the token to fit the length requirement
-#     if len(temp_token) > max_len-1:
-#         temp_token= temp_token[:max_len-1]
-#     # Add [SEP] at the end
-#     temp_token.append('[SEP]')
-#     tokenized_texts.append(temp_token)
-#     input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
-#                               maxlen=max_len, dtype="long", truncating="post", padding="post")
-#     attention_masks = [[int(i>0) for i in ii] for ii in input_ids]
-#     segment_ids = [[0] * len(input_id) for input_id in input_ids]
-#     input_ids = torch.tensor(input_ids)
-#     attention_masks = torch.tensor(attention_masks)
-#     segment_ids = torch.tensor(segment_ids)
-#     # Assuming you have defined your model and input_ids somewhere before this
-#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
-#     model.to(device)  # Move model to GPU if available
-#     # Move input tensors to the same device as the model
-#     input_ids = input_ids.to(device)
-#     with torch.no_grad():
-#         outputs = model(input_ids, token_type_ids=None, attention_mask=None)
-#         logits = outputs[0]  # Ensure this is on the same device
-#     # Make logits into numpy type predict result
-#     # The predict result contain each token's all tags predict result
-#     predict_results = logits.detach().cpu().numpy()
-#     from scipy.special import softmax
-#     result_arrays_soft = softmax(predict_results[0])
-#     tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
-#     result_array = result_arrays_soft
-#     result_list = np.argmax(result_array,axis=-1)
-#     asp = []
-#     for i, mark in enumerate(attention_masks[0]):
-#         if mark>0:
-#             if tag2name[result_list[i]] == "ASP":
-#                 # print("Token:%s"%(temp_token[i]))
-#                 asp.append(temp_token[i])
-# #             print("Predict_Tag:%s"%(tag2name[result_list[i]]))
-#     return asp
 # Title for the Streamlit app
@@ -167,7 +167,6 @@ st.title("Sentiment Analysis App")
 # Text input
 user_input = st.text_area("Enter the text for sentiment analysis:", "")
-user_input2 = st.text_area("Enter the word for sentiment analysis:", "")
 # Check if there is input text
@@ -175,7 +174,7 @@ outs = []
 if user_input:
     # Perform sentiment analysis
     with st.spinner("Analyzing..."):
-        result = predict_sentiment(user_input,user_input2)
         # for i in result:
         #     i, predict_sentiment(user_input,i.strip())

     return predicted_sentiment
+from scipy.special import softmax
+import numpy as np
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import os
+from pytorch_transformers import BertForTokenClassification
+import torch
+from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
+max_len = 60  # fixed length (in token ids, including [CLS]/[SEP]) that predict() trims and pads to
+# Directory holding the saved model files (config.json, pytorch_model.bin and the tokenizer files)
+bert_out_address = 'model/'
+# Load the model configuration from config.json in that directory
+config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))
+device = 'cuda' if torch.cuda.is_available() else 'cpu'  # use the GPU when one is available
+# Build a sequence-classification model from the config and load the saved weights onto `device`
+model = BertForSequenceClassification(config)
+# NOTE(review): `model` is rebound to the token-classification model right after the load below, so this instance and its weight load look redundant - confirm and remove
+model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"), map_location=torch.device(device)))
+model = BertForTokenClassification.from_pretrained(bert_out_address,num_labels=5)  # the model predict() actually uses: 5-label token classifier
+# Load the tokenizer from the same directory as the model
+tokenizer = BertTokenizer.from_pretrained(bert_out_address)
+# Switch the model to evaluation mode; this script only runs inference
+model.eval()
+def predict(test_query):
+    import torch
+    tokenized_texts = []
+    temp_token = []
+    # Add [CLS] at the front
+    temp_token.append('[CLS]')
+    token_list = tokenizer.tokenize(test_query)
+    for m,token in enumerate(token_list):
+        temp_token.append(token)
+    # Trim the token to fit the length requirement
+    if len(temp_token) > max_len-1:
+        temp_token= temp_token[:max_len-1]
+    # Add [SEP] at the end
+    temp_token.append('[SEP]')
+    tokenized_texts.append(temp_token)
+    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
+                              maxlen=max_len, dtype="long", truncating="post", padding="post")
+    attention_masks = [[int(i>0) for i in ii] for ii in input_ids]
+    segment_ids = [[0] * len(input_id) for input_id in input_ids]
+    input_ids = torch.tensor(input_ids)
+    attention_masks = torch.tensor(attention_masks)
+    segment_ids = torch.tensor(segment_ids)
+    # Assuming you have defined your model and input_ids somewhere before this
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    model.to(device)  # Move model to GPU if available
+    # Move input tensors to the same device as the model
+    input_ids = input_ids.to(device)
+    with torch.no_grad():
+        outputs = model(input_ids, token_type_ids=None, attention_mask=None)
+        logits = outputs[0]  # Ensure this is on the same device
+    # Make logits into numpy type predict result
+    # The predict result contain each token's all tags predict result
+    predict_results = logits.detach().cpu().numpy()
+    from scipy.special import softmax
+    result_arrays_soft = softmax(predict_results[0])
+    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
+    result_array = result_arrays_soft
+    result_list = np.argmax(result_array,axis=-1)
+    asp = []
+    for i, mark in enumerate(attention_masks[0]):
+        if mark>0:
+            if tag2name[result_list[i]] == "ASP":
+                # print("Token:%s"%(temp_token[i]))
+                asp.append(temp_token[i])
+#             print("Predict_Tag:%s"%(tag2name[result_list[i]]))
+    return asp
 # Title for the Streamlit app
 # Text input
 user_input = st.text_area("Enter the text for sentiment analysis:", "")
 # Check if there is input text
 if user_input:
     # Perform sentiment analysis
     with st.spinner("Analyzing..."):
+        result = predict(user_input)
         # for i in result:
         #     i, predict_sentiment(user_input,i.strip())