Spaces:

AshenR
/

ASPWords

Sleeping

App Files Files Community

AshenR committed on Oct 11, 2024

Commit

4bfc56c

verified ·

1 Parent(s): f948662

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -0

app.py CHANGED Viewed

@@ -1,5 +1,103 @@
 import streamlit as st
 # Page heading rendered at the top of the Streamlit app.
 st.title("Sentiment Analysis App")

 import streamlit as st
+import torch
+from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
+# Define the directory where the model is saved
+bert_out_address = '/models/'
+# Load the configuration file
+config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))
+# Load the pre-trained model's weights for sequence classification
+model = BertForSequenceClassification(config)
+model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin")))
+# Load the tokenizer
+tokenizer = BertTokenizer.from_pretrained(bert_out_address)
+# Set the model to evaluation mode (if you're not going to train it further)
+model.eval()
+def predict(test_query):
+    import torch
+    tokenized_texts = []
+    temp_token = []
+    # Add [CLS] at the front
+    temp_token.append('[CLS]')
+    token_list = tokenizer.tokenize(test_query)
+    token_list
+    for m,token in enumerate(token_list):
+        temp_token.append(token)
+    # Trim the token to fit the length requirement
+    if len(temp_token) > max_len-1:
+        temp_token= temp_token[:max_len-1]
+    # Add [SEP] at the end
+    temp_token.append('[SEP]')
+    tokenized_texts.append(temp_token)
+    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
+                              maxlen=max_len, dtype="long", truncating="post", padding="post")
+    attention_masks = [[int(i>0) for i in ii] for ii in input_ids]
+    attention_masks[0];
+    segment_ids = [[0] * len(input_id) for input_id in input_ids]
+    segment_ids[0];
+    input_ids = torch.tensor(input_ids)
+    attention_masks = torch.tensor(attention_masks)
+    segment_ids = torch.tensor(segment_ids)
+    import torch
+    # Assuming you have defined your model and input_ids somewhere before this
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    model.to(device)  # Move model to GPU if available
+    # Move input tensors to the same device as the model
+    input_ids = input_ids.to(device)
+    with torch.no_grad():
+        outputs = model(input_ids, token_type_ids=None, attention_mask=None)
+        logits = outputs[0]  # Ensure this is on the same device
+    # Make logits into numpy type predict result
+    # The predict result contain each token's all tags predict result
+    predict_results = logits.detach().cpu().numpy()
+    predict_results.shape
+    from scipy.special import softmax
+    result_arrays_soft = softmax(predict_results[0])
+    result_arrays_soft[0]
+    result_array = result_arrays_soft
+    len(result_array),len(result_array[0])
+    result_list = np.argmax(result_array,axis=-1)
+    asp = []
+    for i, mark in enumerate(attention_masks[0]):
+        if mark>0:
+            if tag2name[result_list[i]] == "ASP":
+                # print("Token:%s"%(temp_token[i]))
+                asp.append(temp_token[i])
+#             print("Predict_Tag:%s"%(tag2name[result_list[i]]))
+    return asp, test_query
 # Page heading rendered at the top of the Streamlit app.
 st.title("Sentiment Analysis App")