# import streamlit as st # # Load sentiment analysis pipeline # from transformers import BertModel # import torch # import torch.nn as nn # class BertForTargetSentiment(nn.Module): # def __init__(self, bert_model_name='bert-base-uncased', num_labels=3): # super(BertForTargetSentiment, self).__init__() # self.bert = BertModel.from_pretrained(bert_model_name) # self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels) # def forward(self, input_ids, attention_mask, word_indices): # # Get embeddings from BERT # outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask) # last_hidden_state = outputs.last_hidden_state # [batch_size, seq_length, hidden_size] # # Extract target word embeddings by averaging the subword embeddings # batch_word_embeddings = [] # for i, word_idx in enumerate(word_indices): # word_embedding = last_hidden_state[i, word_idx, :].mean(dim=0) # Average across word subtokens # batch_word_embeddings.append(word_embedding) # word_embeddings = torch.stack(batch_word_embeddings) # Shape: [batch_size, hidden_size] # # Pass through classifier to predict sentiment # logits = self.classifier(word_embeddings) # return logits # import torch # from transformers import BertTokenizer # # Load the fine-tuned model and tokenizer (replace with your model path if saved locally) # tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') # modelSentiment = BertForTargetSentiment() # Use the model class defined earlier # # Load the trained model weights (adjust the path if needed) # # modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth')) # modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth', map_location=torch.device('cpu'))) # modelSentiment.eval() # Set the model to evaluation mode # # Sentiment labels # sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'} # def predict_sentiment(sentence, target_word, max_len=128): # # Step 1: Tokenize the sentence and target word # inputs = 
# tokenizer( # sentence, # return_tensors='pt', # padding='max_length', # truncation=True, # max_length=max_len # ) # word_tokens = tokenizer.tokenize(target_word) # target_token_ids = tokenizer.convert_tokens_to_ids(word_tokens) # # Step 2: Find the position of the target word in tokenized sentence # word_indices = [i for i, token_id in enumerate(inputs['input_ids'][0]) if token_id in target_token_ids] # if not word_indices: # raise ValueError(f"The word '{target_word}' could not be found in the sentence.") # # Step 3: Forward pass through the model # with torch.no_grad(): # logits = modelSentiment( # input_ids=inputs['input_ids'], # attention_mask=inputs['attention_mask'], # word_indices=[word_indices] # ) # # Step 4: Predict the sentiment # predicted_label = torch.argmax(logits, dim=-1).item() # predicted_sentiment = sentiment_labels[predicted_label] # return predicted_sentiment # from scipy.special import softmax # import numpy as np # from tensorflow.keras.preprocessing.sequence import pad_sequences # import os # from pytorch_transformers import BertForTokenClassification # import torch # from transformers import BertConfig, BertForSequenceClassification, BertTokenizer # max_len = 60 # # Define the directory where the model is saved # bert_out_address = 'model/' # # Load the configuration file # config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json")) # device = 'cuda' if torch.cuda.is_available() else 'cpu' # # Load the pre-trained model's weights for sequence classification # model = BertForSequenceClassification(config) # # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"))) # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"), map_location=torch.device(device))) # model = BertForTokenClassification.from_pretrained(bert_out_address,num_labels=5) # # Load the tokenizer # tokenizer = BertTokenizer.from_pretrained(bert_out_address) # # Set the model to evaluation mode
# (if you're not going to train it further) # model.eval() # def predict(test_query): # import torch # tokenized_texts = [] # temp_token = [] # # Add [CLS] at the front # temp_token.append('[CLS]') # token_list = tokenizer.tokenize(test_query) # for m,token in enumerate(token_list): # temp_token.append(token) # # Trim the token to fit the length requirement # if len(temp_token) > max_len-1: # temp_token= temp_token[:max_len-1] # # Add [SEP] at the end # temp_token.append('[SEP]') # tokenized_texts.append(temp_token) # input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts], # maxlen=max_len, dtype="long", truncating="post", padding="post") # attention_masks = [[int(i>0) for i in ii] for ii in input_ids] # segment_ids = [[0] * len(input_id) for input_id in input_ids] # input_ids = torch.tensor(input_ids) # attention_masks = torch.tensor(attention_masks) # segment_ids = torch.tensor(segment_ids) # # Assuming you have defined your model and input_ids somewhere before this # device = 'cuda' if torch.cuda.is_available() else 'cpu' # model.to(device) # Move model to GPU if available # # Move input tensors to the same device as the model # input_ids = input_ids.to(device) # with torch.no_grad(): # outputs = model(input_ids, token_type_ids=None, attention_mask=None) # logits = outputs[0] # Ensure this is on the same device # # Make logits into numpy type predict result # # The predict result contain each token's all tags predict result # predict_results = logits.detach().cpu().numpy() # from scipy.special import softmax # result_arrays_soft = softmax(predict_results[0]) # tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'} # result_array = result_arrays_soft # result_list = np.argmax(result_array,axis=-1) # asp = [] # for i, mark in enumerate(attention_masks[0]): # if mark>0: # if tag2name[result_list[i]] == "ASP": # # print("Token:%s"%(temp_token[i])) # asp.append(temp_token[i]) # #
# print("Predict_Tag:%s"%(tag2name[result_list[i]]))
#     return asp
#
# # Title for the Streamlit app
# st.title("Sentiment Analysis App")
#
# # Text input
# user_input = st.text_area("Enter the text for sentiment analysis:", "")
#
# # Check if there is input text
# outs = []
# if user_input:
#     # Perform sentiment analysis
#     with st.spinner("Analyzing..."):
#         result = predict(user_input)
#         for i in result:
#             i, predict_sentiment(user_input,i.strip())
#         # result
# (end of the legacy, commented-out prototype that precedes the live code)

import os

import numpy as np
import streamlit as st
import torch
import torch.nn as nn
from pytorch_transformers import BertForTokenClassification
# `softmax` and `BertForSequenceClassification` are no longer used by the code
# below; the imports are kept because other parts of the file may rely on them.
from scipy.special import softmax
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import BertModel, BertTokenizer, BertConfig, BertForSequenceClassification

# Styling and Configuration
st.set_page_config(
    page_title="Sentiment Prediction with Aspects",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for enhanced dark mode styling
# NOTE(review): the stylesheet body is blank in the reviewed source; it looks
# like the markup was stripped -- restore the original CSS if it is available.
st.markdown(""" """, unsafe_allow_html=True)

# Sentiment Emojis with more expressive options
SENTIMENT_EMOJIS = {
    'Negative': '😞',  # Sad face for negative sentiment
    'Neutral': '😐',   # Neutral face for neutral sentiment
    'Positive': '😊',  # Smiling face for positive sentiment
}

# Sentiment Color Coding (Updated for better visibility in dark mode)
SENTIMENT_COLORS = {
    'Negative': '#ff6b6b',  # Bright red for negative
    'Neutral': '#4a4a4a',   # Gray for neutral
    'Positive': '#4caf50',  # Bright green for positive
}

# Label ids emitted by the aspect tagger, mapped to tag names.
TAG2NAME = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}


class BertForTargetSentiment(nn.Module):
    """BERT encoder followed by a linear head that scores one target word per input.

    The target word is represented by the mean of the encoder's hidden states
    at the token positions supplied by the caller.
    """

    def __init__(self, bert_model_name='bert-base-uncased', num_labels=3):
        """Load the pretrained encoder and create the classification layer.

        Args:
            bert_model_name: Name or path passed to ``BertModel.from_pretrained``.
            num_labels: Number of output classes of the linear head.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, word_indices):
        """Return classification logits for the target word of each batch item.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.
            word_indices: One entry per batch item holding the token positions
                of that item's target word. An empty entry would average over
                nothing and produce NaNs, so callers must supply at least one
                position per item.

        Returns:
            One row of ``num_labels`` logits per entry of ``word_indices``.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state
        # Average the hidden states of the target word's tokens, item by item.
        word_embeddings = torch.stack([
            last_hidden_state[i, word_idx, :].mean(dim=0)
            for i, word_idx in enumerate(word_indices)
        ])
        return self.classifier(word_embeddings)


# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model loading below is repeated each time; consider caching the loaded
# objects (e.g. `st.cache_resource`) if the installed Streamlit supports it.

# Device configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the sentiment prediction model
tokenizer_sentiment = BertTokenizer.from_pretrained('bert-base-uncased')
modelSentiment = BertForTargetSentiment()
modelSentiment.load_state_dict(
    torch.load('bert_target_sentiment_model.pth', map_location=torch.device(device))
)
modelSentiment.to(device)  # run on the same device the inputs are moved to
modelSentiment.eval()

# Sentiment labels
sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

# Load aspect extraction model configurations
max_len = 60
bert_out_address = 'model/'

# Load the configuration file (kept as a module-level name for compatibility).
config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

# Load the aspect tagger. The previous code first built a
# BertForSequenceClassification and loaded the same checkpoint into it, only to
# overwrite `model` on the next line; that redundant load has been removed.
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)
model.to(device)  # moved once here instead of on every predict() call

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Set the model to evaluation mode
model.eval()


def predict_sentiment(sentence, target_word, max_len=128):
    """Predict the sentiment expressed towards ``target_word`` in ``sentence``.

    Args:
        sentence: Text to analyse.
        target_word: Word (or word-piece token starting with ``##``) whose
            sentiment is wanted.
        max_len: Length the sentence is padded/truncated to before encoding.

    Returns:
        One of the values of ``sentiment_labels``.

    Raises:
        ValueError: If none of the target's token ids occur in the encoded
            sentence.
    """
    inputs = tokenizer_sentiment(
        sentence,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=max_len,
    )

    if target_word.startswith('##'):
        # Already a word-piece continuation token (predict() returns raw
        # word pieces); re-tokenizing would split the '#' characters off.
        word_tokens = [target_word]
    else:
        word_tokens = tokenizer_sentiment.tokenize(target_word)
    target_token_ids = set(tokenizer_sentiment.convert_tokens_to_ids(word_tokens))

    # Compare plain ints instead of 0-dim tensors against the target ids.
    sentence_ids = inputs['input_ids'][0].tolist()
    word_indices = [
        position
        for position, token_id in enumerate(sentence_ids)
        if token_id in target_token_ids
    ]
    if not word_indices:
        raise ValueError(f"The word '{target_word}' could not be found in the sentence.")

    with torch.no_grad():
        logits = modelSentiment(
            input_ids=inputs['input_ids'].to(device),
            attention_mask=inputs['attention_mask'].to(device),
            word_indices=[word_indices],
        )

    predicted_label = torch.argmax(logits, dim=-1).item()
    return sentiment_labels[predicted_label]


def predict(test_query):
    """Extract the tokens of ``test_query`` that the tagger labels as aspects.

    Args:
        test_query: Text to analyse.

    Returns:
        The word-piece tokens (in sentence order) whose predicted tag is
        ``"ASP"``. Tokens are returned exactly as produced by the tokenizer.
    """
    # [CLS] + tokens, trimmed so that [SEP] still fits within max_len.
    tokens = ['[CLS]'] + tokenizer.tokenize(test_query)
    tokens = tokens[:max_len - 1]
    tokens.append('[SEP]')

    padded_ids = pad_sequences(
        [tokenizer.convert_tokens_to_ids(tokens)],
        maxlen=max_len, dtype="long", truncating="post", padding="post",
    )
    input_ids = torch.tensor(padded_ids, dtype=torch.long).to(device)
    # Padding uses id 0, so every non-zero id is a real token.
    attention_mask = (input_ids > 0).long()

    with torch.no_grad():
        # Pass the mask so the encoder does not attend to padding positions
        # (it was computed before but never handed to the model).
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask)
    logits = outputs[0]

    # argmax over the label axis; a softmax beforehand cannot change the winner.
    tag_ids = logits[0].argmax(dim=-1).tolist()
    mask_row = attention_mask[0].tolist()

    return [
        token
        for token, is_real, tag_id in zip(tokens, mask_row, tag_ids)
        if is_real and TAG2NAME[tag_id] == "ASP"
    ]


# Streamlit App
def main():
    # NOTE(review): only a fragment of this function survives in the reviewed
    # source (its templates were stripped and the rest is not visible). The
    # visible text is reproduced unchanged and the string is closed only so the
    # module parses -- restore the original body before relying on main().
    # Title with gradient and centered
    st.markdown("""
Sentiment: {sentiment} {SENTIMENT_EMOJIS.get(sentiment, '')}
""")