Spaces:
Sleeping
Sleeping
| # import streamlit as st | |
| # # Load sentiment analysis pipeline | |
| # from transformers import BertModel | |
| # import torch | |
| # import torch.nn as nn | |
| # class BertForTargetSentiment(nn.Module): | |
| # def __init__(self, bert_model_name='bert-base-uncased', num_labels=3): | |
| # super(BertForTargetSentiment, self).__init__() | |
| # self.bert = BertModel.from_pretrained(bert_model_name) | |
| # self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels) | |
| # def forward(self, input_ids, attention_mask, word_indices): | |
| # # Get embeddings from BERT | |
| # outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask) | |
| # last_hidden_state = outputs.last_hidden_state # [batch_size, seq_length, hidden_size] | |
| # # Extract target word embeddings by averaging the subword embeddings | |
| # batch_word_embeddings = [] | |
| # for i, word_idx in enumerate(word_indices): | |
| # word_embedding = last_hidden_state[i, word_idx, :].mean(dim=0) # Average across word subtokens | |
| # batch_word_embeddings.append(word_embedding) | |
| # word_embeddings = torch.stack(batch_word_embeddings) # Shape: [batch_size, hidden_size] | |
| # # Pass through classifier to predict sentiment | |
| # logits = self.classifier(word_embeddings) | |
| # return logits | |
| # import torch | |
| # from transformers import BertTokenizer | |
| # # Load the fine-tuned model and tokenizer (replace with your model path if saved locally) | |
| # tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') | |
| # modelSentiment = BertForTargetSentiment() # Use the model class defined earlier | |
| # # Load the trained model weights (adjust the path if needed) | |
| # # modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth')) | |
| # modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth', map_location=torch.device('cpu'))) | |
| # modelSentiment.eval() # Set the model to evaluation mode | |
| # # Sentiment labels | |
| # sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'} | |
| # def predict_sentiment(sentence, target_word, max_len=128): | |
| # # Step 1: Tokenize the sentence and target word | |
| # inputs = tokenizer( | |
| # sentence, | |
| # return_tensors='pt', | |
| # padding='max_length', | |
| # truncation=True, | |
| # max_length=max_len | |
| # ) | |
| # word_tokens = tokenizer.tokenize(target_word) | |
| # target_token_ids = tokenizer.convert_tokens_to_ids(word_tokens) | |
| # # Step 2: Find the position of the target word in tokenized sentence | |
| # word_indices = [i for i, token_id in enumerate(inputs['input_ids'][0]) if token_id in target_token_ids] | |
| # if not word_indices: | |
| # raise ValueError(f"The word '{target_word}' could not be found in the sentence.") | |
| # # Step 3: Forward pass through the model | |
| # with torch.no_grad(): | |
| # logits = modelSentiment( | |
| # input_ids=inputs['input_ids'], | |
| # attention_mask=inputs['attention_mask'], | |
| # word_indices=[word_indices] | |
| # ) | |
| # # Step 4: Predict the sentiment | |
| # predicted_label = torch.argmax(logits, dim=-1).item() | |
| # predicted_sentiment = sentiment_labels[predicted_label] | |
| # return predicted_sentiment | |
| # from scipy.special import softmax | |
| # import numpy as np | |
| # from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| # import os | |
| # from pytorch_transformers import BertForTokenClassification | |
| # import torch | |
| # from transformers import BertConfig, BertForSequenceClassification, BertTokenizer | |
| # max_len = 60 | |
| # # Define the directory where the model is saved | |
| # bert_out_address = 'model/' | |
| # # Load the configuration file | |
| # config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json")) | |
| # device = 'cuda' if torch.cuda.is_available() else 'cpu' | |
| # # Load the pre-trained model's weights for sequence classification | |
| # model = BertForSequenceClassification(config) | |
| # # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"))) | |
| # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"), map_location=torch.device(device))) | |
| # model = BertForTokenClassification.from_pretrained(bert_out_address,num_labels=5) | |
| # # Load the tokenizer | |
| # tokenizer = BertTokenizer.from_pretrained(bert_out_address) | |
| # # Set the model to evaluation mode (if you're not going to train it further) | |
| # model.eval() | |
| # def predict(test_query): | |
| # import torch | |
| # tokenized_texts = [] | |
| # temp_token = [] | |
| # # Add [CLS] at the front | |
| # temp_token.append('[CLS]') | |
| # token_list = tokenizer.tokenize(test_query) | |
| # for m,token in enumerate(token_list): | |
| # temp_token.append(token) | |
| # # Trim the token to fit the length requirement | |
| # if len(temp_token) > max_len-1: | |
| # temp_token= temp_token[:max_len-1] | |
| # # Add [SEP] at the end | |
| # temp_token.append('[SEP]') | |
| # tokenized_texts.append(temp_token) | |
| # input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts], | |
| # maxlen=max_len, dtype="long", truncating="post", padding="post") | |
| # attention_masks = [[int(i>0) for i in ii] for ii in input_ids] | |
| # segment_ids = [[0] * len(input_id) for input_id in input_ids] | |
| # input_ids = torch.tensor(input_ids) | |
| # attention_masks = torch.tensor(attention_masks) | |
| # segment_ids = torch.tensor(segment_ids) | |
| # # Assuming you have defined your model and input_ids somewhere before this | |
| # device = 'cuda' if torch.cuda.is_available() else 'cpu' | |
| # model.to(device) # Move model to GPU if available | |
| # # Move input tensors to the same device as the model | |
| # input_ids = input_ids.to(device) | |
| # with torch.no_grad(): | |
| # outputs = model(input_ids, token_type_ids=None, attention_mask=None) | |
| # logits = outputs[0] # Ensure this is on the same device | |
| # # Make logits into numpy type predict result | |
| # # The predict result contain each token's all tags predict result | |
| # predict_results = logits.detach().cpu().numpy() | |
| # from scipy.special import softmax | |
| # result_arrays_soft = softmax(predict_results[0]) | |
| # tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'} | |
| # result_array = result_arrays_soft | |
| # result_list = np.argmax(result_array,axis=-1) | |
| # asp = [] | |
| # for i, mark in enumerate(attention_masks[0]): | |
| # if mark>0: | |
| # if tag2name[result_list[i]] == "ASP": | |
| # # print("Token:%s"%(temp_token[i])) | |
| # asp.append(temp_token[i]) | |
| # # print("Predict_Tag:%s"%(tag2name[result_list[i]])) | |
| # return asp | |
| # # Title for the Streamlit app | |
| # st.title("Sentiment Analysis App") | |
| # # Text input | |
| # user_input = st.text_area("Enter the text for sentiment analysis:", "") | |
| # # Check if there is input text | |
| # outs = [] | |
| # if user_input: | |
| # # Perform sentiment analysis | |
| # with st.spinner("Analyzing..."): | |
| # result = predict(user_input) | |
| # for i in result: | |
| # i, predict_sentiment(user_input,i.strip()) | |
| # # result | |
| import streamlit as st | |
| import torch | |
| import torch.nn as nn | |
| from transformers import BertModel, BertTokenizer, BertConfig, BertForSequenceClassification | |
| from pytorch_transformers import BertForTokenClassification | |
| import numpy as np | |
| from scipy.special import softmax | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| import os | |
# Styling and Configuration
# Page-level settings: browser-tab title and icon, wide layout, and a sidebar
# that starts expanded.
st.set_page_config(
    page_title="Sentiment Prediction with Aspects",
    # "\U0001F9E0" is the brain emoji. It is written as a Unicode escape so the
    # icon cannot be corrupted when the source file is read with the wrong
    # encoding (the literal had degraded to the mojibake sequence "π§ ").
    page_icon="\U0001F9E0",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS for enhanced dark mode styling.
# The stylesheet lives in a named constant and is injected once through
# st.markdown; unsafe_allow_html=True is what lets the <style> element through
# as markup.
# NOTE(review): `.css-1aumxhk` looks like an auto-generated class name and may
# not match the sidebar on other Streamlit versions -- confirm.
# NOTE(review): the last rule sets `color` with `!important` on <span> (among
# others), which takes precedence over inline `color:` styles that are not
# marked `!important` -- confirm this is intended for the result cards.
_DARK_MODE_CSS = """
<style>
/* Dark Mode Base Styling */
.stApp {
    background-color: #121212;
    color: #e0e0e0;
}
/* Sidebar Styling */
.css-1aumxhk {
    background-color: #1e1e1e;
    color: #e0e0e0;
}
/* Text Area Styling */
.stTextArea > div > div > textarea {
    background-color: #1e1e1e;
    color: #e0e0e0;
    border: 2px solid #4a4a4a;
    border-radius: 10px;
    padding: 10px;
}
/* Button Styling */
.stButton > button {
    background-color: #2196f3;
    color: white;
    border-radius: 10px;
    border: none;
    padding: 10px 20px;
    transition: all 0.3s ease;
}
.stButton > button:hover {
    background-color: #1976d2;
    transform: scale(1.05);
}
/* Result Card Styling */
.result-card {
    background-color: #1e1e1e;
    border-radius: 10px;
    padding: 15px;
    margin-bottom: 10px;
    box-shadow: 0 4px 6px rgba(255,255,255,0.1);
    transition: all 0.3s ease;
    color: #e0e0e0;
}
.result-card:hover {
    transform: translateY(-5px);
    box-shadow: 0 6px 8px rgba(255,255,255,0.15);
}
/* Headings and Text */
h1, h2, h3, h4, p, div, span {
    color: #e0e0e0 !important;
}
</style>
"""
st.markdown(_DARK_MODE_CSS, unsafe_allow_html=True)
# Emoji displayed next to each predicted sentiment label.
# The glyphs are written as Unicode escapes so they survive a mis-detected
# source encoding; the literals had degraded to a bare "π".
# NOTE(review): the exact original glyphs could not be recovered; these were
# chosen to match the inline descriptions (sad / neutral / smiling) -- confirm.
SENTIMENT_EMOJIS = {
    'Negative': '\U0001F61E',  # Sad face for negative sentiment
    'Neutral': '\U0001F610',  # Neutral face for neutral sentiment
    'Positive': '\U0001F60A',  # Smiling face for positive sentiment
}
# Accent colour (CSS hex) associated with each sentiment label; used for the
# result-card border and text highlights.
SENTIMENT_COLORS = dict(
    Negative='#ff6b6b',  # bright red
    Neutral='#4a4a4a',  # gray
    Positive='#4caf50',  # bright green
)
# Target-word sentiment classifier: BERT encoder plus a linear head.
class BertForTargetSentiment(nn.Module):
    """Classify the sentiment attached to one target word per example.

    The encoder's last hidden states at the target word's token positions are
    averaged, and the resulting vector is passed through a linear layer that
    outputs ``num_labels`` logits.
    """

    def __init__(self, bert_model_name='bert-base-uncased', num_labels=3):
        """Load the pretrained encoder and create the classification head.

        Args:
            bert_model_name: Name or path handed to ``BertModel.from_pretrained``.
            num_labels: Number of output classes of the linear head.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, word_indices):
        """Return one row of logits per example.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.
            word_indices: One collection of token positions per example; the
                hidden states at those positions are mean-pooled.

        Returns:
            The classifier output for the stacked per-example pooled vectors.
        """
        hidden_states = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
        ).last_hidden_state
        # Mean over the selected positions collapses each example to a single
        # hidden-size vector before stacking them into a batch.
        pooled = torch.stack([
            hidden_states[row, positions, :].mean(dim=0)
            for row, positions in enumerate(word_indices)
        ])
        return self.classifier(pooled)
# Device configuration: use CUDA when torch reports it as available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE(review): Streamlit re-executes this script on user interaction, so the
# loads below are repeated each run; consider caching them (for example with
# st.cache_resource, if the installed Streamlit version provides it).

# Load the sentiment prediction model: base BERT tokenizer plus the fine-tuned
# weights stored in bert_target_sentiment_model.pth.
tokenizer_sentiment = BertTokenizer.from_pretrained('bert-base-uncased')
modelSentiment = BertForTargetSentiment()
modelSentiment.load_state_dict(
    torch.load('bert_target_sentiment_model.pth', map_location=torch.device(device))
)
# The module itself is not moved to `device`; predict_sentiment feeds it the
# tokenizer's tensors without moving them either.
modelSentiment.eval()

# Class index -> sentiment label.
sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

# Aspect extraction model settings.
max_len = 60  # token length that predict() truncates and pads queries to
bert_out_address = 'model/'  # directory with config.json, pytorch_model.bin and tokenizer files

# The configuration is still parsed so `config` remains available as a
# module-level name.
config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

# Load the aspect tagger. A BertForSequenceClassification used to be built and
# filled with the same weights here, only to be replaced on the next line;
# that discarded load has been removed.
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)

# Load the tokenizer that matches the aspect tagger.
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Set the model to evaluation mode.
model.eval()
def predict_sentiment(sentence, target_word, max_len=128):
    """Predict the sentiment label for ``target_word`` within ``sentence``.

    The sentence is encoded to a fixed length of ``max_len`` ids. Every
    position whose id equals any word-piece id of ``target_word`` is selected,
    and the sentiment model pools the hidden states at those positions.

    Args:
        sentence: Text containing the target word.
        target_word: Word whose sentiment is requested.
        max_len: Length the sentence is padded/truncated to.

    Returns:
        The entry of ``sentiment_labels`` for the highest-scoring class.

    Raises:
        ValueError: If none of the target's word-piece ids occur in the
            encoded sentence.
    """
    encoded = tokenizer_sentiment(
        sentence,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=max_len,
    )
    target_ids = tokenizer_sentiment.convert_tokens_to_ids(
        tokenizer_sentiment.tokenize(target_word)
    )

    # Plain Python ints make the membership test an ordinary list lookup.
    sentence_ids = encoded['input_ids'][0].tolist()
    positions = [
        position
        for position, token_id in enumerate(sentence_ids)
        if token_id in target_ids
    ]
    if not positions:
        raise ValueError(f"The word '{target_word}' could not be found in the sentence.")

    with torch.no_grad():
        logits = modelSentiment(
            input_ids=encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
            word_indices=[positions],
        )

    best_class = torch.argmax(logits, dim=-1).item()
    return sentiment_labels[best_class]
def predict(test_query):
    """Extract aspect tokens from ``test_query`` with the token-tagging model.

    The query is tokenized, wrapped in ``[CLS]``/``[SEP]``, limited so the
    whole sequence fits in the module-level ``max_len``, converted to ids and
    right-padded with zeros. The model is run once and every non-padding
    position whose highest-scoring tag is ``ASP`` contributes its token.

    Args:
        test_query: Raw text to analyse.

    Returns:
        The tokens tagged ``ASP``, in sentence order. Tokens are returned as
        produced by the tokenizer (word-piece continuations keep their ``##``
        prefix). The list is empty when nothing is tagged.
    """
    # Reserve two positions for [CLS] and [SEP].
    word_pieces = tokenizer.tokenize(test_query)[:max_len - 2]
    tokens = ['[CLS]', *word_pieces, '[SEP]']

    padded_ids = pad_sequences(
        [tokenizer.convert_tokens_to_ids(tokens)],
        maxlen=max_len, dtype="long", truncating="post", padding="post",
    )

    model.to(device)
    input_ids = torch.tensor(padded_ids).to(device)

    with torch.no_grad():
        # NOTE(review): no attention mask is passed, so padding positions are
        # attended to. Left unchanged because it alters the model's output --
        # confirm against how the tagger was trained before passing a mask.
        outputs = model(input_ids, token_type_ids=None, attention_mask=None)
    logits = outputs[0]

    # The per-token argmax is taken on the raw logits. The softmax that used
    # to precede it was computed over the whole array (no axis given) and,
    # being monotonic, never changed which tag scored highest.
    tag_ids = np.argmax(logits.detach().cpu().numpy()[0], axis=-1)

    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}

    # An id of 0 marks padding; zip() also stops at the last real token.
    return [
        token
        for token, token_id, tag_id in zip(tokens, padded_ids[0], tag_ids)
        if token_id > 0 and tag2name[tag_id] == "ASP"
    ]
# Streamlit App
def _strip_wordpiece_prefix(token):
    """Return ``token`` without a leading ``##`` word-piece continuation marker.

    Only the two-character prefix is removed; other ``#`` characters are kept,
    so a token that is just ``#`` does not collapse to an empty string.
    """
    return token[2:] if token.startswith('##') else token


def _result_card_html(aspect, sentiment):
    """Build the HTML card shown for one aspect and its predicted sentiment.

    ``aspect`` originates from user-entered text and the card is rendered with
    ``unsafe_allow_html=True``, so it is HTML-escaped before interpolation.
    """
    import html  # local import: only this helper needs it

    color = SENTIMENT_COLORS.get(sentiment, '#2196f3')
    emoji = SENTIMENT_EMOJIS.get(sentiment, '')
    return f"""
    <div class="result-card" style="border-left: 5px solid {color};">
        <h4 style="margin-bottom: 10px;">
            <span style="color: {color};">
                {html.escape(aspect)}
            </span>
        </h4>
        <p>
            <strong>Sentiment:</strong>
            <span style="color: {color};">
                {sentiment} {emoji}
            </span>
        </p>
    </div>
    """


def main():
    """Render the page: title, sidebar description, text input and results.

    When the button is pressed with non-empty text, aspects are extracted with
    ``predict`` and one result card per aspect is rendered using
    ``predict_sentiment``. A failure on one aspect is reported with
    ``st.error`` and the remaining aspects are still processed.
    """
    # NOTE(review): several emoji literals in this function appear to have been
    # mis-decoded. The two that could be recovered unambiguously are restored
    # as escapes ("\U0001F9E0" brain, "\U0001F4A1" light bulb); the remaining
    # bare "π" characters are left as found -- confirm the intended glyphs.

    # Title with gradient and centered
    st.markdown("""
    <h1 style="text-align: center;
    background: linear-gradient(to right, #2196f3, #1976d2);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 3rem;">
    \U0001F9E0 Sentiment Insight
    </h1>
    """, unsafe_allow_html=True)

    # Sidebar for app description
    st.sidebar.header("About the App")
    st.sidebar.info("""
    π Sentiment Insight analyzes text to:
    - Extract key aspects from your text
    - Predict sentiment for each aspect
    - Provide visual sentiment feedback
    \U0001F4A1 Simply enter your text and see insights!
    """)

    # Text input with placeholder
    user_input = st.text_area(
        "Enter text for sentiment analysis:",
        placeholder="Type or paste your text here...",
        height=200,
    )

    # Analyze button
    analyze_button = st.button("Analyze Sentiment", key="analyze_btn")
    if not analyze_button:
        return
    if not user_input:
        # Give feedback instead of silently ignoring the click.
        st.warning("Please enter some text to analyze.")
        return

    # Extract aspects
    with st.spinner("π Analyzing your text..."):
        aspects = predict(user_input)

    # Display results section
    st.markdown("## π Analysis Results")
    if not aspects:
        st.warning("No specific aspects found in the text.")
        return

    for aspect in aspects:
        try:
            clean_aspect = _strip_wordpiece_prefix(aspect)
            sentiment = predict_sentiment(user_input, clean_aspect)
        except Exception as e:
            # UI boundary: report the failure and continue with the next aspect.
            st.error(f"Error analyzing aspect {aspect}: {e}")
        else:
            st.markdown(
                _result_card_html(clean_aspect, sentiment),
                unsafe_allow_html=True,
            )


if __name__ == "__main__":
    main()