ASPPrediction / app.py
AshenR's picture
Update app.py
02a7592 verified
# import streamlit as st
# # Load sentiment analysis pipeline
# from transformers import BertModel
# import torch
# import torch.nn as nn
# class BertForTargetSentiment(nn.Module):
# def __init__(self, bert_model_name='bert-base-uncased', num_labels=3):
# super(BertForTargetSentiment, self).__init__()
# self.bert = BertModel.from_pretrained(bert_model_name)
# self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)
# def forward(self, input_ids, attention_mask, word_indices):
# # Get embeddings from BERT
# outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
# last_hidden_state = outputs.last_hidden_state # [batch_size, seq_length, hidden_size]
# # Extract target word embeddings by averaging the subword embeddings
# batch_word_embeddings = []
# for i, word_idx in enumerate(word_indices):
# word_embedding = last_hidden_state[i, word_idx, :].mean(dim=0) # Average across word subtokens
# batch_word_embeddings.append(word_embedding)
# word_embeddings = torch.stack(batch_word_embeddings) # Shape: [batch_size, hidden_size]
# # Pass through classifier to predict sentiment
# logits = self.classifier(word_embeddings)
# return logits
# import torch
# from transformers import BertTokenizer
# # Load the fine-tuned model and tokenizer (replace with your model path if saved locally)
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# modelSentiment = BertForTargetSentiment() # Use the model class defined earlier
# # Load the trained model weights (adjust the path if needed)
# # modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth'))
# modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth', map_location=torch.device('cpu')))
# modelSentiment.eval() # Set the model to evaluation mode
# # Sentiment labels
# sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
# def predict_sentiment(sentence, target_word, max_len=128):
# # Step 1: Tokenize the sentence and target word
# inputs = tokenizer(
# sentence,
# return_tensors='pt',
# padding='max_length',
# truncation=True,
# max_length=max_len
# )
# word_tokens = tokenizer.tokenize(target_word)
# target_token_ids = tokenizer.convert_tokens_to_ids(word_tokens)
# # Step 2: Find the position of the target word in tokenized sentence
# word_indices = [i for i, token_id in enumerate(inputs['input_ids'][0]) if token_id in target_token_ids]
# if not word_indices:
# raise ValueError(f"The word '{target_word}' could not be found in the sentence.")
# # Step 3: Forward pass through the model
# with torch.no_grad():
# logits = modelSentiment(
# input_ids=inputs['input_ids'],
# attention_mask=inputs['attention_mask'],
# word_indices=[word_indices]
# )
# # Step 4: Predict the sentiment
# predicted_label = torch.argmax(logits, dim=-1).item()
# predicted_sentiment = sentiment_labels[predicted_label]
# return predicted_sentiment
# from scipy.special import softmax
# import numpy as np
# from tensorflow.keras.preprocessing.sequence import pad_sequences
# import os
# from pytorch_transformers import BertForTokenClassification
# import torch
# from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
# max_len = 60
# # Define the directory where the model is saved
# bert_out_address = 'model/'
# # Load the configuration file
# config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# # Load the pre-trained model's weights for sequence classification
# model = BertForSequenceClassification(config)
# # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin")))
# model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"), map_location=torch.device(device)))
# model = BertForTokenClassification.from_pretrained(bert_out_address,num_labels=5)
# # Load the tokenizer
# tokenizer = BertTokenizer.from_pretrained(bert_out_address)
# # Set the model to evaluation mode (if you're not going to train it further)
# model.eval()
# def predict(test_query):
# import torch
# tokenized_texts = []
# temp_token = []
# # Add [CLS] at the front
# temp_token.append('[CLS]')
# token_list = tokenizer.tokenize(test_query)
# for m,token in enumerate(token_list):
# temp_token.append(token)
# # Trim the token to fit the length requirement
# if len(temp_token) > max_len-1:
# temp_token= temp_token[:max_len-1]
# # Add [SEP] at the end
# temp_token.append('[SEP]')
# tokenized_texts.append(temp_token)
# input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
# maxlen=max_len, dtype="long", truncating="post", padding="post")
# attention_masks = [[int(i>0) for i in ii] for ii in input_ids]
# segment_ids = [[0] * len(input_id) for input_id in input_ids]
# input_ids = torch.tensor(input_ids)
# attention_masks = torch.tensor(attention_masks)
# segment_ids = torch.tensor(segment_ids)
# # Assuming you have defined your model and input_ids somewhere before this
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# model.to(device) # Move model to GPU if available
# # Move input tensors to the same device as the model
# input_ids = input_ids.to(device)
# with torch.no_grad():
# outputs = model(input_ids, token_type_ids=None, attention_mask=None)
# logits = outputs[0] # Ensure this is on the same device
# # Make logits into numpy type predict result
# # The predict result contain each token's all tags predict result
# predict_results = logits.detach().cpu().numpy()
# from scipy.special import softmax
# result_arrays_soft = softmax(predict_results[0])
# tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
# result_array = result_arrays_soft
# result_list = np.argmax(result_array,axis=-1)
# asp = []
# for i, mark in enumerate(attention_masks[0]):
# if mark>0:
# if tag2name[result_list[i]] == "ASP":
# # print("Token:%s"%(temp_token[i]))
# asp.append(temp_token[i])
# # print("Predict_Tag:%s"%(tag2name[result_list[i]]))
# return asp
# # Title for the Streamlit app
# st.title("Sentiment Analysis App")
# # Text input
# user_input = st.text_area("Enter the text for sentiment analysis:", "")
# # Check if there is input text
# outs = []
# if user_input:
# # Perform sentiment analysis
# with st.spinner("Analyzing..."):
# result = predict(user_input)
# for i in result:
# i, predict_sentiment(user_input,i.strip())
# # result
import streamlit as st
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertConfig, BertForSequenceClassification
from pytorch_transformers import BertForTokenClassification
import numpy as np
from scipy.special import softmax
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
# Styling and Configuration
# Page-level settings, applied by the first Streamlit call in this script.
_PAGE_SETTINGS = {
    "page_title": "Sentiment Prediction with Aspects",
    "page_icon": "🧠",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Dark-mode stylesheet, injected into the page as raw HTML.
# NOTE: the last rule forces the text colour of every h1-h4/p/div/span with
# `!important`, which takes precedence over ordinary inline `color` styles.
_DARK_MODE_CSS = """
<style>
/* Dark Mode Base Styling */
.stApp {
background-color: #121212;
color: #e0e0e0;
}
/* Sidebar Styling */
.css-1aumxhk {
background-color: #1e1e1e;
color: #e0e0e0;
}
/* Text Area Styling */
.stTextArea > div > div > textarea {
background-color: #1e1e1e;
color: #e0e0e0;
border: 2px solid #4a4a4a;
border-radius: 10px;
padding: 10px;
}
/* Button Styling */
.stButton > button {
background-color: #2196f3;
color: white;
border-radius: 10px;
border: none;
padding: 10px 20px;
transition: all 0.3s ease;
}
.stButton > button:hover {
background-color: #1976d2;
transform: scale(1.05);
}
/* Result Card Styling */
.result-card {
background-color: #1e1e1e;
border-radius: 10px;
padding: 15px;
margin-bottom: 10px;
box-shadow: 0 4px 6px rgba(255,255,255,0.1);
transition: all 0.3s ease;
color: #e0e0e0;
}
.result-card:hover {
transform: translateY(-5px);
box-shadow: 0 6px 8px rgba(255,255,255,0.15);
}
/* Headings and Text */
h1, h2, h3, h4, p, div, span {
color: #e0e0e0 !important;
}
</style>
"""
st.markdown(_DARK_MODE_CSS, unsafe_allow_html=True)
# Emoji shown next to each sentiment label in the result cards.
SENTIMENT_EMOJIS = {
    'Negative': '😞',  # sad face
    'Neutral': '😐',   # neutral face
    'Positive': '😊',  # smiling face
}

# Accent colour (CSS hex) used for each sentiment label on the dark theme.
SENTIMENT_COLORS = {
    'Negative': '#ff6b6b',  # bright red
    'Neutral': '#4a4a4a',   # gray
    'Positive': '#4caf50',  # bright green
}
# Sentiment Prediction Model (Same as previous implementation)
class BertForTargetSentiment(nn.Module):
    """BERT encoder followed by a linear classifier over a target word.

    The hidden states at the target word's token positions are averaged into
    one vector per batch row, and that vector is mapped to ``num_labels``
    logits.
    """

    def __init__(self, bert_model_name='bert-base-uncased', num_labels=3):
        """Load the pretrained encoder and create the classification head.

        Args:
            bert_model_name: Name or path handed to ``BertModel.from_pretrained``.
            num_labels: Size of the classifier output.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, word_indices):
        """Return one row of logits per batch element.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.
            word_indices: One index collection per batch row selecting the
                token positions of the target word in that row.

        Returns:
            Tensor produced by stacking the per-row mean embeddings and
            passing them through the classifier.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = encoded.last_hidden_state
        # Average the selected positions of each row into a single vector.
        target_vectors = torch.stack([
            hidden_states[row, positions, :].mean(dim=0)
            for row, positions in enumerate(word_indices)
        ])
        return self.classifier(target_vectors)
# Device configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE(review): Streamlit re-executes this script on every interaction, so
# everything below presumably runs again each time; consider wrapping the
# loading in a cached loader function - TODO confirm against the deployed
# Streamlit version.

# Load the sentiment prediction model.
# NOTE(review): torch.load unpickles the checkpoint; only load files you trust.
tokenizer_sentiment = BertTokenizer.from_pretrained('bert-base-uncased')
modelSentiment = BertForTargetSentiment()
modelSentiment.load_state_dict(
    torch.load('bert_target_sentiment_model.pth', map_location=torch.device(device))
)
modelSentiment.eval()

# Sentiment labels (classifier output index -> label)
sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

# Aspect extraction model settings
max_len = 60                 # fixed sequence length used by predict()
bert_out_address = 'model/'  # directory holding config, weights and vocab

# Load the configuration file. Nothing visible in this file reads `config`
# any more; the name is kept so existing references keep working.
config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

# Load the aspect extraction (token classification) model. No separate
# sequence-classification model is built first: `model` is bound only once,
# to the model that predict() actually runs.
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)

# Load the tokenizer that matches the aspect extraction model
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Set the model to evaluation mode
model.eval()
def predict_sentiment(sentence, target_word, max_len=128):
    """
    Classify the sentiment attached to ``target_word`` inside ``sentence``.

    Args:
        sentence: Full text that contains the target word.
        target_word: Word (or word piece) whose sentiment is wanted.
        max_len: Length the encoded sentence is padded/truncated to.

    Returns:
        The entry of ``sentiment_labels`` selected by the highest logit.

    Raises:
        ValueError: If none of the target's token ids occur in the encoded
            sentence.
    """
    encoded = tokenizer_sentiment(
        sentence,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=max_len
    )

    # Token ids that make up the target word.
    target_pieces = tokenizer_sentiment.tokenize(target_word)
    target_token_ids = tokenizer_sentiment.convert_tokens_to_ids(target_pieces)

    # Every position in the sentence whose id is one of the target's ids.
    sentence_ids = encoded['input_ids'][0].tolist()
    positions = []
    for position, token_id in enumerate(sentence_ids):
        if token_id in target_token_ids:
            positions.append(position)

    if len(positions) == 0:
        raise ValueError(f"The word '{target_word}' could not be found in the sentence.")

    with torch.no_grad():
        logits = modelSentiment(
            input_ids=encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
            word_indices=[positions]
        )

    best_label = torch.argmax(logits, dim=-1).item()
    return sentiment_labels[best_label]
def predict(test_query):
    """
    Extract aspect tokens from the input query.

    The query is tokenized, wrapped in ``[CLS]``/``[SEP]``, padded to the
    module-level ``max_len`` and run through the token-classification
    ``model``. Tokens whose predicted tag is ``ASP`` are returned.

    Args:
        test_query: Raw text to analyse.

    Returns:
        List of tokens tagged as aspects, exactly as produced by the
        tokenizer and in sentence order. Empty if no token is tagged ``ASP``.
    """
    # "[CLS] <tokens> [SEP]", truncated so the whole sequence fits in max_len.
    tokens = ['[CLS]'] + tokenizer.tokenize(test_query)
    tokens = tokens[:max_len - 1]
    tokens.append('[SEP]')

    input_ids = pad_sequences(
        [tokenizer.convert_tokens_to_ids(tokens)],
        maxlen=max_len, dtype="long", truncating="post", padding="post",
    )
    input_ids = torch.tensor(input_ids)
    # Padding is id 0, so every non-zero id marks a real token.
    attention_mask = (input_ids > 0).long()

    model.to(device)
    with torch.no_grad():
        # Pass the mask so the padded positions are not attended to.
        outputs = model(
            input_ids.to(device),
            token_type_ids=None,
            attention_mask=attention_mask.to(device),
        )
    logits = outputs[0]

    # Softmax is monotonic, so the arg-max of the raw logits already
    # identifies the predicted tag of each token.
    predicted_tags = np.argmax(logits.detach().cpu().numpy()[0], axis=-1)

    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
    # zip() stops at len(tokens), so padded positions are never visited; the
    # mask check additionally skips any token whose id is 0.
    return [
        token
        for token, tag, is_real in zip(tokens, predicted_tags, attention_mask[0].tolist())
        if is_real and tag2name[int(tag)] == "ASP"
    ]
# Streamlit App

# Page title, rendered as raw HTML so the gradient text effect can be applied.
_TITLE_HTML = """
<h1 style="text-align: center;
background: linear-gradient(to right, #2196f3, #1976d2);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 3rem;">
🧠 Sentiment Insight
</h1>
"""

# Sidebar description of the app.
_ABOUT_TEXT = """
🚀 Sentiment Insight analyzes text to:
- Extract key aspects from your text
- Predict sentiment for each aspect
- Provide visual sentiment feedback
💡 Simply enter your text and see insights!
"""

# Result card template. The app stylesheet forces a text colour on every
# <span> with `!important`, so the inline sentiment colour must be
# `!important` as well or it is never shown.
_RESULT_CARD_HTML = """
<div class="result-card" style="border-left: 5px solid {color};">
<h4 style="margin-bottom: 10px;">
<span style="color: {color} !important;">
{aspect}
</span>
</h4>
<p>
<strong>Sentiment:</strong>
<span style="color: {color} !important;">
{sentiment} {emoji}
</span>
</p>
</div>
"""


def _strip_wordpiece_prefix(token):
    """Return ``token`` without a leading ``##`` WordPiece continuation marker."""
    return token[2:] if token.startswith('##') else token


def main():
    """Render the page: title, sidebar, text input and per-aspect results.

    When the button is pressed with non-empty text, aspects are extracted with
    ``predict`` and a sentiment card is shown for each one using
    ``predict_sentiment``. A failure on one aspect is reported with
    ``st.error`` and does not stop the remaining aspects.
    """
    # Local import keeps this block self-contained.
    from html import escape

    st.markdown(_TITLE_HTML, unsafe_allow_html=True)

    # Sidebar for app description
    st.sidebar.header("About the App")
    st.sidebar.info(_ABOUT_TEXT)

    # Text input with placeholder
    user_input = st.text_area(
        "Enter text for sentiment analysis:",
        placeholder="Type or paste your text here...",
        height=200
    )

    # Analyze button
    analyze_button = st.button("Analyze Sentiment", key="analyze_btn")

    # Nothing to do until the button is pressed with some text entered.
    if not (analyze_button and user_input):
        return

    with st.spinner("🔍 Analyzing your text..."):
        aspects = predict(user_input)

        st.markdown("## 📊 Analysis Results")
        if not aspects:
            st.warning("No specific aspects found in the text.")
            return

        for aspect in aspects:
            try:
                # Drop only the "##" continuation marker; other '#'
                # characters belong to the token itself.
                clean_aspect = _strip_wordpiece_prefix(aspect)
                sentiment = predict_sentiment(user_input, clean_aspect)
                card = _RESULT_CARD_HTML.format(
                    color=SENTIMENT_COLORS.get(sentiment, '#2196f3'),
                    # The aspect comes from user text and is injected into
                    # raw HTML, so it must be escaped.
                    aspect=escape(clean_aspect),
                    sentiment=sentiment,
                    emoji=SENTIMENT_EMOJIS.get(sentiment, ''),
                )
                st.markdown(card, unsafe_allow_html=True)
            except Exception as e:
                # UI boundary: report the failure and continue with the
                # remaining aspects instead of aborting the whole run.
                st.error(f"Error analyzing aspect {aspect}: {e}")


if __name__ == "__main__":
    main()