Spaces:

AshenR
/

ASPPrediction

Sleeping

App Files Files Community

ASPPrediction / app.py

AshenR

Update app.py

02a7592 verified 10 months ago

raw

history blame contribute delete

17.2 kB

	# import streamlit as st

	# # Load sentiment analysis pipeline
	# from transformers import BertModel
	# import torch
	# import torch.nn as nn

	# class BertForTargetSentiment(nn.Module):
	# def __init__(self, bert_model_name='bert-base-uncased', num_labels=3):
	# super(BertForTargetSentiment, self).__init__()
	# self.bert = BertModel.from_pretrained(bert_model_name)
	# self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

	# def forward(self, input_ids, attention_mask, word_indices):
	# # Get embeddings from BERT
	# outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
	# last_hidden_state = outputs.last_hidden_state # [batch_size, seq_length, hidden_size]

	# # Extract target word embeddings by averaging the subword embeddings
	# batch_word_embeddings = []
	# for i, word_idx in enumerate(word_indices):
	# word_embedding = last_hidden_state[i, word_idx, :].mean(dim=0) # Average across word subtokens
	# batch_word_embeddings.append(word_embedding)

	# word_embeddings = torch.stack(batch_word_embeddings) # Shape: [batch_size, hidden_size]

	# # Pass through classifier to predict sentiment
	# logits = self.classifier(word_embeddings)
	# return logits

	# import torch
	# from transformers import BertTokenizer

	# # Load the fine-tuned model and tokenizer (replace with your model path if saved locally)
	# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
	# modelSentiment = BertForTargetSentiment() # Use the model class defined earlier

	# # Load the trained model weights (adjust the path if needed)
	# # modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth'))
	# modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth', map_location=torch.device('cpu')))

	# modelSentiment.eval() # Set the model to evaluation mode

	# # Sentiment labels
	# sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

	# def predict_sentiment(sentence, target_word, max_len=128):

	# # Step 1: Tokenize the sentence and target word
	# inputs = tokenizer(
	# sentence,
	# return_tensors='pt',
	# padding='max_length',
	# truncation=True,
	# max_length=max_len
	# )

	# word_tokens = tokenizer.tokenize(target_word)
	# target_token_ids = tokenizer.convert_tokens_to_ids(word_tokens)

	# # Step 2: Find the position of the target word in tokenized sentence
	# word_indices = [i for i, token_id in enumerate(inputs['input_ids'][0]) if token_id in target_token_ids]

	# if not word_indices:
	# raise ValueError(f"The word '{target_word}' could not be found in the sentence.")

	# # Step 3: Forward pass through the model
	# with torch.no_grad():
	# logits = modelSentiment(
	# input_ids=inputs['input_ids'],
	# attention_mask=inputs['attention_mask'],
	# word_indices=[word_indices]
	# )

	# # Step 4: Predict the sentiment
	# predicted_label = torch.argmax(logits, dim=-1).item()
	# predicted_sentiment = sentiment_labels[predicted_label]

	# return predicted_sentiment

	# from scipy.special import softmax
	# import numpy as np
	# from tensorflow.keras.preprocessing.sequence import pad_sequences
	# import os
	# from pytorch_transformers import BertForTokenClassification
	# import torch
	# from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
	# max_len = 60
	# # Define the directory where the model is saved
	# bert_out_address = 'model/'

	# # Load the configuration file
	# config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))
	# device = 'cuda' if torch.cuda.is_available() else 'cpu'

	# # Load the pre-trained model's weights for sequence classification
	# model = BertForSequenceClassification(config)
	# # model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin")))

	# model.load_state_dict(torch.load(os.path.join(bert_out_address, "pytorch_model.bin"), map_location=torch.device(device)))
	# model = BertForTokenClassification.from_pretrained(bert_out_address,num_labels=5)

	# # Load the tokenizer
	# tokenizer = BertTokenizer.from_pretrained(bert_out_address)

	# # Set the model to evaluation mode (if you're not going to train it further)
	# model.eval()




	# def predict(test_query):
	# import torch
	# tokenized_texts = []
	# temp_token = []
	# # Add [CLS] at the front
	# temp_token.append('[CLS]')
	# token_list = tokenizer.tokenize(test_query)
	# for m,token in enumerate(token_list):
	# temp_token.append(token)
	# # Trim the token to fit the length requirement
	# if len(temp_token) > max_len-1:
	# temp_token= temp_token[:max_len-1]
	# # Add [SEP] at the end
	# temp_token.append('[SEP]')
	# tokenized_texts.append(temp_token)
	# input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
	# maxlen=max_len, dtype="long", truncating="post", padding="post")
	# attention_masks = [[int(i>0) for i in ii] for ii in input_ids]
	# segment_ids = [[0] * len(input_id) for input_id in input_ids]
	# input_ids = torch.tensor(input_ids)
	# attention_masks = torch.tensor(attention_masks)
	# segment_ids = torch.tensor(segment_ids)

	# # Assuming you have defined your model and input_ids somewhere before this
	# device = 'cuda' if torch.cuda.is_available() else 'cpu'
	# model.to(device) # Move model to GPU if available

	# # Move input tensors to the same device as the model
	# input_ids = input_ids.to(device)

	# with torch.no_grad():
	# outputs = model(input_ids, token_type_ids=None, attention_mask=None)
	# logits = outputs[0] # Ensure this is on the same device

	# # Make logits into numpy type predict result
	# # The predict result contain each token's all tags predict result
	# predict_results = logits.detach().cpu().numpy()

	# from scipy.special import softmax
	# result_arrays_soft = softmax(predict_results[0])
	# tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
	# result_array = result_arrays_soft
	# result_list = np.argmax(result_array,axis=-1)
	# asp = []
	# for i, mark in enumerate(attention_masks[0]):
	# if mark>0:
	# if tag2name[result_list[i]] == "ASP":
	# # print("Token:%s"%(temp_token[i]))
	# asp.append(temp_token[i])
	# # print("Predict_Tag:%s"%(tag2name[result_list[i]]))
	# return asp


	# # Title for the Streamlit app
	# st.title("Sentiment Analysis App")

	# # Text input
	# user_input = st.text_area("Enter the text for sentiment analysis:", "")


	# # Check if there is input text
	# outs = []
	# if user_input:
	# # Perform sentiment analysis
	# with st.spinner("Analyzing..."):
	# result = predict(user_input)
	# for i in result:
	# i, predict_sentiment(user_input,i.strip())


	# # result


	import streamlit as st
	import torch
	import torch.nn as nn
	from transformers import BertModel, BertTokenizer, BertConfig, BertForSequenceClassification
	from pytorch_transformers import BertForTokenClassification
	import numpy as np
	from scipy.special import softmax
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	import os

	# Styling and Configuration
	# Page-level settings: browser tab title/icon, wide layout, sidebar open on load.
	st.set_page_config(
	    page_title="Sentiment Prediction with Aspects",
	    page_icon="🧠",
	    layout="wide",
	    initial_sidebar_state="expanded"
	)

	# Custom CSS for enhanced dark mode styling.
	# The stylesheet is injected as raw HTML, which is why unsafe_allow_html=True
	# is required. The sidebar rule targets both the original auto-generated
	# class name and the stable data-testid attribute, because generated class
	# names differ between Streamlit builds.
	st.markdown("""
	<style>
	/* Dark Mode Base Styling */
	.stApp {
	background-color: #121212;
	color: #e0e0e0;
	}

	/* Sidebar Styling */
	.css-1aumxhk, [data-testid="stSidebar"] {
	background-color: #1e1e1e;
	color: #e0e0e0;
	}

	/* Text Area Styling */
	.stTextArea > div > div > textarea {
	background-color: #1e1e1e;
	color: #e0e0e0;
	border: 2px solid #4a4a4a;
	border-radius: 10px;
	padding: 10px;
	}

	/* Button Styling */
	.stButton > button {
	background-color: #2196f3;
	color: white;
	border-radius: 10px;
	border: none;
	padding: 10px 20px;
	transition: all 0.3s ease;
	}

	.stButton > button:hover {
	background-color: #1976d2;
	transform: scale(1.05);
	}

	/* Result Card Styling */
	.result-card {
	background-color: #1e1e1e;
	border-radius: 10px;
	padding: 15px;
	margin-bottom: 10px;
	box-shadow: 0 4px 6px rgba(255,255,255,0.1);
	transition: all 0.3s ease;
	color: #e0e0e0;
	}

	.result-card:hover {
	transform: translateY(-5px);
	box-shadow: 0 6px 8px rgba(255,255,255,0.15);
	}

	/* Headings and Text */
	h1, h2, h3, h4, p, div, span {
	color: #e0e0e0 !important;
	}
	</style>
	""", unsafe_allow_html=True)

	# Emoji shown next to each predicted sentiment label.
	# Keys match the label strings produced by the sentiment classifier.
	SENTIMENT_EMOJIS = {
	    'Negative': '😞',  # Sad face for negative sentiment
	    'Neutral': '😐',  # Neutral face for neutral sentiment
	    'Positive': '😊'  # Smiling face for positive sentiment
	}

	# Accent colour per sentiment label, used for the result card styling.
	SENTIMENT_COLORS = {
	    'Negative': '#ff6b6b',  # Bright red for negative
	    'Neutral': '#4a4a4a',  # Gray for neutral
	    'Positive': '#4caf50'  # Bright green for positive
	}

	# Sentiment Prediction Model (Same as previous implementation)
	class BertForTargetSentiment(nn.Module):
	    """BERT encoder followed by a linear layer that scores a target word.

	    The representation of the target word is the mean of the encoder's
	    last-layer hidden states at the token positions supplied by the caller.
	    """

	    def __init__(self, bert_model_name='bert-base-uncased', num_labels=3):
	        """Load the pretrained encoder and create the classification head.

	        Args:
	            bert_model_name: Name or path handed to ``BertModel.from_pretrained``.
	            num_labels: Number of output classes of the linear head.
	        """
	        super().__init__()
	        encoder = BertModel.from_pretrained(bert_model_name)
	        self.bert = encoder
	        self.classifier = nn.Linear(encoder.config.hidden_size, num_labels)

	    def forward(self, input_ids, attention_mask, word_indices):
	        """Return one row of class logits per batch entry.

	        Args:
	            input_ids: Token ids passed straight to the encoder.
	            attention_mask: Attention mask passed straight to the encoder.
	            word_indices: One collection of token positions per batch row;
	                the hidden states at those positions are averaged.

	        Returns:
	            The classifier output for the stacked, averaged embeddings.
	        """
	        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
	        hidden_states = encoded.last_hidden_state

	        # Average the selected positions of each row into a single vector.
	        pooled = torch.stack([
	            hidden_states[row, positions, :].mean(dim=0)
	            for row, positions in enumerate(word_indices)
	        ])
	        return self.classifier(pooled)

	# Device configuration: use CUDA when torch reports it as available, else CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'

	# NOTE(review): Streamlit re-runs the script on user interaction, so the
	# loads below are presumably repeated each time; consider wrapping them in
	# a cached loader (e.g. st.cache_resource) if the installed Streamlit
	# version provides one - confirm before changing.

	# Load the sentiment prediction model: stock BERT tokenizer plus the
	# fine-tuned weights stored next to the app.
	tokenizer_sentiment = BertTokenizer.from_pretrained('bert-base-uncased')
	modelSentiment = BertForTargetSentiment()
	modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth', map_location=torch.device(device)))
	modelSentiment.eval()

	# Maps the classifier's output index to a sentiment label.
	sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

	# Aspect extraction model settings: sequence length fed to the tagger and
	# the directory holding its config, weights and vocabulary.
	max_len = 60
	bert_out_address = 'model/'

	# Load the configuration file (kept as a module-level name for compatibility).
	config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

	# Load the aspect tagger. The previous code first built a
	# BertForSequenceClassification and loaded pytorch_model.bin into it, only
	# to overwrite `model` on the next line; that discarded load is removed.
	model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)

	# Load the tokenizer that belongs to the aspect tagger.
	tokenizer = BertTokenizer.from_pretrained(bert_out_address)

	# Set the model to evaluation mode
	model.eval()

	def predict_sentiment(sentence, target_word, max_len=128):
	    """
	    Classify the sentiment attached to ``target_word`` inside ``sentence``.

	    The sentence is encoded to ``max_len`` tokens (padded/truncated), every
	    position whose token id belongs to the tokenized target word is
	    collected, and the sentiment model scores the average of those positions.

	    Returns the label from ``sentiment_labels`` with the highest logit.
	    Raises ValueError when no token of the target word occurs in the
	    encoded sentence.
	    """
	    encoded = tokenizer_sentiment(
	        sentence,
	        return_tensors='pt',
	        padding='max_length',
	        truncation=True,
	        max_length=max_len
	    )
	    sentence_ids = encoded['input_ids']

	    # Ids of the word pieces that make up the target word.
	    target_pieces = tokenizer_sentiment.tokenize(target_word)
	    target_ids = tokenizer_sentiment.convert_tokens_to_ids(target_pieces)

	    # Every position in the sentence holding one of those ids.
	    positions = []
	    for position, token_id in enumerate(sentence_ids[0]):
	        if token_id in target_ids:
	            positions.append(position)

	    if len(positions) == 0:
	        raise ValueError(f"The word '{target_word}' could not be found in the sentence.")

	    with torch.no_grad():
	        scores = modelSentiment(
	            input_ids=sentence_ids,
	            attention_mask=encoded['attention_mask'],
	            word_indices=[positions]
	        )

	    best_index = torch.argmax(scores, dim=-1).item()
	    return sentiment_labels[best_index]

	def predict(test_query):
	    """
	    Extract aspects from the input query

	    The query is word-piece tokenized, wrapped in [CLS]/[SEP], cut to
	    ``max_len`` tokens and zero-padded. The token-classification model tags
	    every position, and the tokens tagged "ASP" are returned in order.

	    Args:
	        test_query: Text to analyse.

	    Returns:
	        List of tokens (word pieces, possibly carrying a '##' prefix)
	        that the model tagged as aspects; empty when none were found.
	    """
	    # Keep one slot free for the closing [SEP] so the sequence never exceeds max_len.
	    tokens = ['[CLS]'] + tokenizer.tokenize(test_query)
	    tokens = tokens[:max_len - 1]
	    tokens.append('[SEP]')

	    padded_ids = pad_sequences(
	        [tokenizer.convert_tokens_to_ids(tokens)],
	        maxlen=max_len, dtype="long", truncating="post", padding="post"
	    )

	    model.to(device)
	    input_ids = torch.tensor(padded_ids).to(device)
	    # Padding uses id 0, so non-zero ids mark the real tokens. The mask was
	    # previously computed but never given to the model, which let it attend
	    # to the padding positions.
	    attention_mask = (input_ids > 0).long()

	    with torch.no_grad():
	        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask)
	    logits = outputs[0]

	    # Per-token tag scores for the single query in the batch.
	    token_scores = logits[0].detach().cpu().numpy()
	    # Normalise over the tag axis; the argmax below is unaffected by it.
	    tag_probabilities = softmax(token_scores, axis=-1)
	    tag_ids = np.argmax(tag_probabilities, axis=-1)

	    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}

	    # zip stops after the last real token, so padded positions are skipped.
	    return [
	        token
	        for token, tag_id in zip(tokens, tag_ids)
	        if tag2name[int(tag_id)] == "ASP"
	    ]

	# Streamlit App
	def main():
	    """Render the page and, on request, show a sentiment card per aspect.

	    Draws the title, sidebar description, text area and "Analyze Sentiment"
	    button. When the button is pressed with non-empty text, aspects are
	    extracted with ``predict`` and each distinct aspect is scored with
	    ``predict_sentiment``. A failure on one aspect is reported with
	    ``st.error`` and does not stop the remaining aspects.
	    """
	    # Function-scope import keeps this block self-contained.
	    import html

	    # Title with gradient and centered
	    st.markdown("""
	<h1 style="text-align: center;
	background: linear-gradient(to right, #2196f3, #1976d2);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 3rem;">
	🧠 Sentiment Insight
	</h1>
	""", unsafe_allow_html=True)

	    # Sidebar for app description
	    st.sidebar.header("About the App")
	    st.sidebar.info("""
	🚀 Sentiment Insight analyzes text to:
	- Extract key aspects from your text
	- Predict sentiment for each aspect
	- Provide visual sentiment feedback

	💡 Simply enter your text and see insights!
	""")

	    # Text input with placeholder
	    user_input = st.text_area(
	        "Enter text for sentiment analysis:",
	        placeholder="Type or paste your text here...",
	        height=200
	    )

	    # Analyze button
	    analyze_button = st.button("Analyze Sentiment", key="analyze_btn")

	    # Nothing to do until the button is pressed with some text entered.
	    if not (analyze_button and user_input):
	        return

	    with st.spinner("🔍 Analyzing your text..."):
	        # Get aspects
	        aspects = predict(user_input)

	        # Display results section
	        st.markdown("## 📊 Analysis Results")

	        if not aspects:
	            st.warning("No specific aspects found in the text.")
	            return

	        shown = set()
	        for aspect in aspects:
	            # Remove the '#' markers that word-piece tokens carry.
	            clean_aspect = aspect.strip('##')

	            # The same token can be tagged several times; one card is enough
	            # because the sentiment lookup already covers every occurrence.
	            if clean_aspect in shown:
	                continue
	            shown.add(clean_aspect)

	            try:
	                sentiment = predict_sentiment(user_input, clean_aspect)
	            except Exception as e:
	                # UI boundary: report the failure and carry on with the rest.
	                st.error(f"Error analyzing aspect {aspect}: {e}")
	                continue

	            accent = SENTIMENT_COLORS.get(sentiment, '#2196f3')
	            # The aspect comes from user text and is rendered as raw HTML,
	            # so it must be escaped before interpolation.
	            safe_aspect = html.escape(clean_aspect)

	            # NOTE(review): the page stylesheet sets `span { color: ... !important }`,
	            # which takes precedence over the inline span colours below, so
	            # only the left border is likely to show the accent colour - confirm.
	            st.markdown(f"""
	<div class="result-card" style="border-left: 5px solid {accent};">
	<h4 style="margin-bottom: 10px;">
	<span style="color: {accent};">
	{safe_aspect}
	</span>
	</h4>
	<p>
	<strong>Sentiment:</strong>
	<span style="color: {accent};">
	{sentiment} {SENTIMENT_EMOJIS.get(sentiment, '')}
	</span>
	</p>
	</div>
	""", unsafe_allow_html=True)

	if __name__ == "__main__":
	    main()