import streamlit as st
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertForTokenClassification
import numpy as np
from scipy.special import softmax
from tensorflow.keras.preprocessing.sequence import pad_sequences  # used only to pad token-id sequences

# Styling and Configuration
st.set_page_config(
    page_title="Sentiment Prediction with Aspects",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for enhanced dark mode styling
st.markdown("""
<style>
    /* Dark Mode Base Styling */
    .stApp {
        background-color: #121212;
        color: #e0e0e0;
    }

    /* Sidebar Styling */
    .css-1aumxhk {
        background-color: #1e1e1e;
        color: #e0e0e0;
    }

    /* Text Area Styling */
    .stTextArea > div > div > textarea {
        background-color: #1e1e1e;
        color: #e0e0e0;
        border: 2px solid #4a4a4a;
        border-radius: 10px;
        padding: 10px;
    }

    /* Button Styling */
    .stButton > button {
        background-color: #2196f3;
        color: white;
        border-radius: 10px;
        border: none;
        padding: 10px 20px;
        transition: all 0.3s ease;
    }

    .stButton > button:hover {
        background-color: #1976d2;
        transform: scale(1.05);
    }

    /* Result Card Styling */
    .result-card {
        background-color: #1e1e1e;
        border-radius: 10px;
        padding: 15px;
        margin-bottom: 10px;
        box-shadow: 0 4px 6px rgba(255,255,255,0.1);
        transition: all 0.3s ease;
        color: #e0e0e0;
    }

    .result-card:hover {
        transform: translateY(-5px);
        box-shadow: 0 6px 8px rgba(255,255,255,0.15);
    }

    /* Headings and Text */
    h1, h2, h3, h4, p, div, span {
        color: #e0e0e0 !important;
    }
</style>
""", unsafe_allow_html=True)

# Sentiment Emojis with more expressive options
SENTIMENT_EMOJIS = {
    'Negative': '😞',   # Sad face for negative sentiment
    'Neutral': '😐',    # Neutral face for neutral sentiment
    'Positive': '😊'    # Smiling face for positive sentiment
}

# Sentiment Color Coding (Updated for better visibility in dark mode)
SENTIMENT_COLORS = {
    'Negative': '#ff6b6b',  # Bright red for negative
    'Neutral': '#4a4a4a',   # Gray for neutral
    'Positive': '#4caf50'   # Bright green for positive
}

# Sentiment Prediction Model (Same as previous implementation)
class BertForTargetSentiment(nn.Module):
    def __init__(self, bert_model_name='bert-base-uncased', num_labels=3):
        super(BertForTargetSentiment, self).__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)
    
    def forward(self, input_ids, attention_mask, word_indices):
        # Get contextual embeddings from BERT
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state  # [batch_size, seq_length, hidden_size]
        
        # Extract each target word's embedding by averaging its subword embeddings
        batch_word_embeddings = []
        for i, word_idx in enumerate(word_indices):
            word_embedding = last_hidden_state[i, word_idx, :].mean(dim=0)
            batch_word_embeddings.append(word_embedding)
        
        word_embeddings = torch.stack(batch_word_embeddings)  # [batch_size, hidden_size]
        
        # Classify the pooled target-word embedding
        logits = self.classifier(word_embeddings)
        return logits
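
# Illustrative call (assumption, not executed here): for a batch of one sentence
# in which the target word occupies subword positions 3 and 4, one would pass
#   logits = modelSentiment(input_ids, attention_mask, word_indices=[[3, 4]])
# and get logits of shape [1, num_labels].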

# Device configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the sentiment prediction model
tokenizer_sentiment = BertTokenizer.from_pretrained('bert-base-uncased')
modelSentiment = BertForTargetSentiment()
modelSentiment.load_state_dict(torch.load('bert_target_sentiment_model.pth', map_location=torch.device(device)))
modelSentiment.eval()
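
# eval() disables dropout so inference is deterministic; gradient tracking is
# turned off separately with torch.no_grad() at prediction time.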

# Sentiment labels
sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

# Load aspect extraction model configurations
max_len = 60
bert_out_address = 'model/'
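
# Assumed layout: model/ holds the fine-tuned aspect-extraction checkpoint
# (config.json, pytorch_model.bin) plus the tokenizer files (e.g. vocab.txt).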

# Load the aspect-extraction model (token classification over 5 tags);
# from_pretrained reads the config and weights from the directory itself, so no
# separate BertConfig / state-dict loading step is needed.
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Set the model to evaluation mode
model.eval()

def predict_sentiment(sentence, target_word, max_len=128):
    """
    Predict sentiment for a specific target word in a sentence
    """
    inputs = tokenizer_sentiment(
        sentence, 
        return_tensors='pt', 
        padding='max_length', 
        truncation=True, 
        max_length=max_len
    )
    
    word_tokens = tokenizer_sentiment.tokenize(target_word)
    target_token_ids = tokenizer_sentiment.convert_tokens_to_ids(word_tokens)
    
    # Locate every position in the encoded sentence whose token id matches one of
    # the target word's subtoken ids (note: this also matches repeated occurrences
    # of the same subword elsewhere in the sentence)
    word_indices = [i for i, token_id in enumerate(inputs['input_ids'][0]) if token_id in target_token_ids]
    
    if not word_indices:
        raise ValueError(f"The word '{target_word}' could not be found in the sentence.")
    
    with torch.no_grad():
        logits = modelSentiment(
            input_ids=inputs['input_ids'], 
            attention_mask=inputs['attention_mask'], 
            word_indices=[word_indices]
        )
    
    predicted_label = torch.argmax(logits, dim=-1).item()
    predicted_sentiment = sentiment_labels[predicted_label]
    
    return predicted_sentiment
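
# Illustrative usage (assumes the weights file above is present):
#   predict_sentiment("The battery lasts long but the screen is dim", "battery")
# returns one of 'Negative' / 'Neutral' / 'Positive'.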

def predict(test_query):
    """
    Extract aspects from the input query
    """
    tokenized_texts = []
    temp_token = ['[CLS]']
    
    # Tokenize the query and append the tokens after the leading [CLS]
    temp_token.extend(tokenizer.tokenize(test_query))
    
    if len(temp_token) > max_len-1:
        temp_token = temp_token[:max_len-1]
    
    temp_token.append('[SEP]')
    tokenized_texts.append(temp_token)
    
    # Convert tokens to ids and pad/truncate to max_len
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                              maxlen=max_len, dtype="long", truncating="post", padding="post")
    # Mask is 1 for real tokens, 0 for padding
    attention_masks = [[int(i > 0) for i in ii] for ii in input_ids]
    
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    model.to(device)
    input_ids = input_ids.to(device)
    attention_masks = attention_masks.to(device)

    with torch.no_grad():
        # Pass the attention mask so padded positions are ignored by the model
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_masks)
        logits = outputs[0]

    predict_results = logits.detach().cpu().numpy()

    # Per-token probabilities over the 5 tags
    result_arrays_soft = softmax(predict_results[0], axis=-1)
    # Tag scheme: 'ASP' marks aspect tokens, 'O' everything else
    # (assumed: 'X' is the subword-continuation tag)
    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
    result_list = np.argmax(result_arrays_soft, axis=-1)
    
    asp = []
    for i, mark in enumerate(attention_masks[0]):
        if mark > 0:
            if tag2name[result_list[i]] == "ASP":
                asp.append(temp_token[i])
    return asp
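
# Illustrative usage: predict("The battery is great") returns the tokens the
# model tags as 'ASP', e.g. ['battery']; WordPiece pieces may carry a '##' prefix.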

# Streamlit App
def main():
    # Title with gradient and centered
    st.markdown("""
    <h1 style="text-align: center; 
               background: linear-gradient(to right, #2196f3, #1976d2);
               -webkit-background-clip: text;
               -webkit-text-fill-color: transparent;
               font-size: 3rem;">
    🧠 Sentiment Insight
    </h1>
    """, unsafe_allow_html=True)

    # Sidebar for app description
    st.sidebar.header("About the App")
    st.sidebar.info("""
    🚀 Sentiment Insight analyzes text to:
    - Extract key aspects from your text
    - Predict sentiment for each aspect
    - Provide visual sentiment feedback
    
    💡 Simply enter your text and see insights!
    """)

    # Text input with placeholder
    user_input = st.text_area(
        "Enter text for sentiment analysis:", 
        placeholder="Type or paste your text here...",
        height=200
    )

    # Analyze button
    analyze_button = st.button("Analyze Sentiment", key="analyze_btn")

    # Check if there is input text and button is pressed
    if analyze_button and user_input:
        # Perform sentiment analysis
        with st.spinner("🔍 Analyzing your text..."):
            # Get aspects
            aspects = predict(user_input)
            
            # Display results section
            st.markdown("## πŸ“Š Analysis Results")
            
            if aspects:
                for aspect in aspects:
                    try:
                        # Strip the WordPiece continuation prefix ('##') if present
                        clean_aspect = aspect.lstrip('#')
                        
                        # Predict sentiment for the aspect
                        sentiment = predict_sentiment(user_input, clean_aspect)
                        
                        # Custom styled result card
                        st.markdown(f"""
                        <div class="result-card" style="border-left: 5px solid {SENTIMENT_COLORS.get(sentiment, '#2196f3')};">
                            <h4 style="margin-bottom: 10px;">
                                <span style="color: {SENTIMENT_COLORS.get(sentiment, '#2196f3')};">
                                    {clean_aspect}
                                </span>
                            </h4>
                            <p>
                                <strong>Sentiment:</strong> 
                                <span style="color: {SENTIMENT_COLORS.get(sentiment, '#2196f3')};">
                                    {sentiment} {SENTIMENT_EMOJIS.get(sentiment, '')}
                                </span>
                            </p>
                        </div>
                        """, unsafe_allow_html=True)
                    
                    except Exception as e:
                        st.error(f"Error analyzing aspect {aspect}: {e}")
            else:
                st.warning("No specific aspects found in the text.")

if __name__ == "__main__":
    main()
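
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py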