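# SentimentAnalyzerFinbert
#
# Sleeping
#
# File size: 7,998 Bytes
# (The lines above are page-header text captured with the source; they are
# kept as comments so the file parses as Python.)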

import streamlit as st
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import plotly.express as px
from sklearn.linear_model import LinearRegression
from wordcloud import WordCloud
import base64
from io import BytesIO
import nltk
from textblob import TextBlob

# Download NLTK data needed by nltk.sent_tokenize.
# Newer NLTK releases look up the "punkt_tab" resource instead of the pickled
# "punkt" models, so both are requested; quiet=True keeps the log clean because
# Streamlit re-executes this script on every user interaction.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource, quiet=True)


@st.cache_resource
def _load_bert_pipeline():
    """Build the Hugging Face sentiment pipeline once per server process.

    Without caching, the model would be re-instantiated on every Streamlit
    rerun (each widget interaction re-executes the whole script).
    """
    return pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")


# Initialize sentiment models
bert_sentiment = _load_bert_pipeline()
vader_analyzer = SentimentIntensityAnalyzer()

# Generate sample past sentiment data (kept from original for demo purposes).
# The scores are random placeholders, not real measurements.
# Dates are built oldest -> newest so that the row index used as the regression
# feature below increases with time; "today" is captured once so the history
# and the forecast share the same reference instant.
_today = datetime.today()
dates = [_today - timedelta(days=days_ago) for days_ago in range(13, -1, -1)]
sentiment_scores = np.random.uniform(-1, 1, len(dates))
df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})

# Train a regression model for predictions: feature = day index (0 = oldest day,
# len(df) - 1 = today), target = that day's sentiment score.
X = np.arange(len(df)).reshape(-1, 1)
y = df["Sentiment Score"]
model = LinearRegression()
model.fit(X, y)

# Predict for next 7 days: indices len(df) .. len(df) + 6 continue the time axis
# directly after "today", matching future_dates (today + 1 .. today + 7).
future_dates = [_today + timedelta(days=days_ahead) for days_ahead in range(1, 8)]
X_future = np.arange(len(df), len(df) + 7).reshape(-1, 1)
predictions = model.predict(X_future)
future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})

# Generate Word Cloud
def generate_wordcloud(text):
    """Render ``text`` as an 800x400 white-background word cloud.

    Returns the PNG image encoded as a base64 ``str`` (no data-URI prefix).
    """
    cloud = WordCloud(width=800, height=400, background_color='white')
    cloud.generate(text)

    # Serialize the rendered image into an in-memory PNG.
    png_buffer = BytesIO()
    cloud.to_image().save(png_buffer, format='PNG')

    png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode()

# Helper function to convert BERT labels to numerical scores
def bert_score(result):
    """Translate a star-rating label into a numeric sentiment score.

    ``result`` is a mapping with a ``'label'`` key such as ``'1 star'`` or
    ``'4 stars'``. Ratings map linearly onto -1 .. 1 in steps of 0.5; any
    unrecognised label is treated as neutral (0).
    """
    stars_to_score = {
        '1 star': -1,
        '2 stars': -0.5,
        '3 stars': 0,
        '4 stars': 0.5,
        '5 stars': 1,
    }
    return stars_to_score.get(result['label'], 0)

# Get overall sentiment score based on selected model
def get_overall_score(text, model_choice):
    """Return a numeric sentiment score for ``text`` from the chosen model.

    Args:
        text: The text to score.
        model_choice: One of ``"VADER"``, ``"BERT"`` or ``"TextBlob"``.

    Returns:
        VADER's ``compound`` value, the BERT star label converted by
        ``bert_score`` (-1 .. 1 in steps of 0.5), or TextBlob's polarity.

    Raises:
        ValueError: If ``model_choice`` is not one of the supported names.
            (Previously this fell through and returned ``None``, which only
            failed later when the caller tried to format the score.)
    """
    if model_choice == "VADER":
        return vader_analyzer.polarity_scores(text)['compound']
    if model_choice == "BERT":
        # truncation=True clips inputs that exceed the model's maximum
        # sequence length instead of letting the forward pass raise.
        result = bert_sentiment(text, truncation=True)[0]
        return bert_score(result)
    if model_choice == "TextBlob":
        return TextBlob(text).sentiment.polarity
    raise ValueError(f"Unsupported sentiment model: {model_choice!r}")

# Streamlit app setup: page title shown above every dashboard section
st.title("🌟 Advanced Sentiment Analysis Dashboard")

# Sidebar for user input and controls. Widgets are created in the same order
# as before: header, mode picker, the mode-specific input(s), model picker.
with st.sidebar:
    st.header("🔍 Sentiment Analysis Controls")
    analysis_mode = st.radio(
        "Analysis Mode",
        ["Single Text", "Compare Two Texts", "Analyze CSV File"],
    )

    # Only the input variable(s) belonging to the active mode get bound.
    if analysis_mode == "Analyze CSV File":
        uploaded_file = st.file_uploader("Upload a CSV file with 'text' column", type=["csv"])
    elif analysis_mode == "Compare Two Texts":
        user_input_a = st.text_area("Enter first text")
        user_input_b = st.text_area("Enter second text")
    elif analysis_mode == "Single Text":
        user_input = st.text_area("Enter text for sentiment analysis")

    model_choice = st.selectbox("Choose Sentiment Model", ["VADER", "BERT", "TextBlob"])

@st.cache_data
def convert_df_to_csv(df):
    """Serialize ``df`` to UTF-8 encoded CSV bytes (index column omitted)."""
    return df.to_csv(index=False).encode('utf-8')


def _has_letters(text):
    """Return True if ``text`` contains at least one alphabetic character."""
    return any(ch.isalpha() for ch in text)


def _render_single_text(text, model_choice):
    """Validate one text, then show overall score, per-sentence scores, word cloud and CSV download."""
    if not text.strip():
        st.error("Please enter some text for analysis.")
        return
    if not _has_letters(text):
        st.error("Input should contain at least one alphabetic character.")
        return

    with st.spinner("Analyzing text..."):
        overall_score = get_overall_score(text, model_choice)
        st.subheader("📊 Overall Sentiment Analysis")
        st.write(f"**Sentiment Score ({model_choice})**: {overall_score:.2f}")

        # Sentence-level analysis: reuse get_overall_score so the per-sentence
        # scoring can never drift from the overall scoring for a given model.
        sentences = nltk.sent_tokenize(text)
        sentence_scores = [get_overall_score(sentence, model_choice) for sentence in sentences]

        sentiment_df = pd.DataFrame({"Sentence": sentences, "Sentiment Score": sentence_scores})
        st.subheader("🔍 Sentence-Level Sentiment")
        st.write(sentiment_df)
        fig = px.bar(sentiment_df, x="Sentence", y="Sentiment Score", title="Sentiment per Sentence")
        st.plotly_chart(fig)

        # Word cloud
        st.subheader("☁️ Word Cloud")
        try:
            wordcloud_b64 = generate_wordcloud(text)
        except ValueError:
            # WordCloud raises ValueError when no words are left to draw
            # (e.g. the input consists solely of stop words).
            st.info("Not enough distinct words to build a word cloud.")
        else:
            st.image(f'data:image/png;base64,{wordcloud_b64}', use_column_width=True)

        # Download results
        csv_bytes = convert_df_to_csv(sentiment_df)
        st.download_button(
            label="Download Sentiment Data",
            data=csv_bytes,
            file_name='sentiment_data.csv',
            mime='text/csv',
        )


def _render_comparison(text_a, text_b, model_choice):
    """Validate two texts, then show their scores side by side and as a bar chart."""
    if not text_a.strip() or not text_b.strip():
        st.error("Please enter both texts for comparison.")
        return
    if not (_has_letters(text_a) and _has_letters(text_b)):
        st.error("Both inputs should contain at least one alphabetic character.")
        return

    with st.spinner("Analyzing texts..."):
        overall_score_a = get_overall_score(text_a, model_choice)
        overall_score_b = get_overall_score(text_b, model_choice)
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Text A")
            st.write(f"**Sentiment Score ({model_choice})**: {overall_score_a:.2f}")
        with col2:
            st.subheader("Text B")
            st.write(f"**Sentiment Score ({model_choice})**: {overall_score_b:.2f}")
        comparison_df = pd.DataFrame({
            "Text": ["Text A", "Text B"],
            "Sentiment Score": [overall_score_a, overall_score_b],
        })
        fig = px.bar(comparison_df, x="Text", y="Sentiment Score", title="Sentiment Comparison")
        st.plotly_chart(fig)


def _render_csv(uploaded_file, model_choice):
    """Score every row of the uploaded CSV's 'text' column and show the table plus a histogram."""
    if uploaded_file is None:
        st.error("Please upload a CSV file.")
        return

    try:
        df_uploaded = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Report unreadable uploads in the UI instead of crashing the script.
        st.error(f"Could not read the CSV file: {exc}")
        return

    if "text" not in df_uploaded.columns:
        st.error("CSV file must contain a 'text' column.")
        return

    def _score_cell(value):
        # Empty cells are parsed as NaN and numeric-looking cells as numbers,
        # while the analyzers expect str. Blank rows get no score (NaN).
        cell_text = "" if pd.isna(value) else str(value)
        if not cell_text.strip():
            return float("nan")
        return get_overall_score(cell_text, model_choice)

    with st.spinner("Analyzing uploaded texts..."):
        df_uploaded['sentiment'] = df_uploaded['text'].apply(_score_cell)
        st.subheader("Uploaded Data Sentiment Analysis")
        st.write(df_uploaded)
        fig = px.histogram(df_uploaded, x='sentiment', title='Sentiment Distribution')
        st.plotly_chart(fig)


# Analyze button handler
# NOTE(review): a Streamlit button is only True on the rerun triggered by its
# own click, so interacting with another widget afterwards (for example the
# download button) reruns the script with this condition False and the results
# disappear. Persisting results in st.session_state would avoid that -- confirm
# the desired UX before changing it.
if st.sidebar.button("Analyze Sentiment"):
    if analysis_mode == "Single Text":
        _render_single_text(user_input, model_choice)
    elif analysis_mode == "Compare Two Texts":
        _render_comparison(user_input_a, user_input_b, model_choice)
    elif analysis_mode == "Analyze CSV File":
        _render_csv(uploaded_file, model_choice)

# Line styling shared by the two trend charts below
_trend_line_style = dict(markers=True, line_shape='spline')

# Past sentiment trends (kept from original): plots the demo history frame
st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
fig1 = px.line(
    df,
    x='Date',
    y='Sentiment Score',
    title='Sentiment Over Time',
    **_trend_line_style,
)
st.plotly_chart(fig1)

# Future sentiment predictions (kept from original): plots the regression forecast
st.subheader("🔮 Sentiment Prediction for Next 7 Days")
fig2 = px.line(
    future_df,
    x='Date',
    y='Predicted Sentiment',
    title='Predicted Sentiment Trend',
    **_trend_line_style,
)
st.plotly_chart(fig2)

# Reset button: forces an immediate rerun of the script.
# st.experimental_rerun is deprecated in favour of st.rerun and is absent from
# newer Streamlit releases; prefer st.rerun when it exists and fall back to the
# old name so older installations keep working.
if st.sidebar.button('🔄 Reset Analysis'):
    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
    _rerun()