"""Streamlit dashboard for sentiment analysis with VADER, BERT and TextBlob.

The app offers three modes (single text, two-text comparison, CSV upload),
then shows a randomly generated 14-day demo history together with a
linear-regression forecast for the following 7 days.
"""

import base64
from datetime import datetime, timedelta
from io import BytesIO

import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from sklearn.linear_model import LinearRegression
from textblob import TextBlob
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud

BERT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
HISTORY_DAYS = 14
FORECAST_DAYS = 7

# Star labels handled by bert_score(), mapped onto the [-1, 1] range that the
# other two models use.
_BERT_LABEL_SCORES = {
    '1 star': -1,
    '2 stars': -0.5,
    '3 stars': 0,
    '4 stars': 0.5,
    '5 stars': 1,
}


@st.cache_resource
def _ensure_nltk_data():
    """Download the sentence-tokenizer data once per server process.

    Streamlit re-executes this script on every widget interaction, so the
    download check is cached instead of being repeated on each rerun.
    """
    # "punkt_tab" is the resource newer NLTK releases load in sent_tokenize();
    # "punkt" is kept for older releases. A package unknown to the installed
    # NLTK makes download() return False rather than raise.
    for package in ('punkt', 'punkt_tab'):
        nltk.download(package, quiet=True)
    return True


@st.cache_resource
def _load_bert_pipeline():
    """Build the BERT sentiment pipeline once and share it across reruns."""
    return pipeline("sentiment-analysis", model=BERT_MODEL_NAME)


# Initialize sentiment models
_ensure_nltk_data()
bert_sentiment = _load_bert_pipeline()
vader_analyzer = SentimentIntensityAnalyzer()

# Generate sample past sentiment data (kept from original for demo purposes).
# Rows are ordered oldest -> newest so that the regression index below grows
# with time and the forecast really extrapolates forward.
_today = datetime.today()
dates = [
    _today - timedelta(days=offset)
    for offset in range(HISTORY_DAYS - 1, -1, -1)
]
sentiment_scores = np.random.uniform(-1, 1, len(dates))
df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})

# Train a regression model for predictions
X = np.arange(len(df)).reshape(-1, 1)
y = df["Sentiment Score"]
model = LinearRegression()
model.fit(X, y)

# Predict for next 7 days
future_dates = [
    _today + timedelta(days=offset) for offset in range(1, FORECAST_DAYS + 1)
]
X_future = np.arange(len(df), len(df) + FORECAST_DAYS).reshape(-1, 1)
predictions = model.predict(X_future)
future_df = pd.DataFrame(
    {"Date": future_dates, "Predicted Sentiment": predictions}
)


def generate_wordcloud(text):
    """Render *text* as a word cloud and return it as a base64-encoded PNG.

    Raises:
        ValueError: propagated from WordCloud when *text* contains no usable
            words (for example, only stop words).
    """
    wordcloud = WordCloud(
        width=800, height=400, background_color='white'
    ).generate(text)
    with BytesIO() as buffer:
        wordcloud.to_image().save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode()


def bert_score(result):
    """Convert a BERT pipeline result (dict with a 'label') to a score.

    '1 star' .. '5 stars' map to -1 .. 1 in steps of 0.5; any other label
    maps to 0.
    """
    return _BERT_LABEL_SCORES.get(result['label'], 0)


def get_overall_score(text, model_choice):
    """Return the sentiment score of *text* for the selected model.

    Args:
        text: the text to score.
        model_choice: one of "VADER", "BERT" or "TextBlob".

    Raises:
        ValueError: if *model_choice* is not one of the supported names.
    """
    if model_choice == "VADER":
        return vader_analyzer.polarity_scores(text)['compound']
    if model_choice == "BERT":
        # truncation=True keeps inputs longer than the model's maximum
        # sequence length from failing inside the model.
        return bert_score(bert_sentiment(text, truncation=True)[0])
    if model_choice == "TextBlob":
        return TextBlob(text).sentiment.polarity
    raise ValueError(f"Unsupported sentiment model: {model_choice!r}")


@st.cache_data
def convert_df_to_csv(frame):
    """Serialize *frame* to UTF-8 encoded CSV bytes without the index."""
    return frame.to_csv(index=False).encode('utf-8')


def _has_letters(text):
    """Return True if *text* contains at least one alphabetic character."""
    return any(char.isalpha() for char in text)


def _render_single_text(text, model_choice):
    """Show overall score, per-sentence scores, word cloud and CSV export."""
    if not text.strip():
        st.error("Please enter some text for analysis.")
        return
    if not _has_letters(text):
        st.error("Input should contain at least one alphabetic character.")
        return

    with st.spinner("Analyzing text..."):
        overall_score = get_overall_score(text, model_choice)
        st.subheader("📊 Overall Sentiment Analysis")
        st.write(f"**Sentiment Score ({model_choice})**: {overall_score:.2f}")

        # Sentence-level analysis
        sentences = nltk.sent_tokenize(text)
        sentence_scores = [
            get_overall_score(sentence, model_choice) for sentence in sentences
        ]
        sentiment_df = pd.DataFrame(
            {"Sentence": sentences, "Sentiment Score": sentence_scores}
        )
        st.subheader("🔍 Sentence-Level Sentiment")
        st.write(sentiment_df)
        fig = px.bar(
            sentiment_df,
            x="Sentence",
            y="Sentiment Score",
            title="Sentiment per Sentence",
        )
        st.plotly_chart(fig)

        # Word cloud
        st.subheader("☁️ Word Cloud")
        try:
            encoded_png = generate_wordcloud(text)
        except ValueError:
            # WordCloud refuses input that is empty after stop-word removal.
            st.info("Not enough words to draw a word cloud.")
        else:
            wordcloud_img = f'data:image/png;base64,{encoded_png}'
            st.image(wordcloud_img, use_column_width=True)

        # Download results
        csv = convert_df_to_csv(sentiment_df)
        st.download_button(
            label="Download Sentiment Data",
            data=csv,
            file_name='sentiment_data.csv',
            mime='text/csv',
        )


def _render_comparison(text_a, text_b, model_choice):
    """Show the scores of two texts side by side plus a comparison chart."""
    if not text_a.strip() or not text_b.strip():
        st.error("Please enter both texts for comparison.")
        return
    if not _has_letters(text_a) or not _has_letters(text_b):
        st.error("Both inputs should contain at least one alphabetic character.")
        return

    with st.spinner("Analyzing texts..."):
        overall_score_a = get_overall_score(text_a, model_choice)
        overall_score_b = get_overall_score(text_b, model_choice)
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Text A")
            st.write(f"**Sentiment Score ({model_choice})**: {overall_score_a:.2f}")
        with col2:
            st.subheader("Text B")
            st.write(f"**Sentiment Score ({model_choice})**: {overall_score_b:.2f}")
        comparison_df = pd.DataFrame({
            "Text": ["Text A", "Text B"],
            "Sentiment Score": [overall_score_a, overall_score_b],
        })
        fig = px.bar(
            comparison_df,
            x="Text",
            y="Sentiment Score",
            title="Sentiment Comparison",
        )
        st.plotly_chart(fig)


def _render_csv(uploaded_file, model_choice):
    """Score every row of the uploaded CSV's 'text' column and plot them."""
    if uploaded_file is None:
        st.error("Please upload a CSV file.")
        return

    try:
        df_uploaded = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return

    if "text" not in df_uploaded.columns:
        st.error("CSV file must contain a 'text' column.")
        return

    def score_cell(value):
        # Empty cells are read as NaN; leave them unscored instead of
        # passing a float to the text models.
        if pd.isna(value):
            return np.nan
        # Purely numeric cells are parsed as numbers; score their text form.
        return get_overall_score(str(value), model_choice)

    with st.spinner("Analyzing uploaded texts..."):
        df_uploaded['sentiment'] = df_uploaded['text'].apply(score_cell)
        st.subheader("Uploaded Data Sentiment Analysis")
        st.write(df_uploaded)
        fig = px.histogram(
            df_uploaded, x='sentiment', title='Sentiment Distribution'
        )
        st.plotly_chart(fig)


# Streamlit app setup
st.title("🌟 Advanced Sentiment Analysis Dashboard")

# Sidebar for user input and controls
st.sidebar.header("🔍 Sentiment Analysis Controls")
analysis_mode = st.sidebar.radio(
    "Analysis Mode",
    ["Single Text", "Compare Two Texts", "Analyze CSV File"],
)

if analysis_mode == "Single Text":
    user_input = st.sidebar.text_area("Enter text for sentiment analysis")
elif analysis_mode == "Compare Two Texts":
    user_input_a = st.sidebar.text_area("Enter first text")
    user_input_b = st.sidebar.text_area("Enter second text")
elif analysis_mode == "Analyze CSV File":
    uploaded_file = st.sidebar.file_uploader(
        "Upload a CSV file with 'text' column", type=["csv"]
    )

model_choice = st.sidebar.selectbox(
    "Choose Sentiment Model", ["VADER", "BERT", "TextBlob"]
)

# Analyze button handler
if st.sidebar.button("Analyze Sentiment"):
    if analysis_mode == "Single Text":
        _render_single_text(user_input, model_choice)
    elif analysis_mode == "Compare Two Texts":
        _render_comparison(user_input_a, user_input_b, model_choice)
    elif analysis_mode == "Analyze CSV File":
        _render_csv(uploaded_file, model_choice)

# Past sentiment trends (kept from original)
st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
fig1 = px.line(
    df,
    x='Date',
    y='Sentiment Score',
    title='Sentiment Over Time',
    markers=True,
    line_shape='spline',
)
st.plotly_chart(fig1)

# Future sentiment predictions (kept from original)
st.subheader("🔮 Sentiment Prediction for Next 7 Days")
fig2 = px.line(
    future_df,
    x='Date',
    y='Predicted Sentiment',
    title='Predicted Sentiment Trend',
    markers=True,
    line_shape='spline',
)
st.plotly_chart(fig2)

# Reset button
if st.sidebar.button('🔄 Reset Analysis'):
    # st.rerun replaced st.experimental_rerun; use whichever name the
    # installed Streamlit provides.
    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
    _rerun()