Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | |
| import numpy as np | |
| import pandas as pd | |
| from datetime import datetime, timedelta | |
| import plotly.express as px | |
| from sklearn.linear_model import LinearRegression | |
| from wordcloud import WordCloud | |
| import base64 | |
| from io import BytesIO | |
| import nltk | |
| from textblob import TextBlob | |
# Download the NLTK sentence-tokenizer data used by nltk.sent_tokenize.
# Newer NLTK releases look up the "punkt_tab" resource while older ones use
# "punkt", so both are requested. NOTE(review): a resource name unknown to the
# installed NLTK is expected to be reported (download returns False) rather
# than raised -- confirm against the pinned NLTK version.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource, quiet=True)


def _no_cache(func):
    """Fallback decorator used when Streamlit offers no resource cache."""
    return func


# Streamlit re-executes this script on every widget interaction. Without a
# resource cache the BERT pipeline would be rebuilt on each rerun. Older
# Streamlit releases expose the same facility as experimental_singleton.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or _no_cache
)


@_cache_resource
def _load_bert_pipeline():
    """Build the multilingual BERT star-rating sentiment pipeline once."""
    return pipeline(
        "sentiment-analysis",
        model="nlptown/bert-base-multilingual-uncased-sentiment",
    )


@_cache_resource
def _load_vader_analyzer():
    """Build the VADER analyzer once."""
    return SentimentIntensityAnalyzer()


# Initialize sentiment models (module-level names used by the rest of the app)
bert_sentiment = _load_bert_pipeline()
vader_analyzer = _load_vader_analyzer()
# Generate sample past sentiment data (kept from original for demo purposes).
# The scores are random placeholders, not real measurements.
_HISTORY_DAYS = 14
_FORECAST_DAYS = 7

# Capture "now" once so every generated date shares the same time of day.
_today = datetime.today()

# Dates run oldest -> newest so that row index i is the i-th day of the
# window. The regression below uses the row index as its time axis; with a
# newest-first ordering the fitted trend would run backwards in time and the
# "future" indices would actually extrapolate into the past.
dates = [
    _today - timedelta(days=days_ago)
    for days_ago in range(_HISTORY_DAYS - 1, -1, -1)
]
sentiment_scores = np.random.uniform(-1, 1, len(dates))
df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})

# Train a regression model for predictions (feature = day index, target = score)
X = np.arange(len(df)).reshape(-1, 1)
y = df["Sentiment Score"]
model = LinearRegression()
model.fit(X, y)

# Predict for the next 7 days: indices continue right after the last
# historical row, matching today+1 .. today+7.
future_dates = [
    _today + timedelta(days=days_ahead)
    for days_ahead in range(1, _FORECAST_DAYS + 1)
]
X_future = np.arange(len(df), len(df) + _FORECAST_DAYS).reshape(-1, 1)
predictions = model.predict(X_future)
future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
# Generate Word Cloud
def generate_wordcloud(text):
    """Render *text* as a word cloud and return it as base64-encoded PNG text.

    The cloud is configured as 800x400 on a white background, saved as PNG
    into an in-memory buffer, and the buffer contents are base64-encoded and
    decoded to ``str``.
    """
    cloud = WordCloud(width=800, height=400, background_color='white')
    rendered = cloud.generate(text)
    png_buffer = BytesIO()
    picture = rendered.to_image()
    picture.save(png_buffer, format='PNG')
    encoded_bytes = base64.b64encode(png_buffer.getvalue())
    return encoded_bytes.decode()
# Helper function to convert BERT labels to numerical scores
def bert_score(result):
    """Map a BERT star-rating label to a numeric sentiment score.

    Args:
        result: Mapping with a ``'label'`` entry such as ``'4 stars'``.

    Returns:
        -1, -0.5, 0, 0.5 or 1 for ``'1 star'`` through ``'5 stars'``;
        0 for any other label.
    """
    star_to_score = {
        '1 star': -1,
        '2 stars': -0.5,
        '3 stars': 0,
        '4 stars': 0.5,
        '5 stars': 1,
    }
    # Unrecognized labels are treated as neutral.
    return star_to_score.get(result['label'], 0)
# Get overall sentiment score based on selected model
def get_overall_score(text, model_choice):
    """Return the sentiment score of *text* from the selected model.

    Args:
        text: The text to score.
        model_choice: One of ``"VADER"``, ``"BERT"`` or ``"TextBlob"``.

    Returns:
        For VADER, the ``'compound'`` entry of ``polarity_scores``; for BERT,
        the first prediction's star label converted by ``bert_score``; for
        TextBlob, ``sentiment.polarity``.

    Raises:
        ValueError: If *model_choice* is not one of the supported names.
            Previously this fell through and returned ``None``, which only
            failed later when the caller formatted the score.
    """
    if model_choice == "VADER":
        return vader_analyzer.polarity_scores(text)['compound']
    if model_choice == "BERT":
        # Ask the tokenizer to truncate so inputs longer than the model's
        # maximum sequence length are scored on their leading part instead
        # of raising inside the model.
        result = bert_sentiment(text, truncation=True)[0]
        return bert_score(result)
    if model_choice == "TextBlob":
        return TextBlob(text).sentiment.polarity
    raise ValueError(
        f"Unsupported sentiment model: {model_choice!r} "
        "(expected 'VADER', 'BERT' or 'TextBlob')"
    )
# Streamlit app setup
st.title("๐ Advanced Sentiment Analysis Dashboard")

# Sidebar for user input and controls
_sidebar = st.sidebar
_sidebar.header("๐ Sentiment Analysis Controls")

_analysis_modes = ["Single Text", "Compare Two Texts", "Analyze CSV File"]
analysis_mode = _sidebar.radio("Analysis Mode", _analysis_modes)

# Only the input widgets of the active mode are created, so each of the
# input variables below exists only while its mode is selected.
if analysis_mode == "Single Text":
    user_input = _sidebar.text_area("Enter text for sentiment analysis")
elif analysis_mode == "Compare Two Texts":
    user_input_a = _sidebar.text_area("Enter first text")
    user_input_b = _sidebar.text_area("Enter second text")
elif analysis_mode == "Analyze CSV File":
    uploaded_file = _sidebar.file_uploader(
        "Upload a CSV file with 'text' column",
        type=["csv"],
    )

_model_names = ["VADER", "BERT", "TextBlob"]
model_choice = _sidebar.selectbox("Choose Sentiment Model", _model_names)
# Analyze button handler
def _has_letters(text):
    """Return True when *text* contains at least one alphabetic character."""
    return any(ch.isalpha() for ch in text)


def _render_single_text(text, model_name):
    """Validate one text, score it overall and per sentence, and render it."""
    if not text.strip():
        st.error("Please enter some text for analysis.")
        return
    if not _has_letters(text):
        st.error("Input should contain at least one alphabetic character.")
        return

    with st.spinner("Analyzing text..."):
        overall_score = get_overall_score(text, model_name)
        # Sentence-level analysis goes through the same scoring function as
        # the overall score, so both always use the same model dispatch.
        sentences = nltk.sent_tokenize(text)
        sentence_scores = [get_overall_score(s, model_name) for s in sentences]

    st.subheader("๐ Overall Sentiment Analysis")
    st.write(f"**Sentiment Score ({model_name})**: {overall_score:.2f}")

    sentiment_df = pd.DataFrame({"Sentence": sentences, "Sentiment Score": sentence_scores})
    st.subheader("๐ Sentence-Level Sentiment")
    st.write(sentiment_df)
    fig = px.bar(sentiment_df, x="Sentence", y="Sentiment Score", title="Sentiment per Sentence")
    st.plotly_chart(fig)

    # Word cloud. The wordcloud library raises ValueError when no usable word
    # is left in the text; show a notice instead of crashing the whole app.
    st.subheader("โ๏ธ Word Cloud")
    try:
        encoded_png = generate_wordcloud(text)
    except ValueError:
        st.info("Not enough words to draw a word cloud for this text.")
    else:
        st.image(f'data:image/png;base64,{encoded_png}', use_column_width=True)

    # Download results
    st.download_button(
        label="Download Sentiment Data",
        data=sentiment_df.to_csv(index=False).encode('utf-8'),
        file_name='sentiment_data.csv',
        mime='text/csv',
    )


def _render_comparison(text_a, text_b, model_name):
    """Validate two texts, score both, and render them side by side."""
    if not text_a.strip() or not text_b.strip():
        st.error("Please enter both texts for comparison.")
        return
    if not (_has_letters(text_a) and _has_letters(text_b)):
        st.error("Both inputs should contain at least one alphabetic character.")
        return

    with st.spinner("Analyzing texts..."):
        overall_score_a = get_overall_score(text_a, model_name)
        overall_score_b = get_overall_score(text_b, model_name)

    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Text A")
        st.write(f"**Sentiment Score ({model_name})**: {overall_score_a:.2f}")
    with col2:
        st.subheader("Text B")
        st.write(f"**Sentiment Score ({model_name})**: {overall_score_b:.2f}")

    comparison_df = pd.DataFrame({
        "Text": ["Text A", "Text B"],
        "Sentiment Score": [overall_score_a, overall_score_b],
    })
    fig = px.bar(comparison_df, x="Text", y="Sentiment Score", title="Sentiment Comparison")
    st.plotly_chart(fig)


def _render_csv_analysis(csv_file, model_name):
    """Score every row of the uploaded CSV's 'text' column and render it."""
    try:
        df_uploaded = pd.read_csv(csv_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # An unreadable upload is a user error, not an application crash.
        st.error(f"Could not read the CSV file: {exc}")
        return

    if "text" not in df_uploaded.columns:
        st.error("CSV file must contain a 'text' column.")
        return

    def _score_cell(cell):
        # Empty cells are read as NaN and numeric cells as numbers; the
        # scorers expect str. Missing cells stay NaN so they are not counted
        # as a sentiment value; everything else is scored as text.
        if pd.isna(cell):
            return np.nan
        return get_overall_score(str(cell), model_name)

    with st.spinner("Analyzing uploaded texts..."):
        df_uploaded['sentiment'] = df_uploaded['text'].apply(_score_cell)

    st.subheader("Uploaded Data Sentiment Analysis")
    st.write(df_uploaded)
    fig = px.histogram(df_uploaded, x='sentiment', title='Sentiment Distribution')
    st.plotly_chart(fig)


if st.sidebar.button("Analyze Sentiment"):
    if analysis_mode == "Single Text":
        _render_single_text(user_input, model_choice)
    elif analysis_mode == "Compare Two Texts":
        _render_comparison(user_input_a, user_input_b, model_choice)
    elif analysis_mode == "Analyze CSV File":
        if uploaded_file is None:
            st.error("Please upload a CSV file.")
        else:
            _render_csv_analysis(uploaded_file, model_choice)
# Both trend charts share the same line styling.
_trend_line_style = {"markers": True, "line_shape": 'spline'}

# Past sentiment trends (kept from original)
st.subheader("๐ Past Sentiment Trends (Last 14 Days)")
fig1 = px.line(
    df,
    x='Date',
    y='Sentiment Score',
    title='Sentiment Over Time',
    **_trend_line_style,
)
st.plotly_chart(fig1)

# Future sentiment predictions (kept from original)
st.subheader("๐ฎ Sentiment Prediction for Next 7 Days")
fig2 = px.line(
    future_df,
    x='Date',
    y='Predicted Sentiment',
    title='Predicted Sentiment Trend',
    **_trend_line_style,
)
st.plotly_chart(fig2)
# Reset button
if st.sidebar.button('๐ Reset Analysis'):
    # Newer Streamlit releases provide st.rerun and no longer ship
    # st.experimental_rerun; older releases only have the experimental name.
    # Prefer the current API and fall back so the button works on both.
    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
    _rerun()