SentimentAnalyzerFinbert

Sleeping

App Files Files Community

SentimentAnalyzerFinbert / app.py

Soundaryasos

Update app.py

04ef2d6 verified 9 months ago

raw

history blame

8 kB

	import streamlit as st
	from transformers import pipeline
	from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
	import numpy as np
	import pandas as pd
	from datetime import datetime, timedelta
	import plotly.express as px
	from sklearn.linear_model import LinearRegression
	from wordcloud import WordCloud
	import base64
	from io import BytesIO
	import nltk
	from textblob import TextBlob

	# Download the NLTK sentence-tokenizer data used by nltk.sent_tokenize.
	# Newer NLTK releases look for "punkt_tab" instead of the pickled "punkt"
	# models, so request both; a failed or unknown download just returns False.
	for _nltk_resource in ("punkt", "punkt_tab"):
	    nltk.download(_nltk_resource, quiet=True)


	@st.cache_resource
	def _load_bert_pipeline():
	    """Build the star-rating BERT sentiment pipeline.

	    Wrapped in ``st.cache_resource`` so the model is created once and reused,
	    rather than being re-created every time Streamlit re-executes the script.
	    """
	    return pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")


	# Initialize sentiment models
	bert_sentiment = _load_bert_pipeline()
	vader_analyzer = SentimentIntensityAnalyzer()

	# Generate sample past sentiment data (random, for demo purposes only).
	# Dates run oldest -> newest so that row index i (the regression feature)
	# increases with time; otherwise the fitted slope would be time-reversed and
	# the "future" indices below would extrapolate in the wrong direction.
	_today = datetime.today()  # single anchor so past and future dates line up
	dates = [_today - timedelta(days=i) for i in range(13, -1, -1)]
	sentiment_scores = np.random.uniform(-1, 1, len(dates))
	df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})

	# Train a regression model for predictions (feature = day index 0..13)
	X = np.arange(len(df)).reshape(-1, 1)
	y = df["Sentiment Score"]
	model = LinearRegression()
	model.fit(X, y)

	# Predict for next 7 days: indices continue directly after the last past day
	future_dates = [_today + timedelta(days=i) for i in range(1, 8)]
	X_future = np.arange(len(df), len(df) + len(future_dates)).reshape(-1, 1)
	predictions = model.predict(X_future)
	future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})

	# Generate Word Cloud
	def generate_wordcloud(text):
	    """Render a word cloud for *text* and return it as base64-encoded PNG text.

	    The cloud is drawn at 800x400 on a white background. Only the base64
	    payload is returned; callers add any ``data:`` URI prefix themselves.
	    """
	    cloud = WordCloud(width=800, height=400, background_color='white')
	    cloud.generate(text)

	    # Serialize the rendered image to PNG bytes in memory.
	    png_buffer = BytesIO()
	    cloud.to_image().save(png_buffer, format='PNG')
	    png_bytes = png_buffer.getvalue()

	    return base64.b64encode(png_bytes).decode()

	# Helper function to convert BERT labels to numerical scores
	def bert_score(result):
	    """Map a star-rating classification result onto a score in [-1, 1].

	    ``result`` is a mapping with a ``'label'`` entry such as ``'1 star'`` or
	    ``'4 stars'``. One star maps to -1, five stars to 1, in steps of 0.5.
	    Any label outside the five known ratings scores 0.
	    """
	    star_scores = {
	        '1 star': -1,
	        '2 stars': -0.5,
	        '3 stars': 0,
	        '4 stars': 0.5,
	        '5 stars': 1,
	    }
	    return star_scores.get(result['label'], 0)

	# Get overall sentiment score based on selected model
	def get_overall_score(text, model_choice):
	    """Return a single sentiment score for *text* using the chosen model.

	    Args:
	        text: The text to score.
	        model_choice: One of ``"VADER"``, ``"BERT"`` or ``"TextBlob"``.

	    Returns:
	        VADER's ``compound`` score, the BERT star label converted by
	        ``bert_score``, or TextBlob's polarity, depending on ``model_choice``.

	    Raises:
	        ValueError: If ``model_choice`` is not one of the supported names.
	            (Previously this fell through and returned ``None``, which only
	            surfaced later as a formatting error in the caller.)
	    """
	    if model_choice == "VADER":
	        return vader_analyzer.polarity_scores(text)['compound']
	    if model_choice == "BERT":
	        # truncation=True keeps inputs longer than the model's maximum
	        # sequence length from raising inside the pipeline.
	        result = bert_sentiment(text, truncation=True)[0]
	        return bert_score(result)
	    if model_choice == "TextBlob":
	        return TextBlob(text).sentiment.polarity
	    raise ValueError(f"Unsupported sentiment model: {model_choice!r}")

	# Page title (main area)
	st.title("🌟 Advanced Sentiment Analysis Dashboard")

	# Sidebar: analysis mode, the input widgets for that mode, and the model picker.
	# Only the input variables belonging to the selected mode are defined.
	with st.sidebar:
	    st.header("🔍 Sentiment Analysis Controls")
	    analysis_mode = st.radio(
	        "Analysis Mode",
	        ["Single Text", "Compare Two Texts", "Analyze CSV File"],
	    )

	    if analysis_mode == "Single Text":
	        user_input = st.text_area("Enter text for sentiment analysis")
	    elif analysis_mode == "Compare Two Texts":
	        user_input_a = st.text_area("Enter first text")
	        user_input_b = st.text_area("Enter second text")
	    elif analysis_mode == "Analyze CSV File":
	        uploaded_file = st.file_uploader(
	            "Upload a CSV file with 'text' column",
	            type=["csv"],
	        )

	    model_choice = st.selectbox(
	        "Choose Sentiment Model",
	        ["VADER", "BERT", "TextBlob"],
	    )

	# Analyze button handler: runs the analysis for the selected mode and renders
	# the results in the main area. Nothing here runs until the button is clicked.
	if st.sidebar.button("Analyze Sentiment"):
	    if analysis_mode == "Single Text":
	        if not user_input.strip():
	            st.error("Please enter some text for analysis.")
	        elif not any(c.isalpha() for c in user_input):
	            st.error("Input should contain at least one alphabetic character.")
	        else:
	            with st.spinner("Analyzing text..."):
	                overall_score = get_overall_score(user_input, model_choice)
	                # Sentence-level analysis: reuse the same per-model dispatch
	                # as the overall score instead of duplicating it here.
	                sentences = nltk.sent_tokenize(user_input)
	                sentence_scores = [get_overall_score(s, model_choice) for s in sentences]

	            st.subheader("📊 Overall Sentiment Analysis")
	            st.write(f"Sentiment Score ({model_choice}): {overall_score:.2f}")

	            sentiment_df = pd.DataFrame({"Sentence": sentences, "Sentiment Score": sentence_scores})
	            st.subheader("🔍 Sentence-Level Sentiment")
	            st.write(sentiment_df)
	            fig = px.bar(sentiment_df, x="Sentence", y="Sentiment Score", title="Sentiment per Sentence")
	            st.plotly_chart(fig)

	            # Word cloud. WordCloud raises ValueError when the text contains
	            # no usable words (e.g. only stopwords), so report that instead
	            # of letting the whole page fail.
	            st.subheader("☁️ Word Cloud")
	            try:
	                wordcloud_b64 = generate_wordcloud(user_input)
	            except ValueError:
	                st.info("Not enough words to build a word cloud for this text.")
	            else:
	                st.image(f'data:image/png;base64,{wordcloud_b64}', use_column_width=True)

	            # Download results
	            csv = sentiment_df.to_csv(index=False).encode('utf-8')
	            st.download_button(
	                label="Download Sentiment Data",
	                data=csv,
	                file_name='sentiment_data.csv',
	                mime='text/csv',
	            )

	    elif analysis_mode == "Compare Two Texts":
	        if not user_input_a.strip() or not user_input_b.strip():
	            st.error("Please enter both texts for comparison.")
	        elif not any(c.isalpha() for c in user_input_a) or not any(c.isalpha() for c in user_input_b):
	            st.error("Both inputs should contain at least one alphabetic character.")
	        else:
	            with st.spinner("Analyzing texts..."):
	                overall_score_a = get_overall_score(user_input_a, model_choice)
	                overall_score_b = get_overall_score(user_input_b, model_choice)

	            col1, col2 = st.columns(2)
	            with col1:
	                st.subheader("Text A")
	                st.write(f"Sentiment Score ({model_choice}): {overall_score_a:.2f}")
	            with col2:
	                st.subheader("Text B")
	                st.write(f"Sentiment Score ({model_choice}): {overall_score_b:.2f}")

	            comparison_df = pd.DataFrame({
	                "Text": ["Text A", "Text B"],
	                "Sentiment Score": [overall_score_a, overall_score_b],
	            })
	            fig = px.bar(comparison_df, x="Text", y="Sentiment Score", title="Sentiment Comparison")
	            st.plotly_chart(fig)

	    elif analysis_mode == "Analyze CSV File":
	        if uploaded_file is None:
	            st.error("Please upload a CSV file.")
	        else:
	            try:
	                df_uploaded = pd.read_csv(uploaded_file)
	            except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	                # Empty, malformed or non-UTF-8 uploads are user errors, not crashes.
	                st.error(f"Could not read the CSV file: {exc}")
	            else:
	                if "text" not in df_uploaded.columns:
	                    st.error("CSV file must contain a 'text' column.")
	                else:
	                    with st.spinner("Analyzing uploaded texts..."):
	                        texts = df_uploaded['text']
	                        # Blank or missing cells arrive as NaN (a float), which
	                        # the analyzers cannot handle; leave those rows unscored.
	                        has_text = texts.notna() & texts.astype(str).str.strip().ne('')
	                        df_uploaded['sentiment'] = [
	                            get_overall_score(str(value), model_choice) if usable else np.nan
	                            for value, usable in zip(texts, has_text)
	                        ]

	                    skipped_rows = int((~has_text).sum())
	                    if skipped_rows:
	                        st.warning(f"Skipped {skipped_rows} row(s) with no text.")

	                    st.subheader("Uploaded Data Sentiment Analysis")
	                    st.write(df_uploaded)
	                    fig = px.histogram(df_uploaded, x='sentiment', title='Sentiment Distribution')
	                    st.plotly_chart(fig)

	# Both trend charts share the same line styling: point markers on a smoothed line.
	_line_style = {"markers": True, "line_shape": 'spline'}

	# Past sentiment trends (demo data built at the top of the script)
	st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
	fig1 = px.line(
	    df,
	    x='Date',
	    y='Sentiment Score',
	    title='Sentiment Over Time',
	    **_line_style,
	)
	st.plotly_chart(fig1)

	# Future sentiment predictions (output of the linear regression)
	st.subheader("🔮 Sentiment Prediction for Next 7 Days")
	fig2 = px.line(
	    future_df,
	    x='Date',
	    y='Predicted Sentiment',
	    title='Predicted Sentiment Trend',
	    **_line_style,
	)
	st.plotly_chart(fig2)

	# Reset button: re-run the script from the top.
	# st.experimental_rerun was deprecated in favour of st.rerun and is absent
	# from current Streamlit releases, so prefer st.rerun and fall back to the
	# old name only on versions that predate it.
	if st.sidebar.button('🔄 Reset Analysis'):
	    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
	    _rerun()