# Hugging Face Space page residue (not part of the program):
#   Soundaryasos's picture
#   Update app.py
#   5a4767a verified
#   raw | history | blame
#   4.53 kB
import streamlit as st
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
from io import BytesIO
import base64
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
from textblob import TextBlob
# Ensure necessary NLTK resources are downloaded (idempotent: fetch only when missing).
# BUG FIX: 'punkt' is a tokenizer model stored under 'tokenizers/punkt', not
# 'corpora/punkt'; the original lookup always raised LookupError for it, so
# punkt was re-downloaded on every application start.
nltk_resources = [
    ('tokenizers/punkt', 'punkt'),
    ('corpora/stopwords', 'stopwords'),
    ('corpora/wordnet', 'wordnet'),
]
for data_path, resource in nltk_resources:
    try:
        nltk.data.find(data_path)
    except LookupError:
        nltk.download(resource)
# Streamlit Page Configuration.
# FIX: the original page_icon was mojibake ("๐Ÿ“Š" — the UTF-8 bytes of the
# bar-chart emoji decoded with the wrong codec); restored to the intended 📊.
st.set_page_config(
    page_title="SentiMind Pro - Advanced Sentiment Analysis",
    page_icon="📊",
    layout="wide",
)
# Build every sentiment backend once and cache it for the Streamlit session.
@st.cache_resource()
def load_models():
    """Load the sentiment backends used by the app.

    Returns a dict with keys:
      'vader'    -- a SentimentIntensityAnalyzer instance
      'textblob' -- the TextBlob class (instantiated per call site)
      'bert'     -- a transformers sentiment-analysis pipeline, or None if
                    the DistilBERT checkpoint cannot be loaded
    """
    loaded = {
        'vader': SentimentIntensityAnalyzer(),
        'textblob': TextBlob,
    }
    bert_pipeline = None
    try:
        checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
        bert_pipeline = pipeline(
            "sentiment-analysis",
            model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
            tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        )
    except Exception as e:
        # Best-effort: the app degrades gracefully without the BERT backend.
        st.warning(f"Could not load BERT model: {e}")
    loaded['bert'] = bert_pipeline
    return loaded

models = load_models()
# Text Preprocessing Function
def preprocess_text(text):
    """Normalize raw text for analysis.

    Lowercases, strips URLs, @mentions/#hashtags and punctuation, collapses
    whitespace, then drops English stopwords and lemmatizes the remaining
    tokens. Returns the cleaned tokens re-joined with single spaces.
    """
    cleaned = text.lower()
    # Removal order matters: URLs first, then mentions/hashtags, then punctuation.
    for pattern in (r'http\S+|www\S+', r'@\w+|#\w+', r'[^\w\s]'):
        cleaned = re.sub(pattern, '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    kept = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(cleaned)
        if token not in stop_words
    ]
    return ' '.join(kept)
# Sentiment Analysis Function
def analyze_sentiment(text):
    """Score *text* with VADER, DistilBERT (if loaded) and TextBlob.

    Returns a dict with per-model polarity scores (roughly in [-1, 1]) under
    'vader', 'bert' and 'textblob', plus a weighted 'combined' score
    (0.4*VADER + 0.4*BERT + 0.2*TextBlob).
    """
    # FIX: the original computed preprocess_text(text) into a local that was
    # never used — every backend scores the raw text. The dead call (which ran
    # tokenization + lemmatization on every request) has been removed.
    vader_score = models['vader'].polarity_scores(text)['compound']
    if models['bert']:
        bert_result = models['bert'](text)[0]
        # The pipeline returns a confidence in [0, 1]; sign it by the label.
        bert_score = bert_result['score'] if bert_result['label'] == 'POSITIVE' else -bert_result['score']
    else:
        bert_score = 0  # BERT unavailable: contributes nothing to the blend
    textblob_score = models['textblob'](text).sentiment.polarity
    combined_score = (0.4 * vader_score + 0.4 * bert_score + 0.2 * textblob_score)
    return {
        'vader': vader_score,
        'bert': bert_score,
        'textblob': textblob_score,
        'combined': combined_score,
    }
# Word Cloud Generation
def generate_wordcloud(text):
    """Render *text* as a word cloud and return it as a base64-encoded PNG string."""
    cloud = WordCloud(
        width=800,
        height=400,
        stopwords=set(STOPWORDS),
        background_color='white',
    ).generate(text)
    buffer = BytesIO()
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(buffer, format='PNG', bbox_inches='tight')
    plt.close()  # release the matplotlib figure to avoid leaking memory per call
    return base64.b64encode(buffer.getvalue()).decode()
# Streamlit UI
def main():
    """Render the app: text input, per-model sentiment metrics, and a word cloud."""
    # FIX: the title emoji was mojibake ("๐Ÿ“Š") in the original; restored to 📊.
    st.title("📊 SentiMind Pro - Advanced Sentiment Analysis")
    st.subheader("Analyze text sentiment using multiple models!")
    user_input = st.text_area("Enter your text for sentiment analysis:")
    if st.button("Analyze Sentiment") and user_input:
        with st.spinner("Analyzing..."):
            sentiment_results = analyze_sentiment(user_input)
        st.metric("VADER Sentiment", f"{sentiment_results['vader']:.2f}")
        st.metric("BERT Sentiment", f"{sentiment_results['bert']:.2f}")
        st.metric("TextBlob Sentiment", f"{sentiment_results['textblob']:.2f}")
        st.metric("Combined Sentiment Score", f"{sentiment_results['combined']:.2f}")
        wordcloud_img = generate_wordcloud(user_input)
        # NOTE(review): use_column_width is deprecated in recent Streamlit
        # releases in favor of use_container_width; kept for compatibility.
        st.image(f"data:image/png;base64,{wordcloud_img}", caption="Word Cloud", use_column_width=True)

if __name__ == "__main__":
    main()