# Hugging Face Space page residue (not part of the program):
#   Soundaryasos's picture
#   Update app.py
#   5a4767a verified
#   raw | history | blame
#   4.53 kB
import streamlit as st
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
from io import BytesIO
import base64
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
from textblob import TextBlob
# Ensure necessary NLTK resources are downloaded (idempotent: fetch only when missing).
# BUG FIX: 'punkt' is a tokenizer model stored under 'tokenizers/punkt', not
# 'corpora/punkt'; the original lookup always raised LookupError for it, so
# punkt was re-downloaded on every application start.
nltk_resources = [
    ('tokenizers/punkt', 'punkt'),
    ('corpora/stopwords', 'stopwords'),
    ('corpora/wordnet', 'wordnet'),
]
for data_path, resource in nltk_resources:
    try:
        nltk.data.find(data_path)
    except LookupError:
        nltk.download(resource)
# Streamlit Page Configuration.
# FIX: the original page_icon was mojibake ("๐Ÿ“Š" — the UTF-8 bytes of the
# bar-chart emoji decoded with the wrong codec); restored to the intended 📊.
st.set_page_config(
    page_title="SentiMind Pro - Advanced Sentiment Analysis",
    page_icon="📊",
    layout="wide",
)
# Build every sentiment backend once and cache it for the Streamlit session.
@st.cache_resource()
def load_models():
    """Load the sentiment backends used by the app.

    Returns a dict with keys:
      'vader'    -- a SentimentIntensityAnalyzer instance
      'textblob' -- the TextBlob class (instantiated per call site)
      'bert'     -- a transformers sentiment-analysis pipeline, or None if
                    the DistilBERT checkpoint cannot be loaded
    """
    loaded = {
        'vader': SentimentIntensityAnalyzer(),
        'textblob': TextBlob,
    }
    bert_pipeline = None
    try:
        checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
        bert_pipeline = pipeline(
            "sentiment-analysis",
            model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
            tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        )
    except Exception as e:
        # Best-effort: the app degrades gracefully without the BERT backend.
        st.warning(f"Could not load BERT model: {e}")
    loaded['bert'] = bert_pipeline
    return loaded

models = load_models()
# Text Preprocessing Function
def preprocess_text(text):
    """Normalize raw text for analysis.

    Lowercases, strips URLs, @mentions/#hashtags and punctuation, collapses
    whitespace, then drops English stopwords and lemmatizes the remaining
    tokens. Returns the cleaned tokens re-joined with single spaces.
    """
    cleaned = text.lower()
    # Removal order matters: URLs first, then mentions/hashtags, then punctuation.
    for pattern in (r'http\S+|www\S+', r'@\w+|#\w+', r'[^\w\s]'):
        cleaned = re.sub(pattern, '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    kept = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(cleaned)
        if token not in stop_words
    ]
    return ' '.join(kept)
# Sentiment Analysis Function
def analyze_sentiment(text):
    """Score *text* with VADER, DistilBERT (if loaded) and TextBlob.

    Returns a dict with per-model polarity scores (roughly in [-1, 1]) under
    'vader', 'bert' and 'textblob', plus a weighted 'combined' score
    (0.4*VADER + 0.4*BERT + 0.2*TextBlob).
    """
    # FIX: the original computed preprocess_text(text) into a local that was
    # never used — every backend scores the raw text. The dead call (which ran
    # tokenization + lemmatization on every request) has been removed.
    vader_score = models['vader'].polarity_scores(text)['compound']
    if models['bert']:
        bert_result = models['bert'](text)[0]
        # The pipeline returns a confidence in [0, 1]; sign it by the label.
        bert_score = bert_result['score'] if bert_result['label'] == 'POSITIVE' else -bert_result['score']
    else:
        bert_score = 0  # BERT unavailable: contributes nothing to the blend
    textblob_score = models['textblob'](text).sentiment.polarity
    combined_score = (0.4 * vader_score + 0.4 * bert_score + 0.2 * textblob_score)
    return {
        'vader': vader_score,
        'bert': bert_score,
        'textblob': textblob_score,
        'combined': combined_score,
    }
# Word Cloud Generation
def generate_wordcloud(text):
    """Render *text* as a word cloud and return it as a base64-encoded PNG string."""
    cloud = WordCloud(
        width=800,
        height=400,
        stopwords=set(STOPWORDS),
        background_color='white',
    ).generate(text)
    buffer = BytesIO()
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(buffer, format='PNG', bbox_inches='tight')
    plt.close()  # release the matplotlib figure to avoid leaking memory per call
    return base64.b64encode(buffer.getvalue()).decode()
# Streamlit UI
def main():
    """Render the app: text input, per-model sentiment metrics, and a word cloud."""
    # FIX: the title emoji was mojibake ("๐Ÿ“Š") in the original; restored to 📊.
    st.title("📊 SentiMind Pro - Advanced Sentiment Analysis")
    st.subheader("Analyze text sentiment using multiple models!")
    user_input = st.text_area("Enter your text for sentiment analysis:")
    if st.button("Analyze Sentiment") and user_input:
        with st.spinner("Analyzing..."):
            sentiment_results = analyze_sentiment(user_input)
        st.metric("VADER Sentiment", f"{sentiment_results['vader']:.2f}")
        st.metric("BERT Sentiment", f"{sentiment_results['bert']:.2f}")
        st.metric("TextBlob Sentiment", f"{sentiment_results['textblob']:.2f}")
        st.metric("Combined Sentiment Score", f"{sentiment_results['combined']:.2f}")
        wordcloud_img = generate_wordcloud(user_input)
        # NOTE(review): use_column_width is deprecated in recent Streamlit
        # releases in favor of use_container_width; kept for compatibility.
        st.image(f"data:image/png;base64,{wordcloud_img}", caption="Word Cloud", use_column_width=True)

if __name__ == "__main__":
    main()