Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.ensemble import RandomForestRegressor | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
| from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | |
| from wordcloud import WordCloud, STOPWORDS | |
| import matplotlib.pyplot as plt | |
| from io import BytesIO | |
| import base64 | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
| from nltk.stem import WordNetLemmatizer | |
| import re | |
| from textblob import TextBlob | |
# Ensure necessary NLTK resources are downloaded.
# nltk.data.find() needs the full "<category>/<package>" path. 'punkt' is
# installed under "tokenizers/", not "corpora/", so looking it up under
# "corpora/" always fails and forces a re-download on every run.
# NOTE(review): recent NLTK releases appear to load the tokenizer from
# 'punkt_tab' instead of 'punkt' - confirm against the pinned NLTK version.
nltk_resources = ['punkt', 'stopwords', 'wordnet']
_NLTK_RESOURCE_CATEGORIES = {'punkt': 'tokenizers'}
for resource in nltk_resources:
    # Anything not listed explicitly is a corpus.
    category = _NLTK_RESOURCE_CATEGORIES.get(resource, 'corpora')
    try:
        nltk.data.find(f'{category}/{resource}')
    except LookupError:
        nltk.download(resource)
# Streamlit Page Configuration
# NOTE(review): the page_icon value looks like a mis-encoded emoji; it is kept
# exactly as found because the intended character cannot be recovered here.
_PAGE_CONFIG = {
    "page_title": "SentiMind Pro - Advanced Sentiment Analysis",
    "page_icon": "๐",
    "layout": "wide",
}
st.set_page_config(**_PAGE_CONFIG)
# Initialize Sentiment Analysis Models
@st.cache_resource
def load_models():
    """Build the sentiment back-ends used by the app.

    Streamlit re-executes the script on every interaction, so the result is
    cached with ``st.cache_resource``; without it the DistilBERT weights and
    tokenizer would be reloaded on each rerun. Note that a failed BERT load
    is cached as well and will only be retried after the cache is cleared or
    the process restarts.

    Returns:
        dict: ``'vader'`` maps to a ``SentimentIntensityAnalyzer`` instance,
        ``'textblob'`` maps to the ``TextBlob`` class itself (callers
        instantiate it per text), and ``'bert'`` maps to a transformers
        sentiment pipeline, or ``None`` if it could not be loaded.
    """
    sentiment_models = {
        'vader': SentimentIntensityAnalyzer(),
        'textblob': TextBlob,
    }
    try:
        model_name = "distilbert-base-uncased-finetuned-sst-2-english"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        sentiment_models['bert'] = pipeline(
            "sentiment-analysis", model=model, tokenizer=tokenizer
        )
    except Exception as e:
        # Best effort: the app keeps working with VADER and TextBlob only.
        st.warning(f"Could not load BERT model: {e}")
        sentiment_models['bert'] = None
    return sentiment_models


models = load_models()
# Text Preprocessing Function
def preprocess_text(text):
    """Normalize *text* into a space-joined string of lemmatized tokens.

    The text is lower-cased, then URLs, @mentions/#hashtags and punctuation
    are stripped and whitespace is collapsed. The result is tokenized,
    English stop words are dropped and each remaining token is lemmatized.

    Args:
        text: Raw input string.

    Returns:
        str: The cleaned tokens joined by single spaces.
    """
    # (pattern, replacement) pairs, applied strictly in this order.
    substitutions = (
        (r'http\S+|www\S+', ''),   # URLs
        (r'@\w+|#\w+', ''),        # mentions and hashtags
        (r'[^\w\s]', ''),          # punctuation
        (r'\s+', ' '),             # runs of whitespace
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    cleaned = cleaned.strip()

    words = word_tokenize(cleaned)
    english_stop_words = set(stopwords.words('english'))
    lemmatize = WordNetLemmatizer().lemmatize
    return ' '.join(
        lemmatize(word) for word in words if word not in english_stop_words
    )
# Sentiment Analysis Function
def analyze_sentiment(text):
    """Score *text* with VADER, BERT and TextBlob and blend the results.

    All three models receive the raw, unprocessed text. The combined score
    is ``0.4 * vader + 0.4 * bert + 0.2 * textblob``. When the BERT pipeline
    is unavailable its score is reported as 0 and contributes nothing to
    the combined score.

    Args:
        text: The string to analyze.

    Returns:
        dict: Float scores under the keys ``'vader'``, ``'bert'``,
        ``'textblob'`` and ``'combined'``.
    """
    vader_score = models['vader'].polarity_scores(text)['compound']

    bert_pipeline = models['bert']
    if bert_pipeline is not None:
        # truncation=True clips inputs longer than the model's maximum
        # sequence length instead of letting the pipeline raise on them.
        bert_result = bert_pipeline(text, truncation=True)[0]
        # The pipeline reports a confidence for the predicted label; negate
        # it for non-POSITIVE predictions to get a signed score.
        if bert_result['label'] == 'POSITIVE':
            bert_score = bert_result['score']
        else:
            bert_score = -bert_result['score']
    else:
        bert_score = 0

    textblob_score = models['textblob'](text).sentiment.polarity
    combined_score = 0.4 * vader_score + 0.4 * bert_score + 0.2 * textblob_score
    return {
        'vader': vader_score,
        'bert': bert_score,
        'textblob': textblob_score,
        'combined': combined_score,
    }
# Word Cloud Generation
def generate_wordcloud(text):
    """Render a word cloud for *text* and return it as a base64 PNG string.

    The figure is created explicitly and always closed, so a failure while
    drawing or saving does not leave an open figure behind in pyplot's
    global registry.

    Args:
        text: Source text for the cloud.

    Returns:
        str: The PNG image bytes, base64-encoded and decoded to ``str``.

    Raises:
        ValueError: Propagated from ``WordCloud.generate``, which is
            expected to raise it when *text* contains no usable words.
    """
    stopwords_set = set(STOPWORDS)
    wordcloud = WordCloud(
        width=800,
        height=400,
        stopwords=stopwords_set,
        background_color='white',
    ).generate(text)

    img = BytesIO()
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        fig.savefig(img, format='PNG', bbox_inches='tight')
    finally:
        # Release the figure even if rendering or saving failed.
        plt.close(fig)
    return base64.b64encode(img.getvalue()).decode()
# Streamlit UI
def main():
    """Render the page: text input, per-model metrics and a word cloud.

    Nothing is analyzed until the button is pressed with non-blank input.
    If the word cloud cannot be built from the input, an informational
    message is shown instead of letting the error abort the page.
    """
    # NOTE(review): the leading character of the title looks like a
    # mis-encoded emoji; kept as found because the original is unknown.
    st.title("๐ SentiMind Pro - Advanced Sentiment Analysis")
    st.subheader("Analyze text sentiment using multiple models!")
    user_input = st.text_area("Enter your text for sentiment analysis:")

    # The button is always rendered; whitespace-only input is treated the
    # same as empty input and skipped.
    if not st.button("Analyze Sentiment") or not user_input.strip():
        return

    with st.spinner("Analyzing..."):
        sentiment_results = analyze_sentiment(user_input)

    st.metric("VADER Sentiment", f"{sentiment_results['vader']:.2f}")
    st.metric("BERT Sentiment", f"{sentiment_results['bert']:.2f}")
    st.metric("TextBlob Sentiment", f"{sentiment_results['textblob']:.2f}")
    st.metric("Combined Sentiment Score", f"{sentiment_results['combined']:.2f}")

    try:
        wordcloud_img = generate_wordcloud(user_input)
    except ValueError:
        # Raised when no words remain to draw, e.g. the input consists
        # only of stop words or punctuation.
        st.info("Not enough words to generate a word cloud.")
    else:
        st.image(
            f"data:image/png;base64,{wordcloud_img}",
            caption="Word Cloud",
            use_column_width=True,
        )


if __name__ == "__main__":
    main()