SentimentAnalyzerFinbert

Sleeping

App Files Files Community

Soundaryasos committed on Mar 16, 2025

Commit

2df2711

verified ·

1 Parent(s): e77bbae

Update app.py

Browse files

Files changed (1) hide show

app.py +440 -101

app.py CHANGED Viewed

@@ -1,116 +1,455 @@
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 import nltk
 from textblob import TextBlob
-import numpy as np
-from wordcloud import WordCloud
 import plotly.express as px
 from datetime import datetime, timedelta
-from transformers import pipeline
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
 from io import BytesIO
 import base64
 # Ensure necessary NLTK data is available
-nltk.download('punkt')
-st.set_page_config(page_title="Advanced Sentiment Analyzer", layout="wide")
-st.title("🔍 Advanced Sentiment Analysis Dashboard")
-st.markdown("Analyze sentiments with deep insights and visualizations")
-# Sidebar for user input
-st.sidebar.subheader("Enter Text for Sentiment Analysis")
-user_input = st.sidebar.text_area("Type or paste text here", "The product is amazing!")
-# Initialize sentiment analyzers
-analyzer = SentimentIntensityAnalyzer()
-bert_sentiment = pipeline("sentiment-analysis")
-def analyze_vader_sentiment(text):
-    score = analyzer.polarity_scores(text)['compound']
-    return "Positive" if score > 0.05 else "Negative" if score < -0.05 else "Neutral"
-def analyze_bert_sentiment(text):
-    result = bert_sentiment(text)[0]
-    return result['label']
-def analyze_textblob_sentiment(text):
-    return "Positive" if TextBlob(text).sentiment.polarity > 0 else "Negative" if TextBlob(text).sentiment.polarity < 0 else "Neutral"
-if user_input:
-    vader_result = analyze_vader_sentiment(user_input)
-    bert_result = analyze_bert_sentiment(user_input)
-    textblob_result = analyze_textblob_sentiment(user_input)
-    st.sidebar.markdown(f"**VADER Sentiment:** {vader_result}")
-    st.sidebar.markdown(f"**BERT Sentiment:** {bert_result}")
-    st.sidebar.markdown(f"**TextBlob Sentiment:** {textblob_result}")
-# Simulated past sentiment data
-dates = [datetime.today() - timedelta(days=i) for i in range(14)]
-sentiment_scores = np.random.uniform(-1, 1, len(dates))
-df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})
-# Train a regression model
-X = np.array(range(len(df))).reshape(-1, 1)
-y = df["Sentiment Score"]
-model = LinearRegression()
-model.fit(X, y)
-# Predict for next 7 days
-future_dates = [datetime.today() + timedelta(days=i) for i in range(1, 8)]
-X_future = np.array(range(len(df), len(df) + 7)).reshape(-1, 1)
-predictions = model.predict(X_future)
-future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
-# Past sentiment trends
-st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
-fig1 = px.line(df, x='Date', y='Sentiment Score', title='Sentiment Over Time', markers=True, line_shape='spline')
-st.plotly_chart(fig1)
-# Future sentiment predictions
-st.subheader("🔮 Sentiment Prediction for Next 7 Days")
-fig2 = px.line(future_df, x='Date', y='Predicted Sentiment', title='Predicted Sentiment Trend', markers=True, line_shape='spline')
-st.plotly_chart(fig2)
-# Sentiment distribution pie chart
-st.subheader("📊 Sentiment Distribution")
-fig3 = px.pie(values=[sum(df['Sentiment Score'] > 0), sum(df['Sentiment Score'] <= 0)], names=['Positive', 'Negative'], title='Sentiment Distribution', hole=0.3)
-st.plotly_chart(fig3)
-# Sentiment scatter plot
-st.subheader("🔎 Sentiment Scatter Plot (Last 14 Days)")
-fig4 = px.scatter(df, x='Date', y='Sentiment Score', title='Sentiment Over Time')
-st.plotly_chart(fig4)
-# Rolling average sentiment
-st.subheader("📈 Rolling Average of Sentiment (7-Day Window)")
-df['Rolling Avg Sentiment'] = df['Sentiment Score'].rolling(window=7).mean()
-fig5 = px.line(df, x='Date', y='Rolling Avg Sentiment', title="7-Day Rolling Average Sentiment")
-st.plotly_chart(fig5)
-# Generate Word Cloud
-def generate_wordcloud(text):
-    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
-    img = BytesIO()
-    wordcloud.to_image().save(img, format='PNG')
-    return base64.b64encode(img.getvalue()).decode()
-# Word Cloud
-st.subheader("☁️ Word Cloud")
-if user_input:
-    wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
-    st.image(wordcloud_img, use_column_width=True)
-# Download Report as CSV
-st.subheader("📄 Download Report")
-csv = df.to_csv(index=False).encode('utf-8')
-st.download_button(label="Download CSV", data=csv, file_name="sentiment_analysis.csv", mime='text/csv')
-st.sidebar.markdown("Developed with ❤️")

 import streamlit as st
 import pandas as pd
+import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
 import nltk
 from textblob import TextBlob
+from wordcloud import WordCloud, STOPWORDS
 import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
 from datetime import datetime, timedelta
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler
+from sklearn.metrics import mean_squared_error, r2_score
 from io import BytesIO
 import base64
+import re
+import json
+import altair as alt
+import time
+import requests
+from PIL import Image
+from collections import Counter
+import spacy
+import emoji
+import warnings
+warnings.filterwarnings('ignore')
+# Page Configuration
+# st.set_page_config has to be the first Streamlit command that runs, so it is
+# issued before the spaCy fallback below, which may call st.warning.
+st.set_page_config(
+    page_title="Sentiment Pulse | Advanced Sentiment Analyzer",
+    page_icon="🔮",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Initialize spaCy for advanced NLP
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:
+    # spacy.load raises OSError when the model package is not installed;
+    # anything else (e.g. KeyboardInterrupt) should not trigger a download.
+    st.warning("Installing spaCy model. This might take a minute...")
+    import subprocess
+    import sys
+    # Run the download with the interpreter executing this app so the model is
+    # installed into the same environment, not whatever "python" is on PATH.
+    subprocess.run(
+        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
+        capture_output=True,
+    )
+    nlp = spacy.load("en_core_web_sm")
 # Ensure necessary NLTK data is available
+nltk.download('punkt', quiet=True)
+# Newer NLTK releases load sentence-tokenizer data from 'punkt_tab' instead of
+# 'punkt'; fetch both so nltk.sent_tokenize works on either.
+nltk.download('punkt_tab', quiet=True)
+nltk.download('stopwords', quiet=True)
+nltk.download('wordnet', quiet=True)
+nltk.download('vader_lexicon', quiet=True)
+# Apply custom CSS for modern look
+# The stylesheet is injected as raw HTML through st.markdown with
+# unsafe_allow_html=True. It defines theme color variables on :root and styles
+# for cards, headings, buttons, metrics, a divider, tabs and tooltips, and it
+# hides the #MainMenu element and the footer.
+# NOTE(review): .reportview-container and .sidebar .sidebar-content look like
+# legacy Streamlit class names — confirm they still match the DOM of the
+# deployed Streamlit version, otherwise those two rules have no effect.
+st.markdown("""
+<style>
+    /* Main theme colors */
+    :root {
+        --primary: #7B68EE;
+        --secondary: #00BFFF;
+        --background: #F8F9FA;
+        --text: #333333;
+        --accent: #FF69B4;
+    }
+    /* Base Styles */
+    .reportview-container {
+        background-color: var(--background);
+        color: var(--text);
+    }
+    .sidebar .sidebar-content {
+        background-image: linear-gradient(to bottom, var(--primary), var(--secondary));
+        color: white;
+    }
+    /* Card-like containers */
+    .card {
+        background-color: white;
+        border-radius: 10px;
+        padding: 20px;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+        margin-bottom: 20px;
+    }
+    /* Header styling */
+    h1, h2, h3 {
+        color: var(--primary);
+        font-weight: 700;
+    }
+    /* Button styling */
+    .stButton>button {
+        background-color: var(--primary);
+        color: white;
+        border-radius: 8px;
+        border: none;
+        transition: all 0.3s;
+    }
+    .stButton>button:hover {
+        background-color: var(--secondary);
+        transform: translateY(-2px);
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
+    }
+    /* Metric styling */
+    .metric-value {
+        font-size: 32px;
+        font-weight: 700;
+        color: var(--primary);
+    }
+    .metric-label {
+        font-size: 14px;
+        color: var(--text);
+        opacity: 0.7;
+    }
+    /* Divider */
+    .divider {
+        height: 3px;
+        background-image: linear-gradient(to right, var(--primary), var(--secondary));
+        margin: 20px 0;
+        border-radius: 3px;
+    }
+    /* Hide hamburger menu and footer */
+    #MainMenu {visibility: hidden;}
+    footer {visibility: hidden;}
+    /* Custom tab styling */
+    .stTabs [data-baseweb="tab-list"] {
+        gap: 8px;
+    }
+    .stTabs [data-baseweb="tab"] {
+        background-color: transparent;
+        border-radius: 4px 4px 0px 0px;
+        border: none;
+        color: var(--text);
+        padding: 10px 16px;
+    }
+    .stTabs [aria-selected="true"] {
+        background-color: white !important;
+        color: var(--primary) !important;
+        font-weight: bold;
+        border-top: 2px solid var(--primary);
+    }
+    /* Tooltip */
+    .tooltip {
+        position: relative;
+        display: inline-block;
+        border-bottom: 1px dotted black;
+    }
+    .tooltip .tooltiptext {
+        visibility: hidden;
+        width: 200px;
+        background-color: #555;
+        color: #fff;
+        text-align: center;
+        border-radius: 6px;
+        padding: 5px;
+        position: absolute;
+        z-index: 1;
+        bottom: 125%;
+        left: 50%;
+        margin-left: -100px;
+        opacity: 0;
+        transition: opacity 0.3s;
+    }
+    .tooltip:hover .tooltiptext {
+        visibility: visible;
+        opacity: 1;
+    }
+</style>
+""", unsafe_allow_html=True)
+# ===== UTILITY FUNCTIONS =====
+def clean_text(text):
+    """Normalize raw text ahead of analysis.
+
+    Non-string input yields an empty string. Otherwise the text is
+    lower-cased, URLs, @mentions/#hashtags and remaining punctuation are
+    stripped, and runs of whitespace collapse to a single space.
+    """
+    if not isinstance(text, str):
+        return ""
+    # Order matters: URLs and @/# tokens are removed while their punctuation
+    # is still present, then leftover punctuation, then whitespace.
+    substitutions = (
+        (r'https?://\S+|www\.\S+', ''),
+        (r'@\w+|#\w+', ''),
+        (r'[^\w\s]', ''),
+        (r'\s+', ' '),
+    )
+    cleaned = text.lower()
+    for pattern, replacement in substitutions:
+        cleaned = re.sub(pattern, replacement, cleaned)
+    return cleaned.strip()
+def extract_hashtags(text):
+    """Return every hashtag name in text, without the leading '#'.
+
+    Non-string input produces an empty list.
+    """
+    return re.findall(r'#(\w+)', text) if isinstance(text, str) else []
+def extract_mentions(text):
+    """Return every @mention name in text, without the leading '@'.
+
+    Non-string input produces an empty list.
+    """
+    return re.findall(r'@(\w+)', text) if isinstance(text, str) else []
+def count_emojis(text):
+    """Count the emojis contained in text.
+
+    Returns 0 for non-string or empty input. Counting is delegated to
+    emoji.emoji_count so that an emoji made of several code points (ZWJ
+    sequences, flags, skin-tone variants) is counted once as a whole, instead
+    of testing each code point on its own against emoji.EMOJI_DATA.
+    """
+    if not isinstance(text, str) or not text:
+        return 0
+    return emoji.emoji_count(text)
+def get_emoji_sentiment(text):
+    """Sum the sentiment weights of known emojis appearing in text.
+
+    Each listed happy emoji contributes +1 and each listed sad/angry emoji
+    contributes -1; every other character contributes nothing. Non-string
+    input returns 0.
+    """
+    if not isinstance(text, str):
+        return 0
+    # Simple dictionary of emoji sentiment (expand as needed)
+    weights = {
+        '😊': 1, '😃': 1, '😄': 1, '😁': 1, '😍': 1,
+        '😢': -1, '😭': -1, '😡': -1, '😠': -1, '😞': -1
+    }
+    return sum(weights.get(symbol, 0) for symbol in text)
+def generate_wordcloud(text, mask=None, background_color='white'):
+    """Generate a word cloud from text.
+
+    Args:
+        text: Source text for the cloud.
+        mask: Optional mask array forwarded to WordCloud; None draws the
+            default rectangle.
+        background_color: Background color of the rendered cloud.
+
+    Returns:
+        The generated WordCloud object, or None when text is empty, is not a
+        string, or contains no words once stopwords are removed.
+    """
+    if not text or not isinstance(text, str):
+        return None
+    stopwords = set(STOPWORDS)
+    # Add custom stopwords
+    custom_stopwords = {'the', 'and', 'to', 'of', 'a', 'in', 'is', 'that', 'it', 'was'}
+    stopwords.update(custom_stopwords)
+    cloud = WordCloud(
+        width=800,
+        height=400,
+        background_color=background_color,
+        stopwords=stopwords,
+        max_words=150,
+        colormap='viridis',
+        # The mask was previously accepted but never forwarded; the contour
+        # settings below only apply when a mask is supplied.
+        mask=mask,
+        contour_width=3,
+        contour_color='steelblue',
+        collocations=False
+    )
+    try:
+        return cloud.generate(text)
+    except ValueError:
+        # WordCloud raises ValueError when no words are left to plot (for
+        # example text made only of stopwords); report that as "no cloud",
+        # the same way empty input is reported.
+        return None
+def get_entity_analysis(text):
+    """Group the named entities spaCy finds in text by entity label.
+
+    Returns a dict mapping each label to the list of entity texts carrying
+    that label, in document order. Empty or non-string input returns {}.
+    """
+    if not text or not isinstance(text, str):
+        return {}
+    grouped = {}
+    for entity in nlp(text).ents:
+        grouped.setdefault(entity.label_, []).append(entity.text)
+    return grouped
+def extract_keywords(text, top_n=10):
+    """Return the top_n most frequent keywords in text as (word, count) pairs.
+
+    A keyword is a lower-cased spaCy token that is a noun, proper noun or
+    adjective, is neither a stop word nor punctuation, and is longer than one
+    character. Empty or non-string input returns [].
+    """
+    if not text or not isinstance(text, str):
+        return []
+    wanted_pos = ('NOUN', 'PROPN', 'ADJ')
+    frequencies = Counter(
+        tok.text.lower()
+        for tok in nlp(text)
+        if not (tok.is_stop or tok.is_punct)
+        and tok.pos_ in wanted_pos
+        and len(tok.text) > 1
+    )
+    return frequencies.most_common(top_n)
+def analyze_tone(text):
+    """Classify the tone of text from TextBlob polarity and subjectivity.
+
+    Polarity selects one of five bands (strongly positive, mildly positive,
+    strongly negative, mildly negative, neutral); within a band, a
+    subjectivity above 0.7 selects the more emotive label. Empty or
+    non-string input returns "Neutral".
+    """
+    if not text or not isinstance(text, str):
+        return "Neutral"
+    # Use TextBlob for sentiment
+    sentiment = TextBlob(text).sentiment
+    polarity = sentiment.polarity
+    highly_subjective = sentiment.subjectivity > 0.7
+    # Each band is (label when highly subjective, label otherwise).
+    if polarity > 0.5:
+        band = ("Enthusiastic", "Positive")
+    elif polarity > 0.1:
+        band = ("Interested", "Somewhat Positive")
+    elif polarity < -0.5:
+        band = ("Angry", "Negative")
+    elif polarity < -0.1:
+        band = ("Frustrated", "Somewhat Negative")
+    else:
+        band = ("Uncertain", "Neutral")
+    return band[0] if highly_subjective else band[1]
+def analyze_readability(text):
+    """Compute simple readability metrics for text.
+
+    Returns a dict with word_count, avg_word_length, avg_sentence_length,
+    readability_score (clamped to 0-100) and a complexity label. Empty or
+    non-string input returns {}; text with no whitespace-separated words
+    returns the same keys with zero values and complexity "N/A".
+    """
+    if not text or not isinstance(text, str):
+        return {}
+    tokens = text.split()
+    word_count = len(tokens)
+    if not word_count:
+        return {
+            "word_count": 0,
+            "avg_word_length": 0,
+            "avg_sentence_length": 0,
+            "readability_score": 0,
+            "complexity": "N/A"
+        }
+    sentence_count = len(nltk.sent_tokenize(text))
+    avg_word_length = sum(map(len, tokens)) / word_count
+    avg_sentence_length = word_count / sentence_count if sentence_count > 0 else 0
+    # Simplified readability score (based on avg word & sentence length)
+    raw_score = 206.835 - (1.015 * avg_sentence_length) - (84.6 * avg_word_length / 5)
+    readability_score = max(0, min(100, raw_score))
+    # Bands are checked from easiest to hardest; the first lower bound the
+    # score exceeds gives the label.
+    bands = (
+        (90, "Very Easy"),
+        (80, "Easy"),
+        (70, "Fairly Easy"),
+        (60, "Standard"),
+        (50, "Fairly Difficult"),
+        (30, "Difficult"),
+    )
+    complexity = next(
+        (label for lower_bound, label in bands if readability_score > lower_bound),
+        "Very Difficult",
+    )
+    return {
+        "word_count": word_count,
+        "avg_word_length": round(avg_word_length, 2),
+        "avg_sentence_length": round(avg_sentence_length, 2),
+        "readability_score": round(readability_score, 2),
+        "complexity": complexity
+    }
+def get_sentiment_color(score):
+    """Return the hex color representing a sentiment score.
+
+    Scores above 0.5 are strong positive, other positive scores are positive,
+    exactly 0 is neutral, scores above -0.5 are negative and everything else
+    is strong negative.
+    """
+    if score > 0:
+        # Strong positive: Sea Green / Positive: Light Green
+        return "#2E8B57" if score > 0.5 else "#90EE90"
+    if score == 0:
+        return "#D3D3D3"  # Neutral: Light Gray
+    # Negative: Light Salmon / Strong negative: Crimson
+    return "#FFA07A" if score > -0.5 else "#DC143C"
+def map_sentiment_to_emoji(score):
+    """Return the emoji that represents a sentiment score.
+
+    Exactly 0 maps to the neutral face. Otherwise the score is compared
+    against descending lower bounds and the first bound it exceeds picks the
+    emoji; scores of -0.75 or lower map to the angry face.
+    """
+    if score == 0:
+        return "😐"
+    # (exclusive lower bound, emoji), ordered from most positive downwards.
+    ladder = (
+        (0.75, "😍"),
+        (0.5, "😁"),
+        (0.25, "🙂"),
+        (0, "😊"),
+        (-0.25, "😕"),
+        (-0.5, "😟"),
+        (-0.75, "😞"),
+    )
+    for lower_bound, face in ladder:
+        if score > lower_bound:
+            return face
+    return "😡"
+def download_as_file(object_to_download, download_filename, button_text, pickle_it=False):
+    """
+    Generates a link to download the given object_to_download.
+    Args:
+        object_to_download: The object to be downloaded.
+        download_filename: Filename that the object will be saved as.
+        button_text: Text to display on the download button.
+        pickle_it: If True, pickle file.
+    """
+    if pickle_it:
+        try:
+            object_to_download = pickle.dumps(object_to_download)
+        except pickle.PicklingError:
+            return None
+    # Convert to bytes
+    if isinstance(object_to_download, bytes):
+        pass
+    elif isinstance(object_to_download, pd.DataFrame):
+        object_to_download = object_to_download.to_csv(index=False).encode()
+    # Add other data types as needed
+    else:
+        object_to_download = str(object_to_download).encode()
+    # Generate download button
+    b64 = base64.b64encode(object_to_download).decode()
+    button_uuid = str(hash(button_text))