Soundaryasos committed on
Commit
e60a430
·
verified ·
1 Parent(s): 768de6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -115
app.py CHANGED
@@ -1,130 +1,120 @@
1
  import streamlit as st
2
- import pandas as pd
 
3
  import numpy as np
4
- from datetime import datetime
 
5
  import plotly.express as px
6
- import plotly.graph_objects as go
7
  from sklearn.linear_model import LinearRegression
8
- from sklearn.ensemble import RandomForestRegressor
9
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
10
- from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
11
- from wordcloud import WordCloud, STOPWORDS
12
- import matplotlib.pyplot as plt
13
- from io import BytesIO
14
  import base64
15
- import nltk
16
- from nltk.corpus import stopwords
17
- from nltk.tokenize import word_tokenize
18
- from nltk.stem import WordNetLemmatizer
19
- import re
20
- from textblob import TextBlob
21
-
22
# Ensure necessary NLTK resources are downloaded.
# nltk.data.find() takes a path relative to the NLTK data directory, and the
# category differs per resource: 'punkt' is a tokenizer while the other two
# are corpora. Looking everything up under corpora/ never finds 'punkt', so a
# download would be attempted on every start.
nltk_resources = ['punkt', 'stopwords', 'wordnet']
nltk_resource_paths = {
    'punkt': 'tokenizers/punkt',
    'stopwords': 'corpora/stopwords',
    'wordnet': 'corpora/wordnet',
}
for resource in nltk_resources:
    try:
        nltk.data.find(nltk_resource_paths[resource])
    except LookupError:
        nltk.download(resource)

# Streamlit Page Configuration
st.set_page_config(
    page_title="SentiMind Pro - Advanced Sentiment Analysis",
    page_icon="📊",
    layout="wide",
)
36
 
37
# Initialize Sentiment Analysis Models
@st.cache_resource()
def load_models():
    """Build the sentiment analyzers used by the app.

    Returns:
        dict with three entries:
        'vader'    -- a SentimentIntensityAnalyzer instance,
        'textblob' -- the TextBlob class itself (callers instantiate it per text),
        'bert'     -- a transformers "sentiment-analysis" pipeline, or None when
                      the model could not be loaded (a Streamlit warning is shown).
    """
    checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

    # Best-effort load: any failure downgrades the app to VADER + TextBlob
    # instead of aborting startup.
    try:
        bert_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        bert_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        bert_pipeline = pipeline(
            "sentiment-analysis", model=bert_model, tokenizer=bert_tokenizer
        )
    except Exception as e:
        st.warning(f"Could not load BERT model: {e}")
        bert_pipeline = None

    return {
        'vader': SentimentIntensityAnalyzer(),
        'textblob': TextBlob,
        'bert': bert_pipeline,
    }


models = load_models()
 
 
 
57
 
58
# Text Preprocessing Function
def preprocess_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps, in order: lowercase; strip URLs, @mentions/#hashtags and
    punctuation; collapse whitespace; tokenize; drop English stop words;
    lemmatize each remaining token.
    """
    substitutions = (
        (r'http\S+|www\S+', ''),  # Remove URLs
        (r'@\w+|#\w+', ''),       # Remove mentions and hashtags
        (r'[^\w\s]', ''),         # Remove punctuation
        (r'\s+', ' '),            # Collapse runs of whitespace
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    cleaned = cleaned.strip()

    words = word_tokenize(cleaned)
    english_stop_words = set(stopwords.words('english'))
    lemmatize = WordNetLemmatizer().lemmatize

    return ' '.join(
        lemmatize(word) for word in words if word not in english_stop_words
    )
74
 
75
# Sentiment Analysis Function
def analyze_sentiment(text):
    """Score *text* with VADER, BERT and TextBlob and blend the results.

    All three models are run on the raw text.

    Args:
        text: The user-supplied string to analyze.

    Returns:
        dict with the per-model scores under 'vader', 'bert' and 'textblob',
        plus 'combined' = 0.4 * vader + 0.4 * bert + 0.2 * textblob.
        When the BERT pipeline is unavailable its score is reported as 0.
    """
    vader_score = models['vader'].polarity_scores(text)['compound']

    if models['bert']:
        # truncation=True keeps long inputs within the model's maximum
        # sequence length instead of failing inside the model.
        bert_result = models['bert'](text, truncation=True)[0]
        # The pipeline reports a confidence for the winning label; fold the
        # label into the sign so the value is comparable with the other scores.
        if bert_result['label'] == 'POSITIVE':
            bert_score = bert_result['score']
        else:
            bert_score = -bert_result['score']
    else:
        bert_score = 0

    textblob_score = models['textblob'](text).sentiment.polarity
    combined_score = 0.4 * vader_score + 0.4 * bert_score + 0.2 * textblob_score

    return {
        'vader': vader_score,
        'bert': bert_score,
        'textblob': textblob_score,
        'combined': combined_score,
    }
95
-
96
# Word Cloud Generation
def generate_wordcloud(text):
    """Render a word cloud for *text* and return it as a base64-encoded PNG string.

    The cloud is drawn through matplotlib with the axes hidden; the returned
    string carries no "data:" URI prefix.
    """
    cloud = WordCloud(
        width=800,
        height=400,
        stopwords=set(STOPWORDS),
        background_color='white',
    ).generate(text)

    img = BytesIO()
    figure = plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(img, format='PNG', bbox_inches='tight')
    # Close the figure that was just drawn so figures do not pile up.
    plt.close(figure)

    png_bytes = img.getvalue()
    return base64.b64encode(png_bytes).decode()
109
 
110
# Streamlit UI
def main():
    """Render the page: text input, per-model sentiment metrics and a word cloud."""
    st.title("📊 SentiMind Pro - Advanced Sentiment Analysis")
    st.subheader("Analyze text sentiment using multiple models!")

    user_input = st.text_area("Enter your text for sentiment analysis:")

    # The button is evaluated before the text check so it is always rendered.
    clicked = st.button("Analyze Sentiment")
    if not (clicked and user_input):
        return

    with st.spinner("Analyzing..."):
        sentiment_results = analyze_sentiment(user_input)

    metric_rows = (
        ("VADER Sentiment", 'vader'),
        ("BERT Sentiment", 'bert'),
        ("TextBlob Sentiment", 'textblob'),
        ("Combined Sentiment Score", 'combined'),
    )
    for label, key in metric_rows:
        st.metric(label, f"{sentiment_results[key]:.2f}")

    wordcloud_img = generate_wordcloud(user_input)
    st.image(f"data:image/png;base64,{wordcloud_img}", caption="Word Cloud", use_column_width=True)


if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline
3
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
4
  import numpy as np
5
+ import pandas as pd
6
+ from datetime import datetime, timedelta
7
  import plotly.express as px
 
8
  from sklearn.linear_model import LinearRegression
9
+ from wordcloud import WordCloud
 
 
 
 
 
10
  import base64
11
+ from io import BytesIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
# Initialize sentiment models
@st.cache_resource()
def _load_sentiment_models():
    """Create the BERT pipeline and the VADER analyzer, cached across reruns.

    Streamlit re-executes this script on every widget interaction; building the
    transformer pipeline at module level would repeat that work on each rerun.

    Returns:
        A (bert_pipeline, vader_analyzer) tuple.
    """
    # Name the checkpoint explicitly rather than relying on the task default,
    # so the model in use does not depend on the installed transformers version.
    bert = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
    return bert, SentimentIntensityAnalyzer()


bert_sentiment, vader_analyzer = _load_sentiment_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
# Generate sample past sentiment data.
# Dates are built oldest -> newest so that a row's position is a valid time
# index: the regression below is fitted on positions 0..13 and extrapolated to
# positions 14..20, which are labelled as the 7 days after today. With a
# newest-first frame that extrapolation would run backwards in time.
today = datetime.today()
dates = [today - timedelta(days=days_ago) for days_ago in range(13, -1, -1)]
sentiment_scores = np.random.uniform(-1, 1, len(dates))
df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})

# Train a regression model on (row position -> sentiment score)
X = np.arange(len(df)).reshape(-1, 1)
y = df["Sentiment Score"]
model = LinearRegression()
model.fit(X, y)

# Predict for next 7 days
future_dates = [today + timedelta(days=i) for i in range(1, 8)]
X_future = np.arange(len(df), len(df) + 7).reshape(-1, 1)
predictions = model.predict(X_future)

future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
34
+
35
+ # Generate Word Cloud
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
def generate_wordcloud(text):
    """Render *text* as a word cloud and return the PNG bytes base64-encoded.

    The result is a plain str without a "data:" URI prefix; callers add one
    when embedding the image.
    """
    cloud = WordCloud(width=800, height=400, background_color='white')
    cloud.generate(text)

    png_buffer = BytesIO()
    cloud.to_image().save(png_buffer, format='PNG')

    encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode()
41
 
42
# Streamlit app setup
st.title("Interactive Sentiment Analysis Dashboard")

# Sidebar for navigation and settings: everything created inside the
# `with` block is placed in the sidebar.
with st.sidebar:
    st.header("Sentiment Analysis Controls")
    st.subheader("Input")
    user_input = st.text_input('Enter text for sentiment analysis')
49
+
50
# Display sentiment analysis results
def display_sentiment_analysis(vader_score, bert_result, text=None):
    """Render the scores, a score breakdown chart and a word cloud.

    Args:
        vader_score: VADER compound score for the analyzed text.
        bert_result: Mapping with 'label' and 'score' keys, as produced by the
            sentiment pipeline for one input.
        text: Text to draw the word cloud from. Defaults to the module-level
            ``user_input`` so existing two-argument calls keep working.
    """
    if text is None:
        text = user_input

    st.subheader("Sentiment Analysis Results:")
    st.write(f"**VADER Sentiment Score**: {vader_score:.2f}")
    st.write(f"**BERT Sentiment**: {bert_result['label']} ({bert_result['score']:.2f})")

    # Split the single compound score into three bars; 'Negative' keeps its
    # sign, so it is drawn below zero.
    sentiment_data = {
        'Positive': max(0, vader_score),
        'Negative': min(0, vader_score),
        'Neutral': 1 - abs(vader_score),
    }
    sentiment_df = pd.DataFrame(list(sentiment_data.items()), columns=["Sentiment", "Score"])
    st.bar_chart(sentiment_df.set_index("Sentiment"))

    # WordCloud raises ValueError when no words survive its stop-word filter
    # (e.g. the input is just "the"); show a notice instead of crashing the app.
    try:
        encoded_png = generate_wordcloud(text)
    except ValueError:
        st.info("Not enough words to draw a word cloud.")
    else:
        wordcloud_img = f'data:image/png;base64,{encoded_png}'
        st.image(wordcloud_img, use_column_width=True)
62
+
63
# Analyze sentiment on button click
if st.sidebar.button('Analyze Sentiment'):
    # Whitespace-only input is treated as empty: it carries no sentiment and
    # yields no words for the word cloud.
    if user_input and user_input.strip():
        with st.spinner('Analyzing text...'):
            vader_score = vader_analyzer.polarity_scores(user_input)['compound']
            # truncation=True keeps long inputs within the model's maximum
            # sequence length instead of failing inside the model.
            bert_result = bert_sentiment(user_input, truncation=True)[0]
        display_sentiment_analysis(vader_score, bert_result)
    else:
        st.warning("Please enter some text for analysis.")
72
+
73
# Past sentiment trends
st.subheader("Past Sentiment Trends (Last 14 Days)")
fig1 = px.line(
    df,
    x='Date',
    y='Sentiment Score',
    title='Past Sentiment Trends',
    markers=True,
    line_shape='spline',
)
st.plotly_chart(fig1)

# Future sentiment predictions
st.subheader("Sentiment Prediction for Next 7 Days")
fig2 = px.line(
    future_df,
    x='Date',
    y='Predicted Sentiment',
    title='Sentiment Prediction for Next 7 Days',
    markers=True,
    line_shape='spline',
)
st.plotly_chart(fig2)

# Sentiment distribution pie chart: days with a score above zero count as
# positive, everything else (including exactly zero) as negative.
st.subheader("Sentiment Distribution")
positive_days = (df['Sentiment Score'] > 0).sum()
negative_days = (df['Sentiment Score'] <= 0).sum()
fig3 = px.pie(
    values=[positive_days, negative_days],
    names=['Positive', 'Negative'],
    title='Sentiment Distribution',
    hole=0.3,
)
st.plotly_chart(fig3)

# Histogram of Sentiment Scores
st.subheader("Sentiment Score Distribution (Past 14 Days)")
fig4 = px.histogram(df, x='Sentiment Score', nbins=20, title="Sentiment Score Distribution")
st.plotly_chart(fig4)
93
+
94
# Sentiment heatmap
st.subheader("Sentiment Heatmap (Past 14 Days)")
df['Day'] = df['Date'].dt.dayofweek  # Monday=0, Sunday=6
df['Hour'] = np.random.randint(0, 24, len(df))  # Simulating hourly data
# Fourteen consecutive days contain every weekday twice, so two rows can draw
# the same (Day, Hour) pair. DataFrame.pivot raises ValueError on duplicate
# index/column pairs; pivot_table averages them instead.
heatmap_data = df.pivot_table(
    index='Day',
    columns='Hour',
    values='Sentiment Score',
    aggfunc='mean',
)
fig5 = px.imshow(
    heatmap_data,
    title="Heatmap of Sentiment Over Time",
    labels={'x': 'Hour of Day', 'y': 'Day of Week'},
    color_continuous_scale='RdBu',
)
st.plotly_chart(fig5)
106
+
107
# Sentiment scatter plot
st.subheader("Sentiment Scatter Plot (Past 14 Days)")
fig6 = px.scatter(df, x='Date', y='Sentiment Score', title='Sentiment Over Time')
st.plotly_chart(fig6)

# Rolling average sentiment
st.subheader("Rolling Average of Sentiment (7-Day Window)")
# rolling() walks rows in their stored order, so the window is computed on a
# date-sorted view to make each value the mean of that day and the six days
# before it, whatever the frame's row order. Assigning the result back aligns
# on the index, so every value lands on its own row.
df['Rolling Avg Sentiment'] = (
    df.sort_values('Date')['Sentiment Score'].rolling(window=7).mean()
)
fig7 = px.line(df, x='Date', y='Rolling Avg Sentiment', title="Rolling Average of Sentiment (7-Day Window)")
st.plotly_chart(fig7)
117
+
118
# Reset button
if st.sidebar.button('Reset Analysis'):
    # st.experimental_rerun was superseded by st.rerun and is absent from newer
    # Streamlit releases; prefer the stable name and fall back to the old one
    # only when st.rerun does not exist.
    rerun = getattr(st, 'rerun', None) or st.experimental_rerun
    rerun()