Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
from wordcloud import WordCloud, STOPWORDS
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import folium
|
| 8 |
+
import plotly.express as px
|
| 9 |
+
import seaborn as sns
|
| 10 |
+
import json
|
| 11 |
+
import os
|
| 12 |
+
from streamlit_folium import folium_static
|
| 13 |
+
|
| 14 |
+
# Text rendered in both the main page and the sidebar.
_APP_TITLE = "Sentiment Analysis of Tweets"
_APP_ABOUT = "This application is a streamlit dashboard to analyze the sentiment of Tweets"

# Switch off the 'deprecation.showPyplotGlobalUse' option.
# NOTE(review): presumably this silences Streamlit's warning about calling
# st.pyplot() without an explicit figure - confirm the option still exists in
# the deployed Streamlit version.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Tweet table, read once when the module is imported.
DATA_ = pd.read_csv("states.csv")

# Same call order as before: titles first, then the descriptions.
st.title(_APP_TITLE)
st.sidebar.title(_APP_TITLE)
st.markdown(_APP_ABOUT)
st.sidebar.markdown(_APP_ABOUT)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Canonical state / union-territory names. get_state() returns one of these
# (or 'Non_India'); the choropleth joins them against the GeoJSON 'NAME_1'
# property, so the spelling here must stay as-is.
_INDIAN_STATES = (
    "Andaman and Nicobar Islands", "Andhra Pradesh", "Arunachal Pradesh",
    "Assam", "Bihar", "Chandigarh", "Chhattisgarh", "Dadra and Nagar Haveli",
    "Daman and Diu", "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
    "Jammu and Kashmir", "Jharkhand", "Karnataka", "Kerala", "Ladakh",
    "Lakshadweep", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
    "Mizoram", "Nagaland", "Odisha", "Puducherry", "Punjab", "Rajasthan",
    "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh",
    "Uttarakhand", "West Bengal",
)

# City (or common misspelling) -> state. The previous literal was keyed by
# state, so repeated keys silently discarded all but the last city per state.
_CITY_TO_STATE = {
    "New Delhi": "Delhi",
    "Surat": "Gujarat",
    "Gurgaon": "Haryana",
    "Bangalore": "Karnataka",
    "Bengaluru": "Karnataka",
    "Bengaluru South": "Karnataka",
    "Pune": "Maharashtra",
    "Mumbai": "Maharashtra",
    "Navi Mumbai": "Maharashtra",
    "Hyderabad": "Telangana",
    "Kolkata": "West Bengal",
    "Calcutta": "West Bengal",
    "Kota": "Rajasthan",
    "Jodhpur": "Rajasthan",
    "Lukhnow": "Uttar Pradesh",  # misspelling kept on purpose as an alias
    "Lucknow": "Uttar Pradesh",
    "Noida": "Uttar Pradesh",
    "Patna": "Bihar",
    "Dehradun": "Uttarakhand",
    "Indore": "Madhya Pradesh",
    "Bhopal": "Madhya Pradesh",
}

# Case-insensitive lookup tables, built once instead of on every call.
_STATE_BY_NAME = {name.casefold(): name for name in _INDIAN_STATES}
_STATE_BY_PLACE = dict(_STATE_BY_NAME)
_STATE_BY_PLACE.update(
    {city.casefold(): state for city, state in _CITY_TO_STATE.items()}
)


def get_state(x):
    """Map a free-text location such as "Mumbai, India" to an Indian state.

    The last comma-separated part is tried as a state name first; otherwise
    the first part is tried as a known city or state name. Matching ignores
    case and surrounding whitespace.

    Args:
        x: Location text (coerced with ``str``).

    Returns:
        The canonical state name, or ``'Non_India'`` when nothing matches.
    """
    parts = [part.strip() for part in str(x).split(',')]

    state = _STATE_BY_NAME.get(parts[-1].casefold())
    if state is not None:
        return state
    return _STATE_BY_PLACE.get(parts[0].casefold(), 'Non_India')


def _strip_tweet_noise(text):
    """Drop links, @mentions and the literal 'RT' token from tweet text."""
    kept = [
        word for word in text.split()
        if 'http' not in word and not word.startswith('@') and word != 'RT'
    ]
    return ' '.join(kept)


def _build_sentiment_map(data):
    """Build the folium choropleth of net sentiment per Indian state.

    Reads 'india_state.json' from the working directory and uses the
    'location' and 'Label' columns of ``data``.

    Returns:
        A ``folium.Map`` with the choropleth layer and a layer control added.
    """
    with open('india_state.json', encoding='utf-8') as file:
        geojson_data = json.load(file)

    # folium joins on 'feature.id', so expose the state name under that key.
    for feature in geojson_data['features']:
        feature['id'] = feature['properties']['NAME_1']

    sentiment_map = folium.Map(location=[20.5937, 78.9629], zoom_start=4)

    located = data[data['location'].notna()].copy()
    located['states'] = located['location'].apply(get_state)

    # Count tweets per (state, Label), one column per label value.
    per_state = (
        located.groupby(['states'])['Label'].value_counts().unstack().fillna(0.0)
    )
    # Guarantee both label columns exist even if one never occurs in the data.
    for label in (0, 1):
        if label not in per_state.columns:
            per_state[label] = 0.0
    per_state = per_state.reset_index()

    # Net score: label-1 count minus label-0 count.
    # NOTE(review): assumes Label 1 = positive and 0 = negative - confirm.
    per_state['total_sentiment'] = per_state[1] - per_state[0]
    india_only = per_state[per_state['states'] != 'Non_India']

    folium.Choropleth(
        geo_data=geojson_data,
        data=india_only,
        name='CHOROPLETH',
        key_on='feature.id',
        columns=['states', 'total_sentiment'],
        fill_color='YlOrRd',
        fill_opacity=0.7,
        line_opacity=0.4,
        legend_name='Sentiments',
        highlight=True,
    ).add_to(sentiment_map)
    folium.LayerControl().add_to(sentiment_map)
    return sentiment_map


def run():
    """Render the tweet-sentiment dashboard.

    Sidebar widgets drive five sections: a random tweet, tweet counts by
    sentiment, tweets by hour on a map, a per-language breakdown, a word cloud
    and a choropleth of net sentiment per Indian state. Reads the module-level
    ``DATA_`` frame and, for the choropleth, 'india_state.json'.
    """

    # NOTE(review): st.cache is deprecated in newer Streamlit releases in
    # favour of st.cache_data - confirm the deployed version before switching.
    @st.cache(persist=True)
    def load_data():
        # Work on a copy so the module-level frame is not mutated.
        frame = DATA_.copy()
        frame['tweet_created'] = pd.to_datetime(frame['Datetime'])
        return frame

    data = load_data()

    # ---- random tweet -------------------------------------------------------
    st.sidebar.subheader("Show random tweet")
    random_tweet = st.sidebar.radio('Sentiment', ('Positive', 'Neutral', 'Negative'))
    candidates = data.loc[data['sentiment_flair'] == random_tweet, 'Text']
    if candidates.empty:
        # sample(n=1) raises on an empty selection.
        st.sidebar.markdown("No tweets found for this sentiment.")
    else:
        st.sidebar.markdown(candidates.sample(n=1).iat[0])

    # ---- tweets by sentiment ------------------------------------------------
    st.sidebar.markdown("### Number of tweets by sentiment")
    select = st.sidebar.selectbox('Visualization type', ['Histogram', 'Pie chart'])
    counts = data['sentiment_flair'].value_counts()
    sentiment_count = pd.DataFrame({'Sentiment': counts.index, 'Tweets': counts.values})

    if not st.sidebar.checkbox("Hide", True):
        st.markdown("### Number of tweets by sentiment")
        if select == "Histogram":
            fig = px.bar(sentiment_count, x='Sentiment', y='Tweets', color='Tweets', height=500)
        else:
            fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
        st.plotly_chart(fig)

    # ---- tweets by hour -----------------------------------------------------
    st.sidebar.subheader("When and Where are users tweeting from?")
    hour = st.sidebar.slider("Hour of day", 0, 23)
    modified_data = data[data['tweet_created'].dt.hour == hour]
    if not st.sidebar.checkbox("Close", True, key='1'):
        st.markdown("### Tweets locations based on the time of date")
        st.markdown("%i tweets between %i:00 and %i:00" % (len(modified_data), hour, (hour + 1) % 24))
        st.map(modified_data)
        if st.sidebar.checkbox("Show Raw Data", False):
            st.write(modified_data)

    # ---- language breakdown -------------------------------------------------
    st.sidebar.subheader("Breakdown language tweets by sentiment")
    choice = st.sidebar.multiselect('Pick language', ('en', 'hi'), key='0')

    if choice:
        choice_data = data[data.language.isin(choice)]
        fig_choice = px.histogram(
            choice_data,
            x='language',
            y='sentiment_flair',
            histfunc='count',
            color='sentiment_flair',
            facet_col='sentiment_flair',
            labels={'sentiment_flair': 'tweets'},
            height=600,
            width=800,
        )
        st.plotly_chart(fig_choice)

    # ---- word cloud ---------------------------------------------------------
    st.sidebar.header("Word Cloud")
    word_sentiment = st.sidebar.radio('Display word cloud for what sentiment?', ('Positive', 'Neutral', 'Negative'))

    if not st.sidebar.checkbox("Close", True, key='3'):
        st.header('Word cloud for %s sentiment' % (word_sentiment))
        # dropna/astype keep str.join from failing on missing or non-text rows.
        texts = data.loc[data['sentiment_flair'] == word_sentiment, 'Text'].dropna().astype(str)
        processed_words = _strip_tweet_noise(' '.join(texts))

        wordcloud = None
        if processed_words:
            try:
                wordcloud = WordCloud(stopwords=STOPWORDS, background_color='white',
                                      height=640, width=800).generate(processed_words)
            except ValueError:
                # Raised by WordCloud when no usable words remain.
                wordcloud = None

        if wordcloud is None:
            st.info("Not enough text to build a word cloud for this sentiment.")
        else:
            # Explicit figure instead of the global pyplot state.
            cloud_fig, cloud_ax = plt.subplots()
            cloud_ax.imshow(wordcloud)
            cloud_ax.set_xticks([])
            cloud_ax.set_yticks([])
            st.pyplot(cloud_fig)
            plt.close(cloud_fig)  # avoid accumulating figures across reruns

    # ---- choropleth map -----------------------------------------------------
    st.sidebar.header("Map Visualisation")
    if not st.sidebar.checkbox("Close", True, key='4'):
        # Built only when shown: the GeoJSON load and groupby are not free.
        folium_static(_build_sentiment_map(data))
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
# Script entry point: render the dashboard only when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    run()
|