Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.express as px | |
| from wordcloud import WordCloud, STOPWORDS | |
| import matplotlib.pyplot as plt | |
| import folium | |
| import plotly.express as px | |
| import seaborn as sns | |
| import json | |
| import os | |
| from streamlit_folium import folium_static | |
# Module-level setup: silence the pyplot global-use warning where the option
# exists, load the tweet dataset, and render the page/sidebar headings.
from streamlit.errors import StreamlitAPIException

try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except StreamlitAPIException:
    # Deliberately ignored: Streamlit releases that no longer define this
    # option reject it as unrecognised, and there is then nothing to silence.
    pass

# Dataset backing every widget below; read once per script run.
DATA_ = pd.read_csv("states.csv")

st.title("Sentiment Analysis of Tweets")
st.sidebar.title("Sentiment Analysis of Tweets")
st.markdown("This application is a streamlit dashboard to analyze the sentiment of Tweets")
st.sidebar.markdown("This application is a streamlit dashboard to analyze the sentiment of Tweets")
# State / union-territory names accepted verbatim in a location string.
_INDIAN_STATES = frozenset([
    "Andaman and Nicobar Islands", "Andhra Pradesh", "Arunachal Pradesh",
    "Assam", "Bihar", "Chandigarh", "Chhattisgarh", "Dadra and Nagar Haveli",
    "Daman and Diu", "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
    "Jammu and Kashmir", "Jharkhand", "Karnataka", "Kerala", "Ladakh",
    "Lakshadweep", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
    "Mizoram", "Nagaland", "Odisha", "Puducherry", "Punjab", "Rajasthan",
    "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh",
    "Uttarakhand", "West Bengal",
])

# City -> state lookup. The original literal was keyed by state, so repeated
# keys silently discarded all but the last city listed for each state.
_CITY_TO_STATE = {
    "New Delhi": "Delhi",
    "Surat": "Gujarat",
    "Gurgaon": "Haryana",
    "Bangalore": "Karnataka",
    "Bengaluru": "Karnataka",
    "Bengaluru South": "Karnataka",
    "Pune": "Maharashtra",
    "Mumbai": "Maharashtra",
    "Navi Mumbai": "Maharashtra",
    "Hyderabad": "Telangana",
    "Kolkata": "West Bengal",
    "Calcutta": "West Bengal",
    "Kota": "Rajasthan",
    "Jodhpur": "Rajasthan",
    "Lukhnow": "Uttar Pradesh",  # misspelling carried over from the original table
    "Lucknow": "Uttar Pradesh",
    "Noida": "Uttar Pradesh",
    "Patna": "Bihar",
    "Dehradun": "Uttarakhand",
    "Indore": "Madhya Pradesh",
    "Bhopal": "Madhya Pradesh",
}


def get_state(x):
    """Map a free-text, comma-separated location to an Indian state name.

    The last comma-separated segment is checked against the known state
    names first, then the first segment is checked as a state name and
    finally as a known city.

    Args:
        x: Location text such as ``"Mumbai, India"`` or ``"Pune, Maharashtra"``.

    Returns:
        The matching state name, or ``'Non_India'`` when nothing matches
        (including when ``x`` is not a string).
    """
    if not isinstance(x, str):
        return 'Non_India'
    segments = [segment.strip() for segment in x.split(',')]
    first, last = segments[0], segments[-1]
    if last in _INDIAN_STATES:
        return last
    if first in _INDIAN_STATES:
        return first
    return _CITY_TO_STATE.get(first, 'Non_India')


def _build_state_sentiment_map(data):
    """Build the folium choropleth of net sentiment per Indian state.

    Reads ``india_state.json``, derives a state for every row that has a
    ``location`` and colours each state by (count of label 1) minus
    (count of label 0).

    Args:
        data: DataFrame with ``location`` and ``Label`` columns.

    Returns:
        The ``folium.Map`` with the choropleth and a layer control added.
    """
    with open('india_state.json', encoding='utf-8') as file:
        geojson_data = json.load(file)
    # The choropleth joins on ``feature.id``, so expose the state name there.
    for feature in geojson_data['features']:
        feature['id'] = feature['properties']['NAME_1']

    sentiment_map = folium.Map(location=[20.5937, 78.9629], zoom_start=4)

    located = data[data['location'].notna()].copy()
    located['states'] = located['location'].apply(get_state)

    # NOTE(review): this section reads 'Label' while the rest of the dashboard
    # reads 'Labels1' -- confirm both columns exist in states.csv.
    per_state = (
        located.groupby(['states'])['Label']
        .value_counts()
        .unstack()
        .fillna(0.0)
        .reset_index()
    )
    per_state['total_sentiment'] = per_state[1] - per_state[0]
    indian_states = per_state[per_state['states'] != 'Non_India']

    folium.Choropleth(
        geo_data=geojson_data,
        data=indian_states,
        name='CHOROPLETH',
        key_on='feature.id',
        columns=['states', 'total_sentiment'],
        fill_color='YlOrRd',
        fill_opacity=0.7,
        line_opacity=0.4,
        legend_name='Sentiments',
        highlight=True,
    ).add_to(sentiment_map)
    folium.LayerControl().add_to(sentiment_map)
    return sentiment_map


def run():
    """Render the dashboard: sidebar controls plus the charts they toggle."""

    def load_data():
        """Add a parsed ``tweet_created`` column to the module-level frame."""
        DATA_['tweet_created'] = pd.to_datetime(DATA_['Datetime'])
        return DATA_

    data = load_data()

    # --- Random tweet for the chosen sentiment ---------------------------
    st.sidebar.subheader("Show random tweet")
    random_tweet = st.sidebar.radio('Sentiment', ('-1', '1'))
    # NOTE(review): the radio yields strings; if 'Labels1' is numeric this
    # selection is always empty -- confirm the column dtype.
    matching = data.query('Labels1 == @random_tweet')[["text_clean_translated"]]
    if matching.empty:
        # sample(n=1) raises on an empty frame, so report instead of crashing.
        st.sidebar.markdown("No tweets found for this sentiment")
    else:
        st.sidebar.markdown(matching.sample(n=1).iat[0, 0])

    # --- Tweet counts per sentiment ---------------------------------------
    st.sidebar.markdown("### Number of tweets by sentiment")
    select = st.sidebar.selectbox('Visualization type', ['Histogram', 'Pie chart'])
    label_counts = data['Labels1'].value_counts()
    sentiment_count = pd.DataFrame(
        {'Sentiment': label_counts.index, 'Tweets': label_counts.values}
    )
    if not st.sidebar.checkbox("Hide", True):
        st.markdown("### Number of tweets by sentiment")
        if select == "Histogram":
            count_fig = px.bar(sentiment_count, x='Sentiment', y='Tweets',
                               color='Tweets', height=500)
        else:
            count_fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
        st.plotly_chart(count_fig)

    # --- Tweets by hour of day --------------------------------------------
    st.sidebar.subheader("When and Where are users tweeting from?")
    hour = st.sidebar.slider("Hour of day", 0, 23)
    modified_data = data[data['tweet_created'].dt.hour == hour]
    if not st.sidebar.checkbox("Close", True, key='1'):
        st.markdown("### Tweets locations based on the time of date")
        st.markdown(
            f"{len(modified_data)} tweets between {hour}:00 and {(hour + 1) % 24}:00"
        )
        st.map(modified_data)
        if st.sidebar.checkbox("Show Raw Data", False):
            st.write(modified_data)

    # --- Sentiment breakdown per language ---------------------------------
    st.sidebar.subheader("Breakdown language tweets by sentiment")
    choice = st.sidebar.multiselect('Pick language', ('en', 'hi'), key='0')
    if choice:
        choice_data = data[data.language.isin(choice)]
        fig_choice = px.histogram(choice_data, x='language',
                                  y='sentiment_flair',
                                  histfunc='count', color='Labels1',
                                  facet_col='Labels1',
                                  labels={'Labels1': 'tweets'}, height=600, width=800)
        st.plotly_chart(fig_choice)

    # --- Word cloud --------------------------------------------------------
    st.sidebar.header("Word Cloud")
    word_sentiment = st.sidebar.radio('Display word cloud for what sentiment?',
                                      ('Positive', 'Neutral', 'Negative'))
    if not st.sidebar.checkbox("Close", True, key='3'):
        st.header(f'Word cloud for {word_sentiment} sentiment')
        subset = data[data['sentiment_flair'] == word_sentiment]
        # Drop missing texts so join() never sees a non-string value.
        words = ' '.join(subset['Text'].dropna().astype(str))
        # Strip links, mentions and the retweet marker before counting words.
        processed_words = ' '.join(
            word for word in words.split()
            if 'http' not in word and not word.startswith('@') and word != 'RT'
        )
        if not processed_words:
            # WordCloud.generate raises when it is given no words at all.
            st.warning("No words available for this sentiment")
        else:
            wordcloud = WordCloud(stopwords=STOPWORDS, background_color='white',
                                  height=640, width=800).generate(processed_words)
            # Draw on an explicit figure rather than pyplot's global one.
            cloud_fig, cloud_ax = plt.subplots()
            cloud_ax.imshow(wordcloud)
            cloud_ax.set_xticks([])
            cloud_ax.set_yticks([])
            st.pyplot(cloud_fig)
            plt.close(cloud_fig)

    # --- Choropleth map ----------------------------------------------------
    st.sidebar.header("Map Visualisation")
    if not st.sidebar.checkbox("Close", True, key='4'):
        # Built only on demand: the map is not needed while the section is closed.
        folium_static(_build_state_sentiment_map(data))
# Script entry point: render the dashboard only when executed as the main module.
if __name__ == "__main__":
    run()