Spaces:
Runtime error
Runtime error
Commit ·
c025aa9
1
Parent(s): af6e65f
Update app.py
Browse files
Update to cleaner code with functions for almost all processes
app.py
CHANGED
|
@@ -4,39 +4,92 @@ import pandas as pd
|
|
| 4 |
import sys
|
| 5 |
from streamlit import cli as stcli
|
| 6 |
import plotly.express as px
|
| 7 |
-
import numpy as np
|
| 8 |
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
# Import data from "df_redacted.csv" as a dataframe
|
| 12 |
-
df_redacted = pd.read_csv('df_redacted.csv')
|
| 13 |
-
|
| 14 |
-
# DATA MANIPULATION
|
| 15 |
# Create variable for Tweet being analyzed in this app
|
| 16 |
-
tweet_url =
|
| 17 |
|
| 18 |
-
# Create new dataframe, reset the index, and rename columns
|
| 19 |
-
sentiment_counts = pd.DataFrame(df_redacted['sentiment_score'].value_counts(dropna=False))
|
| 20 |
-
sentiment_counts = sentiment_counts.reset_index()
|
| 21 |
-
sentiment_counts.columns = ['Sentiment', 'Count']
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
# DISPLAY DATA
|
| 27 |
-
# Display the count for each sentiment category and overall sentiment of the tweet replies
|
| 28 |
-
def main():
    """
    Renders the app: three header paragraphs followed by the sentiment bar chart.

    Reads the module-level names ``tweet_url``, ``sentiment`` and
    ``sentiment_counts``; takes no arguments and returns nothing.
    """
    # Header paragraphs, emitted in order: objective, analysis, results
    objective_text = "**Objective:** Understand public sentiment of a Tweet by analyzing the sentiment of each reply."
    st.markdown(objective_text)

    analysis_template = ("**Analysis:** This app runs sentiment analysis on the replies to a Facebook Tweet announcing their "
                         "rebranding to Meta on 10/28/2021. Link to Tweet: {}")
    st.markdown(analysis_template.format(tweet_url))

    results_template = "**Results:** Most frequent sentiment category for this Tweet's replies: **{}**"
    st.markdown(results_template.format(sentiment))

    # Bar chart with one bar per sentiment category
    chart_options = {
        'x': 'Sentiment',
        'y': 'Count',
        'title': 'Tweet Replies Sentiment Counts by Category',
    }
    bar_figure = px.bar(sentiment_counts, **chart_options)
    st.plotly_chart(bar_figure, use_container_width=True)
|
| 38 |
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
if __name__ == '__main__':
    # `_is_running_with_streamlit` is a private Streamlit attribute, so it is
    # not guaranteed to exist in every release. Read it defensively: when it
    # is present its value is honoured exactly as before; when it is missing
    # the app is rendered instead of crashing with AttributeError.
    if getattr(st, '_is_running_with_streamlit', True):
        main()
|
|
|
|
| 4 |
import sys
|
| 5 |
from streamlit import cli as stcli
|
| 6 |
import plotly.express as px
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
+
# GLOBAL VARIABLES
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# Create variable for Tweet being analyzed in this app
|
| 11 |
+
tweet_url = 'https://twitter.com/Meta/status/1453795115701440524'
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
# FUNCTIONS
|
| 15 |
+
def data_import(csv_path='df_redacted.csv'):
    """
    Imports data from a CSV file as a dataframe.

    Args:
        csv_path: Path of the CSV file to read. Defaults to "df_redacted.csv",
            so existing calls that pass no argument load the same file as
            before.

    Returns:
        The dataframe returned by ``pd.read_csv`` for ``csv_path``.
    """
    return pd.read_csv(csv_path)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def data_manipulation(df_redacted):
    """
    Manipulates the data imported from the CSV file to prepare for bar chart.

    Args:
        df_redacted: Dataframe that has a 'sentiment_score' column.

    Returns:
        A tuple ``(sentiment_counts, sentiment)`` where ``sentiment_counts``
        is a dataframe with the columns 'Sentiment' and 'Count' (one row per
        distinct 'sentiment_score' value, missing values included) and
        ``sentiment`` is the 'Sentiment' value with the highest count, or
        ``None`` when there are no rows to count.

    Raises:
        KeyError: If ``df_redacted`` has no 'sentiment_score' column.
    """
    # Count every category (dropna=False keeps missing values as their own
    # row) and name both output columns in one step.
    sentiment_counts = (
        df_redacted['sentiment_score']
        .value_counts(dropna=False)
        .rename_axis('Sentiment')
        .reset_index(name='Count')
    )

    # idxmax() raises ValueError on an empty column, so an empty input is
    # reported as "no dominant sentiment" instead of crashing the app.
    if sentiment_counts.empty:
        return sentiment_counts, None

    # Find sentiment category with the highest count
    top_row = sentiment_counts['Count'].idxmax()
    sentiment = sentiment_counts.loc[top_row, 'Sentiment']

    return sentiment_counts, sentiment
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def display_header(tweet_url, sentiment):
    """
    Displays the header section of the app.

    Writes three markdown paragraphs with ``st.markdown``: the objective,
    the analysis description (with ``tweet_url`` substituted in) and the
    results line (with ``sentiment`` substituted in).

    Args:
        tweet_url: Value inserted after "Link to Tweet: ".
        sentiment: Value shown in bold as the most frequent category.
    """
    objective_text = '**Objective:** Understand public sentiment of a Tweet by analyzing the sentiment of each reply.'
    st.markdown(objective_text)

    analysis_template = ('**Analysis:** This app runs sentiment analysis on 10,948 replies to a Facebook Tweet announcing '
                         'their rebranding to Meta on 10/28/2021. Link to Tweet: {}')
    st.markdown(analysis_template.format(tweet_url))

    results_template = '**Results:** Most frequent sentiment category for the replies to this Tweet: **{}**'
    st.markdown(results_template.format(sentiment))
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def display_chart(sentiment_counts):
    """
    Displays the chosen chart for the data.

    Builds a Plotly Express bar chart from ``sentiment_counts`` using its
    'Sentiment' column on the x axis and its 'Count' column on the y axis,
    then renders it with ``st.plotly_chart`` at container width.

    Args:
        sentiment_counts: Data passed to ``px.bar``; it is referenced by the
            column names 'Sentiment' and 'Count'.
    """
    # Keyword arguments for the bar chart, gathered in one place
    chart_options = {
        'x': 'Sentiment',
        'y': 'Count',
        'title': 'Tweet Replies Sentiment Counts by Category',
    }
    bar_figure = px.bar(sentiment_counts, **chart_options)

    st.plotly_chart(bar_figure, use_container_width=True)
|
| 60 |
|
| 61 |
|
| 62 |
+
def display_footer():
    """
    Displays the footer section of the app.

    Writes the notes and the "Plans for Version 2.0" items, one
    ``st.markdown`` call per paragraph, in the order listed below.
    """
    footer_paragraphs = (
        '**Notes:** ',
        ('- The VADER model was used to analyze the sentiment of each reply: '
         'https://github.com/cjhutto/vaderSentiment'),
        ('- Due to Twitter developer policies, I am not able to share the data set of downloaded Tweet replies '
         'so my DATA EXTRACTION and DATA CLEANSING steps are not shown at this time.'),
        '**Plans for Version 2.0:**',
        '- Formulate method for cleaning Tweet replies, such as removing those that are from bots or are spam.',
        ('- Analyze the sentiment of replies using the BERTweet model, which would be more appropriate for this '
         'project since it was trained on a corpus of Tweets: https://github.com/VinAIResearch/BERTweet'),
    )

    for paragraph in footer_paragraphs:
        st.markdown(paragraph)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def main():
    """
    Main function for the app which calls all other functions to display the app.

    Loads the data with ``data_import``, summarises it with
    ``data_manipulation``, then renders the header, chart and footer in
    that order. The header uses the module-level ``tweet_url``.
    """
    # Load the data and derive the per-category counts and top category
    replies = data_import()
    counts_by_category, top_category = data_manipulation(replies)

    # Render the page top to bottom
    display_header(tweet_url, top_category)
    display_chart(counts_by_category)
    display_footer()
|
| 91 |
+
|
| 92 |
+
|
| 93 |
if __name__ == '__main__':
    # `_is_running_with_streamlit` is a private Streamlit attribute, so it is
    # not guaranteed to exist in every release. Read it defensively: when it
    # is present its value is honoured exactly as before; when it is missing
    # the app is rendered instead of crashing with AttributeError.
    if getattr(st, '_is_running_with_streamlit', True):
        main()
|